mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-06-27 14:47:50 -03:00
Merge commit 'f955c625aded244864e83a872b396868a490dbc5' as 'go-enry'
This commit is contained in:
6
go-enry/benchmarks/csv/enry-distribution.csv
Normal file
6
go-enry/benchmarks/csv/enry-distribution.csv
Normal file
@ -0,0 +1,6 @@
|
||||
timeInterval,enry,numberOfFiles
|
||||
1us-10us,enry,83
|
||||
10us-100us,enry,1341
|
||||
100us-1ms,enry,314
|
||||
1ms-10ms,enry,146
|
||||
10ms-100ms,enry,48
|
|
13532
go-enry/benchmarks/csv/enry-samples.csv
Normal file
13532
go-enry/benchmarks/csv/enry-samples.csv
Normal file
File diff suppressed because it is too large
Load Diff
8
go-enry/benchmarks/csv/enry-total.csv
Normal file
8
go-enry/benchmarks/csv/enry-total.csv
Normal file
@ -0,0 +1,8 @@
|
||||
function,tool,iterations,ns/op
|
||||
GetLanguage(),enry,100,2333748307
|
||||
Classify(),enry,3,53842505853
|
||||
GetLanguagesByModeline(),enry,1000,228234491
|
||||
GetLanguagesByFilename(),enry,1000000,124782
|
||||
GetLanguagesByShebang(),enry,100000,2339138
|
||||
GetLanguagesByExtension(),enry,200000,1110007
|
||||
GetLanguagesByContent(),enry,500,342358978
|
|
6
go-enry/benchmarks/csv/linguist-distribution.csv
Normal file
6
go-enry/benchmarks/csv/linguist-distribution.csv
Normal file
@ -0,0 +1,6 @@
|
||||
timeInterval,linguist,numberOfFiles
|
||||
1us-10us,linguist,0
|
||||
10us-100us,linguist,120
|
||||
100us-1ms,linguist,1070
|
||||
1ms-10ms,linguist,816
|
||||
10ms-100ms,linguist,71
|
|
14554
go-enry/benchmarks/csv/linguist-samples.csv
Normal file
14554
go-enry/benchmarks/csv/linguist-samples.csv
Normal file
File diff suppressed because it is too large
Load Diff
8
go-enry/benchmarks/csv/linguist-total.csv
Normal file
8
go-enry/benchmarks/csv/linguist-total.csv
Normal file
@ -0,0 +1,8 @@
|
||||
function,tool,iterations,ns/op
|
||||
GetLanguage(),linguist,5,3822076000
|
||||
Classify(),linguist,5,329660597600
|
||||
GetLanguagesByModeline(),linguist,5,2770912600
|
||||
GetLanguagesByFilename(),linguist,5,34159000
|
||||
GetLanguagesByShebang(),linguist,5,159317200
|
||||
GetLanguagesByExtension(),linguist,5,354929800
|
||||
GetLanguagesByContent(),linguist,5,3881611000
|
|
BIN
go-enry/benchmarks/histogram/distribution.png
Normal file
BIN
go-enry/benchmarks/histogram/distribution.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 16 KiB |
126
go-enry/benchmarks/linguist-samples.rb
Executable file
126
go-enry/benchmarks/linguist-samples.rb
Executable file
@ -0,0 +1,126 @@
|
||||
#!/usr/bin/env ruby
|
||||
|
||||
require 'benchmark'
|
||||
require 'linguist'
|
||||
|
||||
iterations = (ARGV[0] || 1).to_i
|
||||
|
||||
# BenchBlob is a Linguist::FileBlob that loads its content eagerly and can
# forget the values language detection memoizes on it, so one blob instance
# can be fed to the detectors over and over.
class BenchBlob < Linguist::FileBlob
  attr_accessor :data

  # Delegates to the parent initializer with the same arguments, then reads
  # the whole file at @fullpath into @data.
  # NOTE(review): @fullpath is presumably assigned by FileBlob#initialize -
  # confirm against the Linguist version in use.
  def initialize(path, base_path = nil)
    super
    @data = File.read(@fullpath)
  end

  # Resets the instance variables that detection leaves cached on the blob.
  def clean
    @_mime_type = @detect_encoding = @lines = nil
  end
end
|
||||
|
||||
# Recursively collects one BenchBlob for every non-directory entry under root.
#
# Entries of each directory are visited in sorted name order, so the result
# does not depend on the order in which the filesystem lists them.
#
# @param root [String] directory to walk
# @return [Array<BenchBlob>] blobs in depth-first order; empty for an empty tree
def get_samples(root)
  entries = Dir.foreach(root).reject { |entry| entry == '.' || entry == '..' }

  entries.sort.flat_map do |entry|
    path = File.join(root, entry)
    File.directory?(path) ? get_samples(path) : [BenchBlob.new(path)]
  end
end
|
||||
|
||||
samples = get_samples('.linguist/samples')
languages = Linguist::Language.all

# Operations to benchmark, keyed by the function label that prefixes each
# report name. Hash insertion order is the order in which they run: every
# sample is benchmarked for one function before moving on to the next.
benchmarks = {
  'GetLanguage()' => ->(blob) { Linguist.detect(blob) },
  'Classify()' => ->(blob) { Linguist::Classifier.classify(Linguist::Samples.cache, blob.data) },
  'GetLanguagesByModeline()' => ->(blob) { Linguist::Strategy::Modeline.call(blob, languages) },
  'GetLanguagesByFilename()' => ->(blob) { Linguist::Strategy::Filename.call(blob, languages) },
  'GetLanguagesByShebang()' => ->(blob) { Linguist::Shebang.call(blob, languages) },
  'GetLanguagesByExtension()' => ->(blob) { Linguist::Strategy::Extension.call(blob, languages) },
  'GetLanguagesByContent()' => ->(blob) { Linguist::Heuristics.call(blob, languages) }
}

benchmarks.each do |function, operation|
  samples.each do |blob|
    # Report names are whitespace-separated downstream, so the sample path
    # must not contain any whitespace itself.
    sample_name = blob.path.gsub(/\s/, '_')

    Benchmark.bmbm do |bm|
      bm.report("#{function}_SAMPLE_#{sample_name} #{iterations}") do
        iterations.times do
          operation.call(blob)
          # Drop values cached on the blob so every iteration starts clean.
          blob.clean
        end
      end
    end
  end
end
|
120
go-enry/benchmarks/linguist-total.rb
Executable file
120
go-enry/benchmarks/linguist-total.rb
Executable file
@ -0,0 +1,120 @@
|
||||
#!/usr/bin/env ruby
|
||||
|
||||
require 'benchmark'
|
||||
require 'linguist'
|
||||
|
||||
iterations = (ARGV[0] || 1).to_i
|
||||
|
||||
# BenchBlob is a Linguist::FileBlob that loads its content eagerly and can
# forget the values language detection memoizes on it, so one blob instance
# can be fed to the detectors over and over.
class BenchBlob < Linguist::FileBlob
  attr_accessor :data
  attr_accessor :fullpath

  # Delegates to the parent initializer with the same arguments, then reads
  # the whole file at @fullpath into @data.
  # NOTE(review): @fullpath is presumably assigned by FileBlob#initialize -
  # confirm against the Linguist version in use.
  def initialize(path, base_path = nil)
    super
    @data = File.read(@fullpath)
  end

  # Resets the instance variables that detection leaves cached on the blob.
  def clean
    @_mime_type = @detect_encoding = @lines = nil
  end
end
|
||||
|
||||
# Recursively collects one BenchBlob for every non-directory entry under root.
#
# Entries of each directory are visited in sorted name order, so the result
# does not depend on the order in which the filesystem lists them.
#
# @param root [String] directory to walk
# @return [Array<BenchBlob>] blobs in depth-first order; empty for an empty tree
def get_samples(root)
  entries = Dir.foreach(root).reject { |entry| entry == '.' || entry == '..' }

  entries.sort.flat_map do |entry|
    path = File.join(root, entry)
    File.directory?(path) ? get_samples(path) : [BenchBlob.new(path)]
  end
end
|
||||
|
||||
samples = get_samples('.linguist/samples')
languages = Linguist::Language.all

# Operations to benchmark, keyed by the function label that prefixes each
# report name. Hash insertion order is the order in which they run.
benchmarks = {
  'GetLanguage()' => ->(blob) { Linguist.detect(blob) },
  'Classify()' => ->(blob) { Linguist::Classifier.classify(Linguist::Samples.cache, blob.data) },
  'GetLanguagesByModeline()' => ->(blob) { Linguist::Strategy::Modeline.call(blob, languages) },
  'GetLanguagesByFilename()' => ->(blob) { Linguist::Strategy::Filename.call(blob, languages) },
  'GetLanguagesByShebang()' => ->(blob) { Linguist::Shebang.call(blob, languages) },
  'GetLanguagesByExtension()' => ->(blob) { Linguist::Strategy::Extension.call(blob, languages) },
  'GetLanguagesByContent()' => ->(blob) { Linguist::Heuristics.call(blob, languages) }
}

# One report per function; each report runs the operation over the whole
# sample set `iterations` times.
benchmarks.each do |function, operation|
  Benchmark.bmbm do |bm|
    bm.report("#{function}_TOTAL #{iterations}") do
      iterations.times do
        samples.each do |blob|
          operation.call(blob)
          # Drop values cached on the blob so every pass starts clean.
          blob.clean
        end
      end
    end
  end
end
|
7
go-enry/benchmarks/parse.sh
Executable file
7
go-enry/benchmarks/parse.sh
Executable file
@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
set -e
|
||||
|
||||
cd benchmarks/output
|
||||
go run ../parser/main.go -outdir ../csv
|
||||
cd ../csv
|
||||
go run ../parser/main.go -distribution
|
386
go-enry/benchmarks/parser/main.go
Normal file
386
go-enry/benchmarks/parser/main.go
Normal file
@ -0,0 +1,386 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/csv"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
// functions benchmarked
|
||||
getLanguageFunc = "GetLanguage()"
|
||||
classifyFunc = "Classify()"
|
||||
modelineFunc = "GetLanguagesByModeline()"
|
||||
filenameFunc = "GetLanguagesByFilename()"
|
||||
shebangFunc = "GetLanguagesByShebang()"
|
||||
extensionFunc = "GetLanguagesByExtension()"
|
||||
contentFunc = "GetLanguagesByContent()"
|
||||
|
||||
// benchmark's outputs
|
||||
enryTotalBench = "enry_total.bench"
|
||||
enrySamplesBench = "enry_samples.bench"
|
||||
linguistTotalBench = "linguist_total.bench"
|
||||
linguistSamplesBench = "linguist_samples.bench"
|
||||
|
||||
// files to generate
|
||||
enryTotalCSV = "enry-total.csv"
|
||||
enrySamplesCSV = "enry-samples.csv"
|
||||
linguistTotalCSV = "linguist-total.csv"
|
||||
linguistSamplesCSV = "linguist-samples.csv"
|
||||
|
||||
// files to generate with flag distribution
|
||||
enryDistributionCSV = "enry-distribution.csv"
|
||||
linguistDistributionCSV = "linguist-distribution.csv"
|
||||
)
|
||||
|
||||
var (
|
||||
// flags
|
||||
distribution bool
|
||||
outDir string
|
||||
|
||||
enryFunctions = []string{getLanguageFunc, classifyFunc, modelineFunc, filenameFunc, shebangFunc, extensionFunc, contentFunc}
|
||||
distributionIntervals = []string{"1us-10us", "10us-100us", "100us-1ms", "1ms-10ms", "10ms-100ms"}
|
||||
)
|
||||
|
||||
func main() {
|
||||
flag.BoolVar(&distribution, "distribution", false, "generate enry-distribution.csv and linguist-distribution.csv")
|
||||
flag.StringVar(&outDir, "outdir", "", "path to leave csv files")
|
||||
flag.Parse()
|
||||
|
||||
if distribution {
|
||||
generateDistributionCSV()
|
||||
return
|
||||
}
|
||||
|
||||
generateCSV()
|
||||
}
|
||||
|
||||
func generateDistributionCSV() {
|
||||
CSVFiles := []struct {
|
||||
in string
|
||||
out string
|
||||
tool string
|
||||
}{
|
||||
{in: enrySamplesCSV, out: enryDistributionCSV, tool: "enry"},
|
||||
{in: linguistSamplesCSV, out: linguistDistributionCSV, tool: "linguist"},
|
||||
}
|
||||
|
||||
for _, CSVFile := range CSVFiles {
|
||||
f, err := os.Open(CSVFile.in)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
r := csv.NewReader(f)
|
||||
CSVSamples, err := r.ReadAll()
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
|
||||
CSVDistribution, err := buildDistribution(CSVSamples[1:], CSVFile.tool)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
|
||||
if err := writeCSV(CSVDistribution, filepath.Join(outDir, CSVFile.out)); err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func buildDistribution(CSVSamples [][]string, tool string) ([][]string, error) {
|
||||
count := make(map[string]int, len(distributionIntervals))
|
||||
for _, row := range CSVSamples {
|
||||
if row[1] != getLanguageFunc {
|
||||
continue
|
||||
}
|
||||
|
||||
num, err := strconv.ParseFloat(row[len(row)-1], 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
arrangeByTime(count, num)
|
||||
}
|
||||
|
||||
CSVDistribution := make([][]string, 0, len(count)+1)
|
||||
firstLine := []string{"timeInterval", tool, "numberOfFiles"}
|
||||
CSVDistribution = append(CSVDistribution, firstLine)
|
||||
for _, interval := range distributionIntervals {
|
||||
number := strconv.FormatInt(int64(count[interval]), 10)
|
||||
row := []string{interval, tool, number}
|
||||
CSVDistribution = append(CSVDistribution, row)
|
||||
}
|
||||
|
||||
printDistributionInfo(count, tool)
|
||||
return CSVDistribution, nil
|
||||
}
|
||||
|
||||
func printDistributionInfo(count map[string]int, tool string) {
|
||||
total := 0
|
||||
for _, v := range count {
|
||||
total += v
|
||||
}
|
||||
|
||||
fmt.Println(tool, "files", total)
|
||||
fmt.Println("Distribution")
|
||||
for _, interval := range distributionIntervals {
|
||||
fmt.Println("\t", interval, count[interval])
|
||||
}
|
||||
|
||||
fmt.Println("Percentage")
|
||||
for _, interval := range distributionIntervals {
|
||||
p := (float64(count[interval]) / float64(total)) * 100.00
|
||||
fmt.Printf("\t %s %f%%\n", interval, p)
|
||||
}
|
||||
|
||||
fmt.Printf("\n\n")
|
||||
}
|
||||
|
||||
func arrangeByTime(count map[string]int, num float64) {
|
||||
switch {
|
||||
case num > 1000.00 && num <= 10000.00:
|
||||
count[distributionIntervals[0]]++
|
||||
case num > 10000.00 && num <= 100000.00:
|
||||
count[distributionIntervals[1]]++
|
||||
case num > 100000.00 && num <= 1000000.00:
|
||||
count[distributionIntervals[2]]++
|
||||
case num > 1000000.00 && num <= 10000000.00:
|
||||
count[distributionIntervals[3]]++
|
||||
case num > 10000000.00 && num <= 100000000.00:
|
||||
count[distributionIntervals[4]]++
|
||||
}
|
||||
}
|
||||
|
||||
func writeCSV(CSVData [][]string, outPath string) error {
|
||||
out, err := os.Create(outPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
w := csv.NewWriter(out)
|
||||
w.WriteAll(CSVData)
|
||||
|
||||
if err := w.Error(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type parse func(data []byte, tool string) ([][]string, error)
|
||||
|
||||
func generateCSV() {
|
||||
bmFiles := []struct {
|
||||
in string
|
||||
out string
|
||||
tool string
|
||||
parse parse
|
||||
}{
|
||||
{in: enryTotalBench, out: enryTotalCSV, tool: "enry", parse: parseTotal},
|
||||
{in: linguistTotalBench, out: linguistTotalCSV, tool: "linguist", parse: parseTotal},
|
||||
{in: enrySamplesBench, out: enrySamplesCSV, tool: "enry", parse: parseSamples},
|
||||
{in: linguistSamplesBench, out: linguistSamplesCSV, tool: "linguist", parse: parseSamples},
|
||||
}
|
||||
|
||||
for _, bmFile := range bmFiles {
|
||||
buf, err := ioutil.ReadFile(bmFile.in)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
|
||||
info, err := bmFile.parse(buf, bmFile.tool)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
|
||||
if err := writeCSV(info, filepath.Join(outDir, bmFile.out)); err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func parseTotal(data []byte, tool string) ([][]string, error) {
|
||||
const totalLine = "_TOTAL"
|
||||
parsedInfo := map[string][]string{}
|
||||
buf := bufio.NewScanner(bytes.NewReader(data))
|
||||
for buf.Scan() {
|
||||
line := buf.Text()
|
||||
if strings.Contains(line, totalLine) {
|
||||
split := strings.Fields(line)
|
||||
row, err := getRow(split, tool)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
parsedInfo[row[0]] = row
|
||||
}
|
||||
}
|
||||
|
||||
if err := buf.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
firstLine := []string{"function", "tool", "iterations", "ns/op"}
|
||||
return prepareInfoForCSV(parsedInfo, firstLine), nil
|
||||
}
|
||||
|
||||
func getRow(line []string, tool string) ([]string, error) {
|
||||
row := make([]string, 0, 3)
|
||||
for _, function := range enryFunctions {
|
||||
if strings.Contains(line[0], function) {
|
||||
row = append(row, function)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
row = append(row, tool)
|
||||
iterations := line[1]
|
||||
row = append(row, iterations)
|
||||
|
||||
average, err := getAverage(line)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
||||
}
|
||||
|
||||
row = append(row, average)
|
||||
return row, nil
|
||||
}
|
||||
|
||||
func getAverage(line []string) (string, error) {
|
||||
average := line[len(line)-1]
|
||||
if !strings.HasSuffix(average, ")") {
|
||||
return line[2], nil
|
||||
}
|
||||
|
||||
totalTime := strings.Trim(average, "() ")
|
||||
time, err := strconv.ParseFloat(totalTime, 64)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
iterations := line[1]
|
||||
i, err := strconv.ParseFloat(iterations, 64)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
avg := (time * math.Pow10(9)) / i
|
||||
return fmt.Sprintf("%d", int(avg)), nil
|
||||
}
|
||||
|
||||
func prepareInfoForCSV(parsedInfo map[string][]string, firstLine []string) [][]string {
|
||||
info := createInfoWithFirstLine(firstLine, len(parsedInfo))
|
||||
for _, function := range enryFunctions {
|
||||
info = append(info, parsedInfo[function])
|
||||
}
|
||||
|
||||
return info
|
||||
}
|
||||
|
||||
// createInfoWithFirstLine returns an empty row slice with room for
// sliceLength rows. When firstLine is non-empty it is stored as the first
// row and one extra slot is reserved for it.
func createInfoWithFirstLine(firstLine []string, sliceLength int) (info [][]string) {
	if len(firstLine) == 0 {
		return make([][]string, 0, sliceLength)
	}

	return append(make([][]string, 0, sliceLength+1), firstLine)
}
|
||||
|
||||
type enryFuncs map[string][]string
|
||||
|
||||
func newEnryFuncs() enryFuncs {
|
||||
return enryFuncs{
|
||||
getLanguageFunc: nil,
|
||||
classifyFunc: nil,
|
||||
modelineFunc: nil,
|
||||
filenameFunc: nil,
|
||||
shebangFunc: nil,
|
||||
extensionFunc: nil,
|
||||
contentFunc: nil,
|
||||
}
|
||||
}
|
||||
|
||||
func parseSamples(data []byte, tool string) ([][]string, error) {
|
||||
const sampleLine = "SAMPLE_"
|
||||
parsedInfo := map[string]enryFuncs{}
|
||||
buf := bufio.NewScanner(bytes.NewReader(data))
|
||||
for buf.Scan() {
|
||||
line := buf.Text()
|
||||
if strings.Contains(line, sampleLine) {
|
||||
split := strings.Fields(line)
|
||||
name := getSampleName(split[0])
|
||||
if _, ok := parsedInfo[name]; !ok {
|
||||
parsedInfo[name] = newEnryFuncs()
|
||||
}
|
||||
|
||||
row := make([]string, 0, 4)
|
||||
row = append(row, name)
|
||||
r, err := getRow(split, tool)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
row = append(row, r...)
|
||||
function := row[1]
|
||||
parsedInfo[name][function] = row
|
||||
}
|
||||
}
|
||||
|
||||
if err := buf.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
firstLine := []string{"file", "function", "tool", "iterations", "ns/op"}
|
||||
return prepareSamplesInfoForCSV(parsedInfo, firstLine), nil
|
||||
}
|
||||
|
||||
func getSampleName(s string) string {
|
||||
start := strings.Index(s, "SAMPLE_") + len("SAMPLE_")
|
||||
suffix := fmt.Sprintf("-%d", runtime.GOMAXPROCS(-1))
|
||||
name := strings.TrimSuffix(s[start:], suffix)
|
||||
return name
|
||||
}
|
||||
|
||||
func prepareSamplesInfoForCSV(parsedInfo map[string]enryFuncs, firstLine []string) [][]string {
|
||||
info := createInfoWithFirstLine(firstLine, len(parsedInfo)*len(enryFunctions))
|
||||
orderedKeys := sortKeys(parsedInfo)
|
||||
for _, path := range orderedKeys {
|
||||
sampleInfo := prepareInfoForCSV(parsedInfo[path], nil)
|
||||
info = append(info, sampleInfo...)
|
||||
}
|
||||
|
||||
return info
|
||||
}
|
||||
|
||||
func sortKeys(parsedInfo map[string]enryFuncs) []string {
|
||||
keys := make([]string, 0, len(parsedInfo))
|
||||
for key := range parsedInfo {
|
||||
keys = append(keys, key)
|
||||
}
|
||||
|
||||
sort.Strings(keys)
|
||||
return keys
|
||||
}
|
21
go-enry/benchmarks/plot-histogram.gp
Executable file
21
go-enry/benchmarks/plot-histogram.gp
Executable file
@ -0,0 +1,21 @@
|
||||
#!/usr/bin/env gnuplot
|
||||
|
||||
set terminal png large font "arial,26" size 1920,1080
|
||||
set output 'benchmarks/histogram/distribution.png'
|
||||
|
||||
set datafile separator comma
|
||||
set key under
|
||||
|
||||
set style data histogram
|
||||
set style histogram clustered gap 1 title offset 1,1
|
||||
set style fill solid noborder
|
||||
set boxwidth 0.95
|
||||
set grid y
|
||||
set bmargin 12
|
||||
set autoscale
|
||||
set title "Number of files per processing time"
|
||||
|
||||
plot newhistogram, 'benchmarks/csv/enry-distribution.csv' using 3:xtic(1) title "enry", 'benchmarks/csv/linguist-distribution.csv' using 3 title "linguist"
|
||||
|
||||
unset output
|
||||
|
6
go-enry/benchmarks/run-benchmarks.sh
Executable file
6
go-enry/benchmarks/run-benchmarks.sh
Executable file
@ -0,0 +1,6 @@
|
||||
#!/usr/bin/env bash
|
||||
set -e
|
||||
|
||||
mkdir -p benchmarks/output
|
||||
go test -run NONE -bench=. -benchtime=120s -timeout=100h > benchmarks/output/enry_total.bench
|
||||
benchmarks/linguist-total.rb 5 > benchmarks/output/linguist_total.bench
|
7
go-enry/benchmarks/run.sh
Executable file
7
go-enry/benchmarks/run.sh
Executable file
@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
set -e
|
||||
|
||||
benchmarks/run-benchmarks.sh
|
||||
make benchmarks-slow
|
||||
benchmarks/parse.sh
|
||||
benchmarks/plot-histogram.gp
|
9
go-enry/benchmarks/soft-hard-info.txt
Normal file
9
go-enry/benchmarks/soft-hard-info.txt
Normal file
@ -0,0 +1,9 @@
|
||||
# Hardware and software used to run benchmarks
|
||||
|
||||
MacBookPro13,1
|
||||
Darwin Kernel Version 16.7.0: Tue Jan 30 11:27:06 PST 2018; root:xnu-3789.73.11~1/RELEASE_X86_64 x86_64 i386
|
||||
go version go1.10.3 darwin/amd64
|
||||
ruby 2.4.1p111 (2017-03-22 revision 58053) [x86_64-darwin16]
|
||||
|
||||
github/linguist v7.1.3 commit: e761f9b013e5b61161481fcb898b59721ee40e3d
|
||||
src-d/enry v1.6.7 commit: 3d356c70ae322f41048f74d01c5e8572f5898d34
|
Reference in New Issue
Block a user