Merge pull request #184 from bzz/maintenance/update-benchmark

Update benchmarks to latest Enry and Github-Linguist
This commit is contained in:
Alexander 2018-12-28 12:03:17 +01:00 committed by GitHub
commit f28fc12300
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 28117 additions and 25770 deletions

View File

@ -217,13 +217,27 @@ Golang's regexp engine being slower than Ruby's, which uses the [oniguruma](http
You can find scripts and additional information (like software and hardware used You can find scripts and additional information (like software and hardware used
and benchmarks' results per sample file) in [*benchmarks*](https://github.com/src-d/enry/blob/master/benchmarks) directory. and benchmarks' results per sample file) in [*benchmarks*](https://github.com/src-d/enry/blob/master/benchmarks) directory.
If you want to reproduce the same benchmarks you can run:
benchmarks/run.sh ### Benchmark Dependencies
As the benchmarks depend on Ruby and the Github-Linguist gem, make sure you have:
- Ruby (e.g. using [`rbenv`](https://github.com/rbenv/rbenv)) and [`bundler`](https://bundler.io/) installed
- Docker
- [native dependencies](https://github.com/github/linguist/#dependencies) installed
- Build the gem `cd .linguist && bundle install && rake build_gem && cd -`
- Install it `gem install --no-rdoc --no-ri --local .linguist/github-linguist-*.gem`
from the root's project directory and it'll run benchmarks for enry and linguist, parse the output, create csv files and create a histogram (you must have installed [gnuplot](http://gnuplot.info) in your system to get the histogram).
This can take some time, so to run local benchmarks for a quick check you can either: ### How to reproduce current results
If you want to reproduce the same benchmarks as reported above:
- Make sure all [dependencies](#benchmark-dependencies) are installed
- Install [gnuplot](http://gnuplot.info) (in order to plot the histogram)
- Run `ENRY_TEST_REPO=.linguist benchmarks/run.sh` (takes ~15h)
It will run the benchmarks for enry and linguist, parse the output, create csv files and plot the histogram. This takes some time.
### Quick
To run quicker benchmarks you can either:
make benchmarks make benchmarks
@ -231,7 +245,7 @@ to get average times for the main detection function and strategies for the whol
make benchmarks-samples make benchmarks-samples
if you want to see measures by sample file. if you want to see measures per sample file.
Why Enry? Why Enry?

View File

@ -2,6 +2,7 @@ package enry
import ( import (
"flag" "flag"
"fmt"
"io/ioutil" "io/ioutil"
"log" "log"
"os" "os"
@ -110,11 +111,9 @@ func getSamples(dir string) ([]*sample, error) {
filename: path, filename: path,
content: content, content: content,
} }
samples = append(samples, s) samples = append(samples, s)
return nil return nil
}) })
return samples, err return samples, err
} }
@ -157,17 +156,7 @@ func BenchmarkStrategiesTotal(b *testing.B) {
b.SkipNow() b.SkipNow()
} }
benchmarks := []struct { benchmarks := benchmarkForAllStrategies("TOTAL")
name string
strategy Strategy
candidates []string
}{
{name: "GetLanguagesByModeline()_TOTAL", strategy: GetLanguagesByModeline},
{name: "GetLanguagesByFilename()_TOTAL", strategy: GetLanguagesByFilename},
{name: "GetLanguagesByShebang()_TOTAL", strategy: GetLanguagesByShebang},
{name: "GetLanguagesByExtension()_TOTAL", strategy: GetLanguagesByExtension},
{name: "GetLanguagesByContent()_TOTAL", strategy: GetLanguagesByContent},
}
var o []string var o []string
for _, benchmark := range benchmarks { for _, benchmark := range benchmarks {
@ -222,17 +211,7 @@ func BenchmarkStrategiesPerSample(b *testing.B) {
b.SkipNow() b.SkipNow()
} }
benchmarks := []struct { benchmarks := benchmarkForAllStrategies("SAMPLE")
name string
strategy Strategy
candidates []string
}{
{name: "GetLanguagesByModeline()_SAMPLE_", strategy: GetLanguagesByModeline},
{name: "GetLanguagesByFilename()_SAMPLE_", strategy: GetLanguagesByFilename},
{name: "GetLanguagesByShebang()_SAMPLE_", strategy: GetLanguagesByShebang},
{name: "GetLanguagesByExtension()_SAMPLE_", strategy: GetLanguagesByExtension},
{name: "GetLanguagesByContent()_SAMPLE_", strategy: GetLanguagesByContent},
}
var o []string var o []string
for _, benchmark := range benchmarks { for _, benchmark := range benchmarks {
@ -247,3 +226,19 @@ func BenchmarkStrategiesPerSample(b *testing.B) {
} }
} }
} }
// strategyName pairs a benchmark label with the detection strategy that the
// benchmark exercises. Values are produced by benchmarkForAllStrategies.
type strategyName struct {
// name is the label built for this entry, of the form "<Func>()_<class>_".
name string
// strategy is the detection function this entry refers to.
strategy Strategy
// candidates is left unset (nil) by benchmarkForAllStrategies.
// NOTE(review): presumably the candidate languages handed to strategy —
// confirm against the benchmark bodies that consume this struct.
candidates []string
}
// benchmarkForAllStrategies builds one strategyName entry per detection
// strategy, in the fixed order Modeline, Filename, Shebang, Extension, Content.
//
// Each entry's name has the form "<Func>()_<class>_", where class is the
// caller-supplied label (for example "TOTAL" or "SAMPLE"). The candidates
// field of every entry is left at its zero value.
func benchmarkForAllStrategies(class string) []strategyName {
	// Table of strategy function names and the functions themselves; the
	// order here is the order of the returned slice.
	table := []struct {
		fn string
		impl Strategy
	}{
		{"GetLanguagesByModeline", GetLanguagesByModeline},
		{"GetLanguagesByFilename", GetLanguagesByFilename},
		{"GetLanguagesByShebang", GetLanguagesByShebang},
		{"GetLanguagesByExtension", GetLanguagesByExtension},
		{"GetLanguagesByContent", GetLanguagesByContent},
	}

	entries := make([]strategyName, 0, len(table))
	for _, row := range table {
		entries = append(entries, strategyName{
			name:     fmt.Sprintf("%s()_%s_", row.fn, class),
			strategy: row.impl,
		})
	}
	return entries
}

View File

@ -1,6 +1,6 @@
timeInterval,enry,numberOfFiles timeInterval,enry,numberOfFiles
1us-10us,enry,96 1us-10us,enry,83
10us-100us,enry,1244 10us-100us,enry,1341
100us-1ms,enry,321 100us-1ms,enry,314
1ms-10ms,enry,135 1ms-10ms,enry,146
10ms-100ms,enry,43 10ms-100ms,enry,48

1 timeInterval enry numberOfFiles
2 1us-10us enry 96 83
3 10us-100us enry 1244 1341
4 100us-1ms enry 321 314
5 1ms-10ms enry 135 146
6 10ms-100ms enry 43 48

File diff suppressed because it is too large Load Diff

View File

@ -1,8 +1,8 @@
function,tool,iterations,ns/op function,tool,iterations,ns/op
GetLanguage(),enry,100,1915861259 GetLanguage(),enry,100,2333748307
Classify(),enry,5,39977943775 Classify(),enry,3,53842505853
GetLanguagesByModeline(),enry,1000,196571071 GetLanguagesByModeline(),enry,1000,228234491
GetLanguagesByFilename(),enry,2000000,89774 GetLanguagesByFilename(),enry,1000000,124782
GetLanguagesByShebang(),enry,100000,1892569 GetLanguagesByShebang(),enry,100000,2339138
GetLanguagesByExtension(),enry,200000,921160 GetLanguagesByExtension(),enry,200000,1110007
GetLanguagesByContent(),enry,1000,286159159 GetLanguagesByContent(),enry,500,342358978

1 function tool iterations ns/op
2 GetLanguage() enry 100 1915861259 2333748307
3 Classify() enry 5 3 39977943775 53842505853
4 GetLanguagesByModeline() enry 1000 196571071 228234491
5 GetLanguagesByFilename() enry 2000000 1000000 89774 124782
6 GetLanguagesByShebang() enry 100000 1892569 2339138
7 GetLanguagesByExtension() enry 200000 921160 1110007
8 GetLanguagesByContent() enry 1000 500 286159159 342358978

View File

@ -1,6 +1,6 @@
timeInterval,linguist,numberOfFiles timeInterval,linguist,numberOfFiles
1us-10us,linguist,0 1us-10us,linguist,0
10us-100us,linguist,74 10us-100us,linguist,120
100us-1ms,linguist,920 100us-1ms,linguist,1070
1ms-10ms,linguist,788 1ms-10ms,linguist,816
10ms-100ms,linguist,57 10ms-100ms,linguist,71

1 timeInterval linguist numberOfFiles
2 1us-10us linguist 0
3 10us-100us linguist 74 120
4 100us-1ms linguist 920 1070
5 1ms-10ms linguist 788 816
6 10ms-100ms linguist 57 71

File diff suppressed because it is too large Load Diff

View File

@ -1,8 +1,8 @@
function,tool,iterations,ns/op function,tool,iterations,ns/op
GetLanguage(),linguist,5,3979096800 GetLanguage(),linguist,5,3822076000
Classify(),linguist,5,178253431800 Classify(),linguist,5,329660597600
GetLanguagesByModeline(),linguist,5,2582204000 GetLanguagesByModeline(),linguist,5,2770912600
GetLanguagesByFilename(),linguist,5,2688800 GetLanguagesByFilename(),linguist,5,34159000
GetLanguagesByShebang(),linguist,5,77155200 GetLanguagesByShebang(),linguist,5,159317200
GetLanguagesByExtension(),linguist,5,6688800 GetLanguagesByExtension(),linguist,5,354929800
GetLanguagesByContent(),linguist,5,161719000 GetLanguagesByContent(),linguist,5,3881611000

1 function tool iterations ns/op
2 GetLanguage() linguist 5 3979096800 3822076000
3 Classify() linguist 5 178253431800 329660597600
4 GetLanguagesByModeline() linguist 5 2582204000 2770912600
5 GetLanguagesByFilename() linguist 5 2688800 34159000
6 GetLanguagesByShebang() linguist 5 77155200 159317200
7 GetLanguagesByExtension() linguist 5 6688800 354929800
8 GetLanguagesByContent() linguist 5 161719000 3881611000

Binary file not shown.

Before

Width:  |  Height:  |  Size: 17 KiB

After

Width:  |  Height:  |  Size: 16 KiB

View File

@ -1,9 +1,9 @@
# Hardware and software used to run benchmarks # Hardware and software used to run benchmarks
Dell XPS 9360 MacBookPro13,1
Linux 4.11.6-3-ARCH #1 SMP PREEMPT Thu Jun 22 12:21:46 CEST 2017 x86_64 Darwin Kernel Version 16.7.0: Tue Jan 30 11:27:06 PST 2018; root:xnu-3789.73.11~1/RELEASE_X86_64 x86_64 i386
go version go1.8.3 linux/amd64 go version go1.10.3 darwin/amd64
ruby 2.4.1p111 (2017-03-22 revision 58053) [x86_64-linux] ruby 2.4.1p111 (2017-03-22 revision 58053) [x86_64-darwin16]
github/linguist/samples commit: d5c8db3fb91963c4b2762ca2ea2ff7cfac109f68
github/linguist v7.1.3 commit: e761f9b013e5b61161481fcb898b59721ee40e3d
src-d/enry v1.6.7 commit: 3d356c70ae322f41048f74d01c5e8572f5898d34