Merge pull request #184 from bzz/maintenance/update-benchmark

Update benchmarks to latest Enry and Github-Linguist
This commit is contained in:
Alexander 2018-12-28 12:03:17 +01:00 committed by GitHub
commit f28fc12300
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 28117 additions and 25770 deletions

View File

@ -217,13 +217,27 @@ Golang's regexp engine being slower than Ruby's, which uses the [oniguruma](http
You can find scripts and additional information (like software and hardware used
and benchmarks' results per sample file) in [*benchmarks*](https://github.com/src-d/enry/blob/master/benchmarks) directory.
If you want to reproduce the same benchmarks you can run:
benchmarks/run.sh
### Benchmark Dependencies
As the benchmarks depend on Ruby and the GitHub Linguist gem, make sure you have:
- Ruby (e.g. using [`rbenv`](https://github.com/rbenv/rbenv)) and [`bundler`](https://bundler.io/) installed
- Docker
- [native dependencies](https://github.com/github/linguist/#dependencies) installed
- Build the gem `cd .linguist && bundle install && rake build_gem && cd -`
- Install it `gem install --no-rdoc --no-ri --local .linguist/github-linguist-*.gem`
from the root's project directory and it'll run benchmarks for enry and linguist, parse the output, create csv files and create a histogram (you must have installed [gnuplot](http://gnuplot.info) in your system to get the histogram).
This can take some time, so to run local benchmarks for a quick check you can either:
### How to reproduce current results
If you want to reproduce the same benchmarks as reported above:
- Make sure all [dependencies](#benchmark-dependencies) are installed
- Install [gnuplot](http://gnuplot.info) (in order to plot the histogram)
- Run `ENRY_TEST_REPO=.linguist benchmarks/run.sh` (takes ~15h)
It will run the benchmarks for enry and linguist, parse the output, create csv files and plot the histogram. This takes some time.
### Quick
To run quicker benchmarks you can either:
make benchmarks
@ -231,7 +245,7 @@ to get average times for the main detection function and strategies for the whol
make benchmarks-samples
if you want to see measures by sample file.
if you want to see measures per sample file.
Why Enry?

View File

@ -2,6 +2,7 @@ package enry
import (
"flag"
"fmt"
"io/ioutil"
"log"
"os"
@ -110,11 +111,9 @@ func getSamples(dir string) ([]*sample, error) {
filename: path,
content: content,
}
samples = append(samples, s)
return nil
})
return samples, err
}
@ -157,17 +156,7 @@ func BenchmarkStrategiesTotal(b *testing.B) {
b.SkipNow()
}
benchmarks := []struct {
name string
strategy Strategy
candidates []string
}{
{name: "GetLanguagesByModeline()_TOTAL", strategy: GetLanguagesByModeline},
{name: "GetLanguagesByFilename()_TOTAL", strategy: GetLanguagesByFilename},
{name: "GetLanguagesByShebang()_TOTAL", strategy: GetLanguagesByShebang},
{name: "GetLanguagesByExtension()_TOTAL", strategy: GetLanguagesByExtension},
{name: "GetLanguagesByContent()_TOTAL", strategy: GetLanguagesByContent},
}
benchmarks := benchmarkForAllStrategies("TOTAL")
var o []string
for _, benchmark := range benchmarks {
@ -222,17 +211,7 @@ func BenchmarkStrategiesPerSample(b *testing.B) {
b.SkipNow()
}
benchmarks := []struct {
name string
strategy Strategy
candidates []string
}{
{name: "GetLanguagesByModeline()_SAMPLE_", strategy: GetLanguagesByModeline},
{name: "GetLanguagesByFilename()_SAMPLE_", strategy: GetLanguagesByFilename},
{name: "GetLanguagesByShebang()_SAMPLE_", strategy: GetLanguagesByShebang},
{name: "GetLanguagesByExtension()_SAMPLE_", strategy: GetLanguagesByExtension},
{name: "GetLanguagesByContent()_SAMPLE_", strategy: GetLanguagesByContent},
}
benchmarks := benchmarkForAllStrategies("SAMPLE")
var o []string
for _, benchmark := range benchmarks {
@ -247,3 +226,19 @@ func BenchmarkStrategiesPerSample(b *testing.B) {
}
}
}
// strategyName pairs a detection strategy with the label under which it is
// benchmarked.
type strategyName struct {
	// name is the benchmark label for this strategy.
	name string
	// strategy is the detection function being measured.
	strategy Strategy
	// candidates is not populated by benchmarkForAllStrategies, so it is nil
	// for entries built there.
	// NOTE(review): presumably the candidate languages passed to strategy — confirm against the benchmark loops.
	candidates []string
}
// benchmarkForAllStrategies returns one strategyName entry per detection
// strategy (modeline, filename, shebang, extension, content), in that order.
//
// Each entry's name is "<Function>()_<class>_", where class is the
// caller-supplied label (e.g. "TOTAL" or "SAMPLE"). The candidates field of
// every entry is left at its zero value.
//
// NOTE(review): every name ends with "_", including for class "TOTAL";
// confirm that whatever parses the benchmark output expects that suffix.
func benchmarkForAllStrategies(class string) []strategyName {
	// entry fills the class label into the given name format and binds the
	// resulting label to its strategy.
	entry := func(format string, s Strategy) strategyName {
		return strategyName{name: fmt.Sprintf(format, class), strategy: s}
	}

	return []strategyName{
		entry("GetLanguagesByModeline()_%s_", GetLanguagesByModeline),
		entry("GetLanguagesByFilename()_%s_", GetLanguagesByFilename),
		entry("GetLanguagesByShebang()_%s_", GetLanguagesByShebang),
		entry("GetLanguagesByExtension()_%s_", GetLanguagesByExtension),
		entry("GetLanguagesByContent()_%s_", GetLanguagesByContent),
	}
}

View File

@ -1,6 +1,6 @@
timeInterval,enry,numberOfFiles
1us-10us,enry,96
10us-100us,enry,1244
100us-1ms,enry,321
1ms-10ms,enry,135
10ms-100ms,enry,43
1us-10us,enry,83
10us-100us,enry,1341
100us-1ms,enry,314
1ms-10ms,enry,146
10ms-100ms,enry,48

1 timeInterval enry numberOfFiles
2 1us-10us enry 96 83
3 10us-100us enry 1244 1341
4 100us-1ms enry 321 314
5 1ms-10ms enry 135 146
6 10ms-100ms enry 43 48

File diff suppressed because it is too large Load Diff

View File

@ -1,8 +1,8 @@
function,tool,iterations,ns/op
GetLanguage(),enry,100,1915861259
Classify(),enry,5,39977943775
GetLanguagesByModeline(),enry,1000,196571071
GetLanguagesByFilename(),enry,2000000,89774
GetLanguagesByShebang(),enry,100000,1892569
GetLanguagesByExtension(),enry,200000,921160
GetLanguagesByContent(),enry,1000,286159159
GetLanguage(),enry,100,2333748307
Classify(),enry,3,53842505853
GetLanguagesByModeline(),enry,1000,228234491
GetLanguagesByFilename(),enry,1000000,124782
GetLanguagesByShebang(),enry,100000,2339138
GetLanguagesByExtension(),enry,200000,1110007
GetLanguagesByContent(),enry,500,342358978

1 function tool iterations ns/op
2 GetLanguage() enry 100 1915861259 2333748307
3 Classify() enry 5 3 39977943775 53842505853
4 GetLanguagesByModeline() enry 1000 196571071 228234491
5 GetLanguagesByFilename() enry 2000000 1000000 89774 124782
6 GetLanguagesByShebang() enry 100000 1892569 2339138
7 GetLanguagesByExtension() enry 200000 921160 1110007
8 GetLanguagesByContent() enry 1000 500 286159159 342358978

View File

@ -1,6 +1,6 @@
timeInterval,linguist,numberOfFiles
1us-10us,linguist,0
10us-100us,linguist,74
100us-1ms,linguist,920
1ms-10ms,linguist,788
10ms-100ms,linguist,57
10us-100us,linguist,120
100us-1ms,linguist,1070
1ms-10ms,linguist,816
10ms-100ms,linguist,71

1 timeInterval linguist numberOfFiles
2 1us-10us linguist 0
3 10us-100us linguist 74 120
4 100us-1ms linguist 920 1070
5 1ms-10ms linguist 788 816
6 10ms-100ms linguist 57 71

File diff suppressed because it is too large Load Diff

View File

@ -1,8 +1,8 @@
function,tool,iterations,ns/op
GetLanguage(),linguist,5,3979096800
Classify(),linguist,5,178253431800
GetLanguagesByModeline(),linguist,5,2582204000
GetLanguagesByFilename(),linguist,5,2688800
GetLanguagesByShebang(),linguist,5,77155200
GetLanguagesByExtension(),linguist,5,6688800
GetLanguagesByContent(),linguist,5,161719000
GetLanguage(),linguist,5,3822076000
Classify(),linguist,5,329660597600
GetLanguagesByModeline(),linguist,5,2770912600
GetLanguagesByFilename(),linguist,5,34159000
GetLanguagesByShebang(),linguist,5,159317200
GetLanguagesByExtension(),linguist,5,354929800
GetLanguagesByContent(),linguist,5,3881611000

1 function tool iterations ns/op
2 GetLanguage() linguist 5 3979096800 3822076000
3 Classify() linguist 5 178253431800 329660597600
4 GetLanguagesByModeline() linguist 5 2582204000 2770912600
5 GetLanguagesByFilename() linguist 5 2688800 34159000
6 GetLanguagesByShebang() linguist 5 77155200 159317200
7 GetLanguagesByExtension() linguist 5 6688800 354929800
8 GetLanguagesByContent() linguist 5 161719000 3881611000

Binary file not shown.

Before

Width:  |  Height:  |  Size: 17 KiB

After

Width:  |  Height:  |  Size: 16 KiB

View File

@ -1,9 +1,9 @@
# Hardware and software used to run benchmarks
Dell XPS 9360
Linux 4.11.6-3-ARCH #1 SMP PREEMPT Thu Jun 22 12:21:46 CEST 2017 x86_64
go version go1.8.3 linux/amd64
ruby 2.4.1p111 (2017-03-22 revision 58053) [x86_64-linux]
github/linguist/samples commit: d5c8db3fb91963c4b2762ca2ea2ff7cfac109f68
MacBookPro13,1
Darwin Kernel Version 16.7.0: Tue Jan 30 11:27:06 PST 2018; root:xnu-3789.73.11~1/RELEASE_X86_64 x86_64 i386
go version go1.10.3 darwin/amd64
ruby 2.4.1p111 (2017-03-22 revision 58053) [x86_64-darwin16]
github/linguist v7.1.3 commit: e761f9b013e5b61161481fcb898b59721ee40e3d
src-d/enry v1.6.7 commit: 3d356c70ae322f41048f74d01c5e8572f5898d34