mirror of
https://github.com/ralsina/tartrazine.git
synced 2024-11-10 13:32:24 +00:00
Merge pull request #184 from bzz/maintenance/update-benchmark
Update benchmarks to latest Enry and Github-Linguist
This commit is contained in:
commit
f28fc12300
24
README.md
24
README.md
@ -217,13 +217,27 @@ Golang's regexp engine being slower than Ruby's, which uses the [oniguruma](http
|
||||
You can find scripts and additional information (like software and hardware used
|
||||
and benchmarks' results per sample file) in [*benchmarks*](https://github.com/src-d/enry/blob/master/benchmarks) directory.
|
||||
|
||||
If you want to reproduce the same benchmarks you can run:
|
||||
|
||||
benchmarks/run.sh
|
||||
### Benchmark Dependencies
|
||||
As the benchmarks depend on Ruby and the Github-Linguist gem, make sure you have:
|
||||
- Ruby (e.g. using [`rbenv`](https://github.com/rbenv/rbenv)) with [`bundler`](https://bundler.io/) installed
|
||||
- Docker
|
||||
- [native dependencies](https://github.com/github/linguist/#dependencies) installed
|
||||
- Build the gem `cd .linguist && bundle install && rake build_gem && cd -`
|
||||
- Install it `gem install --no-rdoc --no-ri --local .linguist/github-linguist-*.gem`
|
||||
|
||||
from the root's project directory and it'll run benchmarks for enry and linguist, parse the output, create csv files and create a histogram (you must have installed [gnuplot](http://gnuplot.info) in your system to get the histogram).
|
||||
|
||||
This can take some time, so to run local benchmarks for a quick check you can either:
|
||||
### How to reproduce current results
|
||||
|
||||
If you want to reproduce the same benchmarks as reported above:
|
||||
- Make sure all [dependencies](#benchmark-dependencies) are installed
|
||||
- Install [gnuplot](http://gnuplot.info) (in order to plot the histogram)
|
||||
- Run `ENRY_TEST_REPO=.linguist benchmarks/run.sh` (takes ~15h)
|
||||
|
||||
It will run the benchmarks for enry and linguist, parse the output, create csv files and plot the histogram. This takes some time.
|
||||
|
||||
### Quick
|
||||
To run quicker benchmarks you can either:
|
||||
|
||||
make benchmarks
|
||||
|
||||
@ -231,7 +245,7 @@ to get average times for the main detection function and strategies for the whol
|
||||
|
||||
make benchmarks-samples
|
||||
|
||||
if you want to see measures by sample file.
|
||||
if you want to see measures per sample file.
|
||||
|
||||
|
||||
Why Enry?
|
||||
|
@ -2,6 +2,7 @@ package enry
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
@ -110,11 +111,9 @@ func getSamples(dir string) ([]*sample, error) {
|
||||
filename: path,
|
||||
content: content,
|
||||
}
|
||||
|
||||
samples = append(samples, s)
|
||||
return nil
|
||||
})
|
||||
|
||||
return samples, err
|
||||
}
|
||||
|
||||
@ -157,17 +156,7 @@ func BenchmarkStrategiesTotal(b *testing.B) {
|
||||
b.SkipNow()
|
||||
}
|
||||
|
||||
benchmarks := []struct {
|
||||
name string
|
||||
strategy Strategy
|
||||
candidates []string
|
||||
}{
|
||||
{name: "GetLanguagesByModeline()_TOTAL", strategy: GetLanguagesByModeline},
|
||||
{name: "GetLanguagesByFilename()_TOTAL", strategy: GetLanguagesByFilename},
|
||||
{name: "GetLanguagesByShebang()_TOTAL", strategy: GetLanguagesByShebang},
|
||||
{name: "GetLanguagesByExtension()_TOTAL", strategy: GetLanguagesByExtension},
|
||||
{name: "GetLanguagesByContent()_TOTAL", strategy: GetLanguagesByContent},
|
||||
}
|
||||
benchmarks := benchmarkForAllStrategies("TOTAL")
|
||||
|
||||
var o []string
|
||||
for _, benchmark := range benchmarks {
|
||||
@ -222,17 +211,7 @@ func BenchmarkStrategiesPerSample(b *testing.B) {
|
||||
b.SkipNow()
|
||||
}
|
||||
|
||||
benchmarks := []struct {
|
||||
name string
|
||||
strategy Strategy
|
||||
candidates []string
|
||||
}{
|
||||
{name: "GetLanguagesByModeline()_SAMPLE_", strategy: GetLanguagesByModeline},
|
||||
{name: "GetLanguagesByFilename()_SAMPLE_", strategy: GetLanguagesByFilename},
|
||||
{name: "GetLanguagesByShebang()_SAMPLE_", strategy: GetLanguagesByShebang},
|
||||
{name: "GetLanguagesByExtension()_SAMPLE_", strategy: GetLanguagesByExtension},
|
||||
{name: "GetLanguagesByContent()_SAMPLE_", strategy: GetLanguagesByContent},
|
||||
}
|
||||
benchmarks := benchmarkForAllStrategies("SAMPLE")
|
||||
|
||||
var o []string
|
||||
for _, benchmark := range benchmarks {
|
||||
@ -247,3 +226,19 @@ func BenchmarkStrategiesPerSample(b *testing.B) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// strategyName describes one benchmark entry: a display name paired with the
// Strategy to run.
type strategyName struct {
|
||||
// name is the benchmark label.
name string
|
||||
// strategy is the detection function under test.
strategy Strategy
|
||||
// candidates is not populated by benchmarkForAllStrategies, so it stays nil
// there; presumably it is passed through to the strategy — confirm at the
// call sites.
candidates []string
|
||||
}
|
||||
|
||||
// benchmarkForAllStrategies returns one strategyName entry for each of the five
// strategies listed below (modeline, filename, shebang, extension, content).
//
// Each entry is named "<StrategyFunc>()_<class>_", so class "SAMPLE" yields
// e.g. "GetLanguagesByModeline()_SAMPLE_". The candidates field is left unset.
//
// NOTE(review): the name always ends with an underscore. The hand-written
// "TOTAL" names this helper replaces had no trailing underscore — confirm that
// whatever parses the benchmark output tolerates the changed name.
func benchmarkForAllStrategies(class string) []strategyName {
|
||||
return []strategyName{
|
||||
{name: fmt.Sprintf("GetLanguagesByModeline()_%s_", class), strategy: GetLanguagesByModeline},
|
||||
{name: fmt.Sprintf("GetLanguagesByFilename()_%s_", class), strategy: GetLanguagesByFilename},
|
||||
{name: fmt.Sprintf("GetLanguagesByShebang()_%s_", class), strategy: GetLanguagesByShebang},
|
||||
{name: fmt.Sprintf("GetLanguagesByExtension()_%s_", class), strategy: GetLanguagesByExtension},
|
||||
{name: fmt.Sprintf("GetLanguagesByContent()_%s_", class), strategy: GetLanguagesByContent},
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
timeInterval,enry,numberOfFiles
|
||||
1us-10us,enry,96
|
||||
10us-100us,enry,1244
|
||||
100us-1ms,enry,321
|
||||
1ms-10ms,enry,135
|
||||
10ms-100ms,enry,43
|
||||
1us-10us,enry,83
|
||||
10us-100us,enry,1341
|
||||
100us-1ms,enry,314
|
||||
1ms-10ms,enry,146
|
||||
10ms-100ms,enry,48
|
||||
|
|
File diff suppressed because it is too large
Load Diff
@ -1,8 +1,8 @@
|
||||
function,tool,iterations,ns/op
|
||||
GetLanguage(),enry,100,1915861259
|
||||
Classify(),enry,5,39977943775
|
||||
GetLanguagesByModeline(),enry,1000,196571071
|
||||
GetLanguagesByFilename(),enry,2000000,89774
|
||||
GetLanguagesByShebang(),enry,100000,1892569
|
||||
GetLanguagesByExtension(),enry,200000,921160
|
||||
GetLanguagesByContent(),enry,1000,286159159
|
||||
GetLanguage(),enry,100,2333748307
|
||||
Classify(),enry,3,53842505853
|
||||
GetLanguagesByModeline(),enry,1000,228234491
|
||||
GetLanguagesByFilename(),enry,1000000,124782
|
||||
GetLanguagesByShebang(),enry,100000,2339138
|
||||
GetLanguagesByExtension(),enry,200000,1110007
|
||||
GetLanguagesByContent(),enry,500,342358978
|
||||
|
|
@ -1,6 +1,6 @@
|
||||
timeInterval,linguist,numberOfFiles
|
||||
1us-10us,linguist,0
|
||||
10us-100us,linguist,74
|
||||
100us-1ms,linguist,920
|
||||
1ms-10ms,linguist,788
|
||||
10ms-100ms,linguist,57
|
||||
10us-100us,linguist,120
|
||||
100us-1ms,linguist,1070
|
||||
1ms-10ms,linguist,816
|
||||
10ms-100ms,linguist,71
|
||||
|
|
File diff suppressed because it is too large
Load Diff
@ -1,8 +1,8 @@
|
||||
function,tool,iterations,ns/op
|
||||
GetLanguage(),linguist,5,3979096800
|
||||
Classify(),linguist,5,178253431800
|
||||
GetLanguagesByModeline(),linguist,5,2582204000
|
||||
GetLanguagesByFilename(),linguist,5,2688800
|
||||
GetLanguagesByShebang(),linguist,5,77155200
|
||||
GetLanguagesByExtension(),linguist,5,6688800
|
||||
GetLanguagesByContent(),linguist,5,161719000
|
||||
GetLanguage(),linguist,5,3822076000
|
||||
Classify(),linguist,5,329660597600
|
||||
GetLanguagesByModeline(),linguist,5,2770912600
|
||||
GetLanguagesByFilename(),linguist,5,34159000
|
||||
GetLanguagesByShebang(),linguist,5,159317200
|
||||
GetLanguagesByExtension(),linguist,5,354929800
|
||||
GetLanguagesByContent(),linguist,5,3881611000
|
||||
|
|
Binary file not shown.
Before Width: | Height: | Size: 17 KiB After Width: | Height: | Size: 16 KiB |
@ -1,9 +1,9 @@
|
||||
# Hardware and software used to run benchmarks
|
||||
|
||||
Dell XPS 9360
|
||||
Linux 4.11.6-3-ARCH #1 SMP PREEMPT Thu Jun 22 12:21:46 CEST 2017 x86_64
|
||||
go version go1.8.3 linux/amd64
|
||||
ruby 2.4.1p111 (2017-03-22 revision 58053) [x86_64-linux]
|
||||
|
||||
github/linguist/samples commit: d5c8db3fb91963c4b2762ca2ea2ff7cfac109f68
|
||||
MacBookPro13,1
|
||||
Darwin Kernel Version 16.7.0: Tue Jan 30 11:27:06 PST 2018; root:xnu-3789.73.11~1/RELEASE_X86_64 x86_64 i386
|
||||
go version go1.10.3 darwin/amd64
|
||||
ruby 2.4.1p111 (2017-03-22 revision 58053) [x86_64-darwin16]
|
||||
|
||||
github/linguist v7.1.3 commit: e761f9b013e5b61161481fcb898b59721ee40e3d
|
||||
src-d/enry v1.6.7 commit: 3d356c70ae322f41048f74d01c5e8572f5898d34
|
Loading…
Reference in New Issue
Block a user