mirror of
https://github.com/ralsina/tartrazine.git
synced 2024-11-13 23:12:24 +00:00
Merge pull request #184 from bzz/maintenance/update-benchmark
Update benchmarks to latest Enry and Github-Linguist
This commit is contained in:
commit
f28fc12300
24
README.md
24
README.md
@ -217,13 +217,27 @@ Golang's regexp engine being slower than Ruby's, which uses the [oniguruma](http
|
|||||||
You can find scripts and additional information (like software and hardware used
|
You can find scripts and additional information (like software and hardware used
|
||||||
and benchmarks' results per sample file) in [*benchmarks*](https://github.com/src-d/enry/blob/master/benchmarks) directory.
|
and benchmarks' results per sample file) in [*benchmarks*](https://github.com/src-d/enry/blob/master/benchmarks) directory.
|
||||||
|
|
||||||
If you want to reproduce the same benchmarks you can run:
|
|
||||||
|
|
||||||
benchmarks/run.sh
|
### Benchmark Dependencies
|
||||||
|
As the benchmarks depend on Ruby and the Github-Linguist gem, make sure you have:
|
||||||
|
- Ruby (e.g. using [`rbenv`](https://github.com/rbenv/rbenv)) and [`bundler`](https://bundler.io/) installed
|
||||||
|
- Docker
|
||||||
|
- [native dependencies](https://github.com/github/linguist/#dependencies) installed
|
||||||
|
- Build the gem: `cd .linguist && bundle install && rake build_gem && cd -`
|
||||||
|
- Install it: `gem install --no-rdoc --no-ri --local .linguist/github-linguist-*.gem`
|
||||||
|
|
||||||
from the root's project directory and it'll run benchmarks for enry and linguist, parse the output, create csv files and create a histogram (you must have installed [gnuplot](http://gnuplot.info) in your system to get the histogram).
|
|
||||||
|
|
||||||
This can take some time, so to run local benchmarks for a quick check you can either:
|
### How to reproduce current results
|
||||||
|
|
||||||
|
If you want to reproduce the same benchmarks as reported above:
|
||||||
|
- Make sure all [dependencies](#benchmark-dependencies) are installed
|
||||||
|
- Install [gnuplot](http://gnuplot.info) (in order to plot the histogram)
|
||||||
|
- Run `ENRY_TEST_REPO=.linguist benchmarks/run.sh` (takes ~15h)
|
||||||
|
|
||||||
|
It will run the benchmarks for enry and linguist, parse the output, create csv files and plot the histogram. This takes some time.
|
||||||
|
|
||||||
|
### Quick
|
||||||
|
To run quicker benchmarks you can either:
|
||||||
|
|
||||||
make benchmarks
|
make benchmarks
|
||||||
|
|
||||||
@ -231,7 +245,7 @@ to get average times for the main detection function and strategies for the whol
|
|||||||
|
|
||||||
make benchmarks-samples
|
make benchmarks-samples
|
||||||
|
|
||||||
if you want to see measures by sample file.
|
if you want to see measures per sample file.
|
||||||
|
|
||||||
|
|
||||||
Why Enry?
|
Why Enry?
|
||||||
|
@ -2,6 +2,7 @@ package enry
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
|
"fmt"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
@ -110,11 +111,9 @@ func getSamples(dir string) ([]*sample, error) {
|
|||||||
filename: path,
|
filename: path,
|
||||||
content: content,
|
content: content,
|
||||||
}
|
}
|
||||||
|
|
||||||
samples = append(samples, s)
|
samples = append(samples, s)
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
|
|
||||||
return samples, err
|
return samples, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -157,17 +156,7 @@ func BenchmarkStrategiesTotal(b *testing.B) {
|
|||||||
b.SkipNow()
|
b.SkipNow()
|
||||||
}
|
}
|
||||||
|
|
||||||
benchmarks := []struct {
|
benchmarks := benchmarkForAllStrategies("TOTAL")
|
||||||
name string
|
|
||||||
strategy Strategy
|
|
||||||
candidates []string
|
|
||||||
}{
|
|
||||||
{name: "GetLanguagesByModeline()_TOTAL", strategy: GetLanguagesByModeline},
|
|
||||||
{name: "GetLanguagesByFilename()_TOTAL", strategy: GetLanguagesByFilename},
|
|
||||||
{name: "GetLanguagesByShebang()_TOTAL", strategy: GetLanguagesByShebang},
|
|
||||||
{name: "GetLanguagesByExtension()_TOTAL", strategy: GetLanguagesByExtension},
|
|
||||||
{name: "GetLanguagesByContent()_TOTAL", strategy: GetLanguagesByContent},
|
|
||||||
}
|
|
||||||
|
|
||||||
var o []string
|
var o []string
|
||||||
for _, benchmark := range benchmarks {
|
for _, benchmark := range benchmarks {
|
||||||
@ -222,17 +211,7 @@ func BenchmarkStrategiesPerSample(b *testing.B) {
|
|||||||
b.SkipNow()
|
b.SkipNow()
|
||||||
}
|
}
|
||||||
|
|
||||||
benchmarks := []struct {
|
benchmarks := benchmarkForAllStrategies("SAMPLE")
|
||||||
name string
|
|
||||||
strategy Strategy
|
|
||||||
candidates []string
|
|
||||||
}{
|
|
||||||
{name: "GetLanguagesByModeline()_SAMPLE_", strategy: GetLanguagesByModeline},
|
|
||||||
{name: "GetLanguagesByFilename()_SAMPLE_", strategy: GetLanguagesByFilename},
|
|
||||||
{name: "GetLanguagesByShebang()_SAMPLE_", strategy: GetLanguagesByShebang},
|
|
||||||
{name: "GetLanguagesByExtension()_SAMPLE_", strategy: GetLanguagesByExtension},
|
|
||||||
{name: "GetLanguagesByContent()_SAMPLE_", strategy: GetLanguagesByContent},
|
|
||||||
}
|
|
||||||
|
|
||||||
var o []string
|
var o []string
|
||||||
for _, benchmark := range benchmarks {
|
for _, benchmark := range benchmarks {
|
||||||
@ -247,3 +226,19 @@ func BenchmarkStrategiesPerSample(b *testing.B) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type strategyName struct {
|
||||||
|
name string
|
||||||
|
strategy Strategy
|
||||||
|
candidates []string
|
||||||
|
}
|
||||||
|
|
||||||
|
func benchmarkForAllStrategies(class string) []strategyName {
|
||||||
|
return []strategyName{
|
||||||
|
{name: fmt.Sprintf("GetLanguagesByModeline()_%s_", class), strategy: GetLanguagesByModeline},
|
||||||
|
{name: fmt.Sprintf("GetLanguagesByFilename()_%s_", class), strategy: GetLanguagesByFilename},
|
||||||
|
{name: fmt.Sprintf("GetLanguagesByShebang()_%s_", class), strategy: GetLanguagesByShebang},
|
||||||
|
{name: fmt.Sprintf("GetLanguagesByExtension()_%s_", class), strategy: GetLanguagesByExtension},
|
||||||
|
{name: fmt.Sprintf("GetLanguagesByContent()_%s_", class), strategy: GetLanguagesByContent},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
timeInterval,enry,numberOfFiles
|
timeInterval,enry,numberOfFiles
|
||||||
1us-10us,enry,96
|
1us-10us,enry,83
|
||||||
10us-100us,enry,1244
|
10us-100us,enry,1341
|
||||||
100us-1ms,enry,321
|
100us-1ms,enry,314
|
||||||
1ms-10ms,enry,135
|
1ms-10ms,enry,146
|
||||||
10ms-100ms,enry,43
|
10ms-100ms,enry,48
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
@ -1,8 +1,8 @@
|
|||||||
function,tool,iterations,ns/op
|
function,tool,iterations,ns/op
|
||||||
GetLanguage(),enry,100,1915861259
|
GetLanguage(),enry,100,2333748307
|
||||||
Classify(),enry,5,39977943775
|
Classify(),enry,3,53842505853
|
||||||
GetLanguagesByModeline(),enry,1000,196571071
|
GetLanguagesByModeline(),enry,1000,228234491
|
||||||
GetLanguagesByFilename(),enry,2000000,89774
|
GetLanguagesByFilename(),enry,1000000,124782
|
||||||
GetLanguagesByShebang(),enry,100000,1892569
|
GetLanguagesByShebang(),enry,100000,2339138
|
||||||
GetLanguagesByExtension(),enry,200000,921160
|
GetLanguagesByExtension(),enry,200000,1110007
|
||||||
GetLanguagesByContent(),enry,1000,286159159
|
GetLanguagesByContent(),enry,500,342358978
|
||||||
|
|
@ -1,6 +1,6 @@
|
|||||||
timeInterval,linguist,numberOfFiles
|
timeInterval,linguist,numberOfFiles
|
||||||
1us-10us,linguist,0
|
1us-10us,linguist,0
|
||||||
10us-100us,linguist,74
|
10us-100us,linguist,120
|
||||||
100us-1ms,linguist,920
|
100us-1ms,linguist,1070
|
||||||
1ms-10ms,linguist,788
|
1ms-10ms,linguist,816
|
||||||
10ms-100ms,linguist,57
|
10ms-100ms,linguist,71
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
@ -1,8 +1,8 @@
|
|||||||
function,tool,iterations,ns/op
|
function,tool,iterations,ns/op
|
||||||
GetLanguage(),linguist,5,3979096800
|
GetLanguage(),linguist,5,3822076000
|
||||||
Classify(),linguist,5,178253431800
|
Classify(),linguist,5,329660597600
|
||||||
GetLanguagesByModeline(),linguist,5,2582204000
|
GetLanguagesByModeline(),linguist,5,2770912600
|
||||||
GetLanguagesByFilename(),linguist,5,2688800
|
GetLanguagesByFilename(),linguist,5,34159000
|
||||||
GetLanguagesByShebang(),linguist,5,77155200
|
GetLanguagesByShebang(),linguist,5,159317200
|
||||||
GetLanguagesByExtension(),linguist,5,6688800
|
GetLanguagesByExtension(),linguist,5,354929800
|
||||||
GetLanguagesByContent(),linguist,5,161719000
|
GetLanguagesByContent(),linguist,5,3881611000
|
||||||
|
|
Binary file not shown.
Before Width: | Height: | Size: 17 KiB After Width: | Height: | Size: 16 KiB |
@ -1,9 +1,9 @@
|
|||||||
# Hardware and software used to run benchmarks
|
# Hardware and software used to run benchmarks
|
||||||
|
|
||||||
Dell XPS 9360
|
MacBookPro13,1
|
||||||
Linux 4.11.6-3-ARCH #1 SMP PREEMPT Thu Jun 22 12:21:46 CEST 2017 x86_64
|
Darwin Kernel Version 16.7.0: Tue Jan 30 11:27:06 PST 2018; root:xnu-3789.73.11~1/RELEASE_X86_64 x86_64 i386
|
||||||
go version go1.8.3 linux/amd64
|
go version go1.10.3 darwin/amd64
|
||||||
ruby 2.4.1p111 (2017-03-22 revision 58053) [x86_64-linux]
|
ruby 2.4.1p111 (2017-03-22 revision 58053) [x86_64-darwin16]
|
||||||
|
|
||||||
github/linguist/samples commit: d5c8db3fb91963c4b2762ca2ea2ff7cfac109f68
|
|
||||||
|
|
||||||
|
github/linguist v7.1.3 commit: e761f9b013e5b61161481fcb898b59721ee40e3d
|
||||||
|
src-d/enry v1.6.7 commit: 3d356c70ae322f41048f74d01c5e8572f5898d34
|
Loading…
Reference in New Issue
Block a user