added benchmarks and scripts to run, parse and plot them

moved benchmark/run-slow-benchmarks.sh's content to Makefile
2025-07-13 04:39:46 +00:00 · 2017-06-28 13:01:36 +02:00
parent 2045abfa41
commit 8d91dc7be8
11 changed files with 872 additions and 18 deletions
--- a/10
+++ b/10
@ -32,6 +32,16 @@ code-generate: $(LINGUIST_PATH)
 	mkdir -p data
 	go run internal/code-generator/main.go
 # Run every enry benchmark, then the linguist totals for comparison.
 # The linguist driver added alongside this rule is the Ruby script
 # benchmark/linguist-total.rb.
 benchmarks: $(LINGUIST_PATH)
 	go test -run=NONE -bench=. && benchmark/linguist-total.rb
 # Run the enry benchmarks with a 5us bench time, then the linguist per-sample script.
 benchmarks-samples: $(LINGUIST_PATH)
 	go test -run=NONE -bench=. -benchtime=5us && benchmark/linguist-samples.rb
 # Run the per-sample benchmarks for enry and linguist and store both raw
 # outputs under benchmark/output, which is created first because the shell
 # redirections below fail when the directory is missing.
 benchmarks-slow: $(LINGUIST_PATH)
 	mkdir -p benchmark/output && \
 	go test -run=NONE -bench=. -slow -benchtime=100ms -timeout=100h >benchmark/output/enry_samples.bench && \
 	benchmark/linguist-samples.rb 5 >benchmark/output/linguist_samples.bench
 # Remove the directory named by $(LINGUIST_PATH).
 clean:
 	rm -rf $(LINGUIST_PATH)
--- a/benchmark/linguist-samples.rb
+++ b/benchmark/linguist-samples.rb
@ -0,0 +1,126 @@
 #!/usr/bin/env ruby
 require 'benchmark'
 require 'linguist'
 # How many times each measured call is repeated: first CLI argument, or 1 when absent.
 iterations = (ARGV[0] || 1).to_i
 # BenchBlob is a FileBlob whose contents are read once at construction time
 # and which can reset the attributes that language detection adds to it.
 class BenchBlob < Linguist::FileBlob
  attr_accessor :data
  # Forwards both arguments to the parent constructor, then loads the whole
  # file at @fullpath (presumably set by the parent - confirm) into @data.
  def initialize(path, base_path = nil)
    super(path, base_path)
    @data = File.read(@fullpath)
  end
  # Resets @_mime_type, @detect_encoding and @lines to nil.
  def clean
    @_mime_type = @detect_encoding = @lines = nil
  end
 end
 # Walks +root+ recursively and returns an Array holding one BenchBlob per
 # regular entry found; sub-directories are descended into, "." and ".." are
 # ignored.
 def get_samples(root)
  found = []
  Dir.foreach(root) do |entry|
    next if entry == "." || entry == ".."
    full = File.join(root, entry)
    if File.directory?(full)
      found.concat(get_samples(full))
    else
      found << BenchBlob.new(full)
    end
  end
  found
 end
 samples = get_samples('.linguist/samples')
 languages = Linguist::Language.all
 # Runs one Benchmark.bmbm report per sample for the call supplied as a block.
 # The report label is "<function>_SAMPLE_<path> <iterations>", with every
 # whitespace character of the path replaced by "_" so the label stays a
 # single whitespace-free token followed by the iteration count.
 # The block receives the blob; blob.clean runs after every call.
 # Note: the yield adds one block call to each measured iteration.
 def bench_each_sample(samples, function, iterations)
  samples.each do |blob|
    sample_name = blob.path.gsub(/\s/, '_')
    Benchmark.bmbm do |bm|
      bm.report("#{function}_SAMPLE_#{sample_name} #{iterations}") do
        iterations.times do
          yield blob
          blob.clean
        end
      end
    end
  end
 end
 # Each function is benchmarked over all samples before the next one starts,
 # in the same order as the labels below.
 bench_each_sample(samples, 'GetLanguage()', iterations) { |blob| Linguist::detect(blob) }
 bench_each_sample(samples, 'Classify()', iterations) { |blob| Linguist::Classifier.classify(Linguist::Samples.cache, blob.data) }
 bench_each_sample(samples, 'GetLanguagesByModeline()', iterations) { |blob| Linguist::Strategy::Modeline.call(blob, languages) }
 bench_each_sample(samples, 'GetLanguagesByFilename()', iterations) { |blob| Linguist::Strategy::Filename.call(blob, languages) }
 bench_each_sample(samples, 'GetLanguagesByShebang()', iterations) { |blob| Linguist::Shebang.call(blob, languages) }
 bench_each_sample(samples, 'GetLanguagesByExtension()', iterations) { |blob| Linguist::Strategy::Extension.call(blob, languages) }
 bench_each_sample(samples, 'GetLanguagesByContent()', iterations) { |blob| Linguist::Heuristics.call(blob, languages) }
--- a/benchmark/linguist-total.rb
+++ b/benchmark/linguist-total.rb
@ -0,0 +1,120 @@
 #!/usr/bin/env ruby
 require 'benchmark'
 require 'linguist'
 # How many times the whole sample set is processed per report: first CLI argument, or 1 when absent.
 iterations = (ARGV[0] || 1).to_i
 # BenchBlob is a FileBlob whose contents are read once at construction time
 # and which can reset the attributes that language detection adds to it.
 class BenchBlob < Linguist::FileBlob
  attr_accessor :data, :fullpath
  # Forwards both arguments to the parent constructor, then loads the whole
  # file at @fullpath (presumably set by the parent - confirm) into @data.
  def initialize(path, base_path = nil)
    super(path, base_path)
    @data = File.read(@fullpath)
  end
  # Resets @_mime_type, @detect_encoding and @lines to nil.
  def clean
    @_mime_type = @detect_encoding = @lines = nil
  end
 end
 # Walks +root+ recursively and returns an Array holding one BenchBlob per
 # regular entry found; sub-directories are descended into, "." and ".." are
 # ignored.
 def get_samples(root)
  collected = []
  Dir.foreach(root) do |name|
    next if name == "." || name == ".."
    candidate = File.join(root, name)
    if File.directory?(candidate)
      collected.concat(get_samples(candidate))
    else
      collected << BenchBlob.new(candidate)
    end
  end
  collected
 end
 samples = get_samples('.linguist/samples')
 languages = Linguist::Language.all
 # Runs a single Benchmark.bmbm report, labelled "<function>_TOTAL <iterations>",
 # that applies the call supplied as a block to every sample, +iterations+
 # times over. The block receives the blob; blob.clean runs after every call.
 # Note: the yield adds one block call to each measured iteration.
 def bench_total(samples, function, iterations)
  Benchmark.bmbm do |bm|
    bm.report("#{function}_TOTAL #{iterations}") do
      iterations.times do
        samples.each do |blob|
          yield blob
          blob.clean
        end
      end
    end
  end
 end
 # Reports are produced in the order of the calls below.
 bench_total(samples, 'GetLanguage()', iterations) { |blob| Linguist::detect(blob) }
 bench_total(samples, 'Classify()', iterations) { |blob| Linguist::Classifier.classify(Linguist::Samples.cache, blob.data) }
 bench_total(samples, 'GetLanguagesByModeline()', iterations) { |blob| Linguist::Strategy::Modeline.call(blob, languages) }
 bench_total(samples, 'GetLanguagesByFilename()', iterations) { |blob| Linguist::Strategy::Filename.call(blob, languages) }
 bench_total(samples, 'GetLanguagesByShebang()', iterations) { |blob| Linguist::Shebang.call(blob, languages) }
 bench_total(samples, 'GetLanguagesByExtension()', iterations) { |blob| Linguist::Strategy::Extension.call(blob, languages) }
 bench_total(samples, 'GetLanguagesByContent()', iterations) { |blob| Linguist::Heuristics.call(blob, languages) }
--- a/benchmark/parse.sh
+++ b/benchmark/parse.sh
@ -0,0 +1,5 @@
 #!/bin/sh
 # Step 1: from benchmark/output, turn the raw *.bench files into CSV files
 #         written to benchmark/csv.
 # Step 2: from benchmark/csv, derive the distribution CSVs from the sample CSVs.
 # benchmark/csv is created up front because the parser only creates files,
 # never directories.
 mkdir -p benchmark/csv && \
 cd benchmark/output && go run ../parser/main.go -outdir ../csv && \
 cd ../csv && go run ../parser/main.go -distribution
--- a/benchmark/parser/main.go
+++ b/benchmark/parser/main.go
@ -0,0 +1,386 @@
 package main
 import (
 	"bufio"
 	"bytes"
 	"encoding/csv"
 	"flag"
 	"fmt"
 	"io/ioutil"
 	"log"
 	"math"
 	"os"
 	"path/filepath"
 	"runtime"
 	"sort"
 	"strconv"
 	"strings"
 )
 // Names shared by the parser: benchmarked function labels, input file names
 // and generated CSV file names.
 const (
 	// functions benchmarked; these strings are searched for in the first field of a benchmark line
 	getLanguageFunc = "GetLanguage()"
 	classifyFunc    = "Classify()"
 	modelineFunc    = "GetLanguagesByModeline()"
 	filenameFunc    = "GetLanguagesByFilename()"
 	shebangFunc     = "GetLanguagesByShebang()"
 	extensionFunc   = "GetLanguagesByExtension()"
 	contentFunc     = "GetLanguagesByContent()"
 	// benchmark's outputs; read relative to the current working directory
 	enryTotalBench       = "enry_total.bench"
 	enrySamplesBench     = "enry_samples.bench"
 	linguistTotalBench   = "linguist_total.bench"
 	linguistSamplesBench = "linguist_samples.bench"
 	// files to generate; written under the -outdir directory
 	enryTotalCSV       = "enry-total.csv"
 	enrySamplesCSV     = "enry-samples.csv"
 	linguistTotalCSV   = "linguist-total.csv"
 	linguistSamplesCSV = "linguist-samples.csv"
 	// files to generate with flag distribution
 	enryDistributionCSV     = "enry-distribution.csv"
 	linguistDistributionCSV = "linguist-distribution.csv"
 )
 var (
 	// flags
 	// distribution selects the distribution mode instead of the default .bench -> CSV conversion.
 	distribution bool
 	// outDir is the directory the generated CSV files are joined onto.
 	outDir       string
 	// enryFunctions fixes the order in which functions appear in the generated CSV rows.
 	enryFunctions         = []string{getLanguageFunc, classifyFunc, modelineFunc, filenameFunc, shebangFunc, extensionFunc, contentFunc}
 	// distributionIntervals names the histogram buckets, in ascending order.
 	distributionIntervals = []string{"1us-10us", "10us-100us", "100us-1ms", "1ms-10ms", "10ms-100ms"}
 )
 // main parses the command-line flags and runs one of the two modes:
 // with -distribution it builds the distribution CSVs from the sample CSVs,
 // otherwise it converts the .bench outputs into CSV files.
 func main() {
 	flag.BoolVar(&distribution, "distribution", false, "generate enry-distribution.csv and linguist-distribution.csv")
 	flag.StringVar(&outDir, "outdir", "", "path to leave csv files")
 	flag.Parse()
 	if distribution {
 		generateDistributionCSV()
 		return
 	}
 	generateCSV()
 }
 // generateDistributionCSV builds one distribution CSV per tool from that
 // tool's samples CSV. A failure on one file is logged and does not stop the
 // processing of the next one.
 func generateDistributionCSV() {
 	CSVFiles := []struct {
 		in   string
 		out  string
 		tool string
 	}{
 		{in: enrySamplesCSV, out: enryDistributionCSV, tool: "enry"},
 		{in: linguistSamplesCSV, out: linguistDistributionCSV, tool: "linguist"},
 	}
 	for _, CSVFile := range CSVFiles {
 		if err := generateDistributionFile(CSVFile.in, CSVFile.out, CSVFile.tool); err != nil {
 			log.Println(err)
 		}
 	}
 }
 // generateDistributionFile reads the samples CSV at in, drops its header row,
 // computes the distribution for tool and writes it to out under outDir.
 // The input file is closed before returning, so files are not kept open
 // across iterations of the caller's loop.
 func generateDistributionFile(in, out, tool string) error {
 	f, err := os.Open(in)
 	if err != nil {
 		return err
 	}
 	defer f.Close()
 	CSVSamples, err := csv.NewReader(f).ReadAll()
 	if err != nil {
 		return err
 	}
 	// An empty file has no header row to skip; slicing it with [1:] would panic.
 	if len(CSVSamples) == 0 {
 		return fmt.Errorf("%s: file is empty, no header row found", in)
 	}
 	CSVDistribution, err := buildDistribution(CSVSamples[1:], tool)
 	if err != nil {
 		return err
 	}
 	return writeCSV(CSVDistribution, filepath.Join(outDir, out))
 }
 // buildDistribution counts, per time interval, the rows of CSVSamples whose
 // second column is the GetLanguage() function, using the last column as the
 // measured time. It returns a header row followed by one row per interval
 // (interval, tool, count) and prints a summary of the counts.
 // It returns an error when a time value cannot be parsed as a float.
 func buildDistribution(CSVSamples [][]string, tool string) ([][]string, error) {
 	count := make(map[string]int, len(distributionIntervals))
 	for _, record := range CSVSamples {
 		if record[1] == getLanguageFunc {
 			nsPerOp, err := strconv.ParseFloat(record[len(record)-1], 64)
 			if err != nil {
 				return nil, err
 			}
 			arrangeByTime(count, nsPerOp)
 		}
 	}
 	rows := make([][]string, 0, len(count)+1)
 	rows = append(rows, []string{"timeInterval", tool, "numberOfFiles"})
 	for _, interval := range distributionIntervals {
 		rows = append(rows, []string{interval, tool, strconv.Itoa(count[interval])})
 	}
 	printDistributionInfo(count, tool)
 	return rows, nil
 }
 // printDistributionInfo prints, for tool, the total number of counted files,
 // the count per interval and the percentage per interval.
 // When nothing was counted every percentage is reported as 0 instead of the
 // NaN that a division by a zero total would produce.
 func printDistributionInfo(count map[string]int, tool string) {
 	total := 0
 	for _, v := range count {
 		total += v
 	}
 	fmt.Println(tool, "files", total)
 	fmt.Println("Distribution")
 	for _, interval := range distributionIntervals {
 		fmt.Println("\t", interval, count[interval])
 	}
 	fmt.Println("Percentage")
 	for _, interval := range distributionIntervals {
 		p := 0.0
 		if total > 0 {
 			p = (float64(count[interval]) / float64(total)) * 100.00
 		}
 		fmt.Printf("\t %s %f%%\n", interval, p)
 	}
 	fmt.Printf("\n\n")
 }
 // arrangeByTime increments the counter of the interval that contains num.
 // The intervals are consecutive decades starting at (1000, 10000]; each one
 // excludes its lower bound and includes its upper bound. Values outside all
 // intervals are not counted.
 func arrangeByTime(count map[string]int, num float64) {
 	lower := 1000.00
 	for _, interval := range distributionIntervals {
 		upper := lower * 10
 		if num > lower && num <= upper {
 			count[interval]++
 			return
 		}
 		lower = upper
 	}
 }
 func writeCSV(CSVData [][]string, outPath string) error {
 	out, err := os.Create(outPath)
 	if err != nil {
 		return err
 	}
 	w := csv.NewWriter(out)
 	w.WriteAll(CSVData)
 	if err := w.Error(); err != nil {
 		return err
 	}
 	return nil
 }
 // parse is the signature of the functions that turn the raw content of a
 // benchmark output file into CSV rows for the given tool.
 type parse func(data []byte, tool string) ([][]string, error)
 // generateCSV converts each known benchmark output file into its CSV
 // counterpart under outDir. A failure while reading, parsing or writing one
 // file is logged and the next file is processed.
 func generateCSV() {
 	jobs := []struct {
 		in    string
 		out   string
 		tool  string
 		parse parse
 	}{
 		{in: enryTotalBench, out: enryTotalCSV, tool: "enry", parse: parseTotal},
 		{in: linguistTotalBench, out: linguistTotalCSV, tool: "linguist", parse: parseTotal},
 		{in: enrySamplesBench, out: enrySamplesCSV, tool: "enry", parse: parseSamples},
 		{in: linguistSamplesBench, out: linguistSamplesCSV, tool: "linguist", parse: parseSamples},
 	}
 	for _, job := range jobs {
 		raw, err := ioutil.ReadFile(job.in)
 		if err != nil {
 			log.Println(err)
 			continue
 		}
 		rows, err := job.parse(raw, job.tool)
 		if err != nil {
 			log.Println(err)
 			continue
 		}
 		if err = writeCSV(rows, filepath.Join(outDir, job.out)); err != nil {
 			log.Println(err)
 		}
 	}
 }
 // parseTotal scans data line by line, keeps the lines containing "_TOTAL"
 // and turns each into a row keyed by its first element. When several lines
 // yield the same key, the last one wins. The result is a header row followed
 // by the rows ordered as in enryFunctions.
 func parseTotal(data []byte, tool string) ([][]string, error) {
 	const totalLine = "_TOTAL"
 	rowsByFunction := make(map[string][]string)
 	scanner := bufio.NewScanner(bytes.NewReader(data))
 	for scanner.Scan() {
 		text := scanner.Text()
 		if !strings.Contains(text, totalLine) {
 			continue
 		}
 		row, err := getRow(strings.Fields(text), tool)
 		if err != nil {
 			return nil, err
 		}
 		rowsByFunction[row[0]] = row
 	}
 	if err := scanner.Err(); err != nil {
 		return nil, err
 	}
 	header := []string{"function", "tool", "iterations", "ns/op"}
 	return prepareInfoForCSV(rowsByFunction, header), nil
 }
 // getRow builds a CSV row (function, tool, iterations, ns/op) from the
 // whitespace-separated fields of one benchmark line. The function name is
 // the first entry of enryFunctions contained in line[0]; when none matches,
 // the row is returned without a function column, as before.
 // It returns an error, instead of panicking, when line has fewer than the
 // two fields (label and iterations) that are read here, or when the average
 // cannot be computed.
 func getRow(line []string, tool string) ([]string, error) {
 	if len(line) < 2 {
 		return nil, fmt.Errorf("malformed benchmark line, expected at least 2 fields: %q", line)
 	}
 	row := make([]string, 0, 4)
 	for _, function := range enryFunctions {
 		if strings.Contains(line[0], function) {
 			row = append(row, function)
 			break
 		}
 	}
 	row = append(row, tool)
 	iterations := line[1]
 	row = append(row, iterations)
 	average, err := getAverage(line)
 	if err != nil {
 		return nil, err
 	}
 	row = append(row, average)
 	return row, nil
 }
 func getAverage(line []string) (string, error) {
 	average := line[len(line)-1]
 	if !strings.HasSuffix(average, ")") {
 		return line[2], nil
 	}
 	totalTime := strings.Trim(average, "() ")
 	time, err := strconv.ParseFloat(totalTime, 64)
 	if err != nil {
 		return "", err
 	}
 	iterations := line[1]
 	i, err := strconv.ParseFloat(iterations, 64)
 	if err != nil {
 		return "", err
 	}
 	avg := (time * math.Pow10(9)) / i
 	return fmt.Sprintf("%d", int(avg)), nil
 }
 // prepareInfoForCSV returns firstLine (when non-empty) followed by the rows
 // of parsedInfo ordered as in enryFunctions. Functions with no parsed row
 // are skipped so that the generated CSV does not contain blank records.
 func prepareInfoForCSV(parsedInfo map[string][]string, firstLine []string) [][]string {
 	info := createInfoWithFirstLine(firstLine, len(parsedInfo))
 	for _, function := range enryFunctions {
 		row, ok := parsedInfo[function]
 		if !ok || len(row) == 0 {
 			continue
 		}
 		info = append(info, row)
 	}
 	return info
 }
 // createInfoWithFirstLine returns an empty, non-nil slice of rows with room
 // for sliceLength rows. When firstLine is non-empty it is stored as the first
 // row and one extra slot of capacity is reserved for it.
 func createInfoWithFirstLine(firstLine []string, sliceLength int) (info [][]string) {
 	if len(firstLine) == 0 {
 		return make([][]string, 0, sliceLength)
 	}
 	return append(make([][]string, 0, sliceLength+1), firstLine)
 }
 // enryFuncs maps a benchmarked function name to the CSV row parsed for it.
 type enryFuncs map[string][]string
 // newEnryFuncs returns an enryFuncs holding one key per benchmarked
 // function, each mapped to a nil row.
 func newEnryFuncs() enryFuncs {
 	funcs := make(enryFuncs, len(enryFunctions))
 	for _, function := range enryFunctions {
 		funcs[function] = nil
 	}
 	return funcs
 }
 // parseSamples scans data line by line, keeps the lines containing "SAMPLE_"
 // and groups the resulting rows first by sample name and then by the row's
 // second element. Each stored row is the sample name followed by the row
 // built by getRow. The result is a header row followed by the rows of every
 // sample, samples being sorted by name.
 func parseSamples(data []byte, tool string) ([][]string, error) {
 	const sampleLine = "SAMPLE_"
 	bySample := make(map[string]enryFuncs)
 	scanner := bufio.NewScanner(bytes.NewReader(data))
 	for scanner.Scan() {
 		text := scanner.Text()
 		if !strings.Contains(text, sampleLine) {
 			continue
 		}
 		fields := strings.Fields(text)
 		name := getSampleName(fields[0])
 		if _, seen := bySample[name]; !seen {
 			bySample[name] = newEnryFuncs()
 		}
 		parsed, err := getRow(fields, tool)
 		if err != nil {
 			return nil, err
 		}
 		row := make([]string, 0, 4)
 		row = append(row, name)
 		row = append(row, parsed...)
 		bySample[name][row[1]] = row
 	}
 	if err := scanner.Err(); err != nil {
 		return nil, err
 	}
 	header := []string{"file", "function", "tool", "iterations", "ns/op"}
 	return prepareSamplesInfoForCSV(bySample, header), nil
 }
 func getSampleName(s string) string {
 	start := strings.Index(s, "SAMPLE_") + len("SAMPLE_")
 	suffix := fmt.Sprintf("-%d", runtime.GOMAXPROCS(-1))
 	name := strings.TrimSuffix(s[start:], suffix)
 	return name
 }
 // prepareSamplesInfoForCSV returns firstLine (when non-empty) followed by
 // the rows of every sample in parsedInfo, samples being visited in sorted
 // key order and each sample's rows being laid out by prepareInfoForCSV.
 func prepareSamplesInfoForCSV(parsedInfo map[string]enryFuncs, firstLine []string) [][]string {
 	rows := createInfoWithFirstLine(firstLine, len(parsedInfo)*len(enryFunctions))
 	for _, name := range sortKeys(parsedInfo) {
 		rows = append(rows, prepareInfoForCSV(parsedInfo[name], nil)...)
 	}
 	return rows
 }
 // sortKeys returns the keys of parsedInfo in ascending string order.
 func sortKeys(parsedInfo map[string]enryFuncs) []string {
 	keys := make([]string, len(parsedInfo))
 	next := 0
 	for key := range parsedInfo {
 		keys[next] = key
 		next++
 	}
 	sort.Strings(keys)
 	return keys
 }
--- a/benchmark/plot-histogram.gp
+++ b/benchmark/plot-histogram.gp
@ -0,0 +1,19 @@
 # Plots the enry and linguist distribution CSVs as a clustered histogram
 # and writes the result to a JPEG file.
 set terminal jpeg large font arial size 1920,1080
 set output 'benchmark/histogram/distribution.jpg'
 # The input files are comma-separated.
 set datafile separator comma
 set key under
 set style data histogram
 set style histogram clustered gap 1 title offset 1,1
 set style fill solid noborder
 set boxwidth 0.95
 set grid y
 set bmargin 12
 set autoscale
 set title "Number of files whose processed time was inside time interval"
 # Column 3 of each file gives the bar heights; column 1 of the enry file gives the x tick labels.
 plot newhistogram, 'benchmark/csv/enry-distribution.csv' using 3:xtic(1) title "enry", 'benchmark/csv/linguist-distribution.csv' using 3 title "linguist"
 unset output
--- a/benchmark/plot-histogram.sh
+++ b/benchmark/plot-histogram.sh
@ -0,0 +1,4 @@
 #!/bin/sh
 # Render the distribution histogram. The plot script writes its image to
 # benchmark/histogram, so that directory is created first.
 mkdir -p benchmark/histogram && gnuplot benchmark/plot-histogram.gp
--- a/benchmark/run-benchmark.sh
+++ b/benchmark/run-benchmark.sh
@ -0,0 +1,4 @@
 #!/bin/sh
 # Run the enry benchmarks, then the linguist totals with 5 iterations, and
 # store both raw outputs under benchmark/output. The directory is created
 # first because the redirections fail when it is missing.
 mkdir -p benchmark/output && \
 go test -run NONE -bench=. -benchtime=120s -timeout=100h >benchmark/output/enry_total.bench && \
 benchmark/linguist-total.rb 5 >benchmark/output/linguist_total.bench
--- a/benchmark/run.sh
+++ b/benchmark/run.sh
@ -0,0 +1,4 @@
 #!/bin/sh
 # Full pipeline: run the benchmarks, parse their outputs into CSV files and
 # plot the histogram. The script stops at the first step that fails.
 set -e
 benchmark/run-benchmark.sh
 make benchmarks-slow
 benchmark/parse.sh
 benchmark/plot-histogram.sh
--- a/benchmark_test.go
+++ b/benchmark_test.go
@ -0,0 +1,194 @@
 package enry
 import (
 	"flag"
 	"io/ioutil"
 	"log"
 	"os"
 	"path/filepath"
 	"testing"
 )
 // samplesDir is the directory walked to load the benchmark samples.
 const samplesDir = ".linguist/samples"
 // sample is one file loaded for benchmarking: its path and its full content.
 type sample struct {
 	filename string
 	content  []byte
 }
 var (
 	// slow is set by the -slow flag; it switches between the total and the per-sample benchmarks.
 	slow              bool
 	// overcomeLanguage and overcomeLanguages receive the benchmark results;
 	// presumably package-level sinks that keep the measured calls from being optimized away - confirm.
 	overcomeLanguage  string
 	overcomeLanguages []string
 	// samples holds every file loaded from samplesDir by TestMain.
 	samples           []*sample
 )
 // TestMain registers the -slow flag, loads every sample from samplesDir into
 // the package-level samples slice and then runs the tests and benchmarks.
 // The process is aborted when the samples cannot be loaded.
 func TestMain(m *testing.M) {
 	flag.BoolVar(&slow, "slow", false, "run benchmarks per sample for strategies too")
 	flag.Parse()
 	loaded, err := getSamples(samplesDir)
 	samples = loaded
 	if err != nil {
 		log.Fatal(err)
 	}
 	exitCode := m.Run()
 	os.Exit(exitCode)
 }
 // getSamples walks dir and returns one sample (path and content) per
 // non-directory entry. The walk stops at the first error, which is returned
 // together with the samples collected so far.
 func getSamples(dir string) ([]*sample, error) {
 	collected := make([]*sample, 0, 2000)
 	visit := func(path string, info os.FileInfo, err error) error {
 		// err is checked first: info must not be used when the walk reports an error.
 		if err != nil || info.IsDir() {
 			return err
 		}
 		content, readErr := ioutil.ReadFile(path)
 		if readErr != nil {
 			return readErr
 		}
 		collected = append(collected, &sample{filename: path, content: content})
 		return nil
 	}
 	err := filepath.Walk(dir, visit)
 	return collected, err
 }
 // BenchmarkGetLanguageTotal measures GetLanguage over the whole sample set
 // per iteration. It is skipped when -slow is set.
 func BenchmarkGetLanguageTotal(b *testing.B) {
 	if slow {
 		b.SkipNow()
 	}
 	var language string
 	b.Run("GetLanguage()_TOTAL", func(b *testing.B) {
 		for i := 0; i < b.N; i++ {
 			for _, s := range samples {
 				language = GetLanguage(s.filename, s.content)
 			}
 		}
 		overcomeLanguage = language
 	})
 }
 // BenchmarkClassifyTotal measures DefaultClassifier.Classify over the whole
 // sample set per iteration. It is skipped when -slow is set.
 func BenchmarkClassifyTotal(b *testing.B) {
 	if slow {
 		b.SkipNow()
 	}
 	var languages []string
 	b.Run("Classify()_TOTAL", func(b *testing.B) {
 		for i := 0; i < b.N; i++ {
 			for _, s := range samples {
 				languages = DefaultClassifier.Classify(s.content, nil)
 			}
 			overcomeLanguages = languages
 		}
 	})
 }
 // BenchmarkStrategiesTotal measures each detection strategy over the whole
 // sample set per iteration, one sub-benchmark per strategy. It is skipped
 // when -slow is set.
 func BenchmarkStrategiesTotal(b *testing.B) {
 	if slow {
 		b.SkipNow()
 	}
 	cases := []struct {
 		name       string
 		strategy   Strategy
 		candidates []string
 	}{
 		{name: "GetLanguagesByModeline()_TOTAL", strategy: GetLanguagesByModeline},
 		{name: "GetLanguagesByFilename()_TOTAL", strategy: GetLanguagesByFilename},
 		{name: "GetLanguagesByShebang()_TOTAL", strategy: GetLanguagesByShebang},
 		{name: "GetLanguagesByExtension()_TOTAL", strategy: GetLanguagesByExtension},
 		{name: "GetLanguagesByContent()_TOTAL", strategy: GetLanguagesByContent},
 	}
 	var languages []string
 	for _, bc := range cases {
 		b.Run(bc.name, func(b *testing.B) {
 			for i := 0; i < b.N; i++ {
 				for _, s := range samples {
 					languages = bc.strategy(s.filename, s.content, bc.candidates)
 				}
 				overcomeLanguages = languages
 			}
 		})
 	}
 }
 // BenchmarkGetLanguagePerSample measures GetLanguage on each sample in its
 // own sub-benchmark, named after the sample's path. It only runs when -slow
 // is set.
 func BenchmarkGetLanguagePerSample(b *testing.B) {
 	if !slow {
 		b.SkipNow()
 	}
 	var language string
 	for _, s := range samples {
 		b.Run("GetLanguage()_SAMPLE_"+s.filename, func(b *testing.B) {
 			for i := 0; i < b.N; i++ {
 				language = GetLanguage(s.filename, s.content)
 			}
 			overcomeLanguage = language
 		})
 	}
 }
 // BenchmarkClassifyPerSample measures DefaultClassifier.Classify on each
 // sample in its own sub-benchmark, named after the sample's path. It only
 // runs when -slow is set.
 func BenchmarkClassifyPerSample(b *testing.B) {
 	if !slow {
 		b.SkipNow()
 	}
 	var languages []string
 	for _, s := range samples {
 		b.Run("Classify()_SAMPLE_"+s.filename, func(b *testing.B) {
 			for i := 0; i < b.N; i++ {
 				languages = DefaultClassifier.Classify(s.content, nil)
 			}
 			overcomeLanguages = languages
 		})
 	}
 }
 // BenchmarkStrategiesPerSample measures each detection strategy on each
 // sample, one sub-benchmark per (strategy, sample) pair, named with the
 // strategy prefix followed by the sample's path. It only runs when -slow is
 // set.
 func BenchmarkStrategiesPerSample(b *testing.B) {
 	if !slow {
 		b.SkipNow()
 	}
 	cases := []struct {
 		name       string
 		strategy   Strategy
 		candidates []string
 	}{
 		{name: "GetLanguagesByModeline()_SAMPLE_", strategy: GetLanguagesByModeline},
 		{name: "GetLanguagesByFilename()_SAMPLE_", strategy: GetLanguagesByFilename},
 		{name: "GetLanguagesByShebang()_SAMPLE_", strategy: GetLanguagesByShebang},
 		{name: "GetLanguagesByExtension()_SAMPLE_", strategy: GetLanguagesByExtension},
 		{name: "GetLanguagesByContent()_SAMPLE_", strategy: GetLanguagesByContent},
 	}
 	var languages []string
 	for _, bc := range cases {
 		for _, s := range samples {
 			b.Run(bc.name+s.filename, func(b *testing.B) {
 				for i := 0; i < b.N; i++ {
 					languages = bc.strategy(s.filename, s.content, bc.candidates)
 				}
 				overcomeLanguages = languages
 			})
 		}
 	}
 }
--- a/utils_test.go
+++ b/utils_test.go
@ -3,7 +3,6 @@ package enry
 import (
 	"bytes"
 	"fmt"
 	"testing"
 	"github.com/stretchr/testify/assert"
 )
@ -80,20 +79,3 @@ func (s *EnryTestSuite) TestIsBinary() {
 		assert.Equal(s.T(), is, test.expected, fmt.Sprintf("%v: is = %v, expected: %v", test.name, is, test.expected))
 	}
 }
 // Paths passed to IsVendor by the benchmarks below.
 const (
 	htmlPath = "some/random/dir/file.html"
 	jsPath   = "some/random/dir/file.js"
 )
 // BenchmarkVendor measures IsVendor on htmlPath.
 func BenchmarkVendor(b *testing.B) {
 	for n := b.N; n > 0; n-- {
 		_ = IsVendor(htmlPath)
 	}
 }
 // BenchmarkVendorJS measures IsVendor on jsPath.
 func BenchmarkVendorJS(b *testing.B) {
 	for n := b.N; n > 0; n-- {
 		_ = IsVendor(jsPath)
 	}
 }
		`@ -0,0 +1,4 @@`
							`#!/bin/sh`

							`gnuplot benchmark/plot-histogram.gp`