added benchmarks and scripts to run, parse and plot them

moved benchmark/run-slow-benchmarks.sh's content to Makefile
This commit is contained in:
Manuel Carmona 2017-06-28 13:01:36 +02:00
parent 2045abfa41
commit 8d91dc7be8
11 changed files with 872 additions and 18 deletions

View File

@@ -32,6 +32,16 @@ code-generate: $(LINGUIST_PATH)
mkdir -p data
go run internal/code-generator/main.go
benchmarks: $(LINGUIST_PATH)
go test -run=NONE -bench=. && benchmark/linguist-total.sh
benchmarks-samples: $(LINGUIST_PATH)
go test -run=NONE -bench=. -benchtime=5us && benchmark/linguist-samples.rb
benchmarks-slow: $(LINGUST_PATH)
go test -run=NONE -bench=. -slow -benchtime=100ms -timeout=100h >benchmark/output/enry_samples.bench && \
benchmark/linguist-samples.rb 5 >benchmark/output/linguist_samples.bench
clean:
rm -rf $(LINGUIST_PATH)

126
benchmark/linguist-samples.rb Executable file
View File

@@ -0,0 +1,126 @@
#!/usr/bin/env ruby

require 'benchmark'
require 'linguist'

# Number of times each detection call is repeated per sample (default 1).
iterations = (ARGV[0] || 1).to_i

# BenchBlob wraps a FileBlob to keep data loaded and to clean attributes added by language detection.
class BenchBlob < Linguist::FileBlob
  attr_accessor :data

  def initialize(path, base_path = nil)
    super
    @data = File.read(@fullpath)
  end

  # Drop memoized detection state so each iteration measures a cold call.
  def clean
    @_mime_type = nil
    @detect_encoding = nil
    @lines = nil
  end
end

# Recursively collect one BenchBlob per file under root.
def get_samples(root)
  samples = Array.new
  Dir.foreach(root) do |file|
    path = File.join(root, file)
    if file == "." or file == ".."
      next
    elsif File.directory?(path)
      get_samples(path).each do |blob|
        samples << blob
      end
    else
      samples << BenchBlob.new(path)
    end
  end
  return samples
end

# Benchmark the given detection call (the block) once per sample, cleaning
# the blob between iterations so memoized state never skews the numbers.
# The report label format matches what benchmark/parser/main.go expects:
# "<function>_SAMPLE_<path> <iterations>".
def bench_per_sample(samples, function_name, iterations)
  samples.each do |blob|
    sample_name = blob.path.gsub(/\s/, '_')
    Benchmark.bmbm do |bm|
      bm.report(function_name + '_SAMPLE_' + sample_name + ' ' + iterations.to_s) do
        iterations.times do
          yield blob
          blob.clean
        end
      end
    end
  end
end

samples = get_samples('.linguist/samples')
languages = Linguist::Language.all

bench_per_sample(samples, 'GetLanguage()', iterations) { |blob| Linguist::detect(blob) }
bench_per_sample(samples, 'Classify()', iterations) { |blob| Linguist::Classifier.classify(Linguist::Samples.cache, blob.data) }
bench_per_sample(samples, 'GetLanguagesByModeline()', iterations) { |blob| Linguist::Strategy::Modeline.call(blob, languages) }
bench_per_sample(samples, 'GetLanguagesByFilename()', iterations) { |blob| Linguist::Strategy::Filename.call(blob, languages) }
bench_per_sample(samples, 'GetLanguagesByShebang()', iterations) { |blob| Linguist::Shebang.call(blob, languages) }
bench_per_sample(samples, 'GetLanguagesByExtension()', iterations) { |blob| Linguist::Strategy::Extension.call(blob, languages) }
bench_per_sample(samples, 'GetLanguagesByContent()', iterations) { |blob| Linguist::Heuristics.call(blob, languages) }

120
benchmark/linguist-total.rb Executable file
View File

@@ -0,0 +1,120 @@
#!/usr/bin/env ruby

require 'benchmark'
require 'linguist'

# Number of times the whole corpus is processed per report (default 1).
iterations = (ARGV[0] || 1).to_i

# BenchBlob wraps a FileBlob to keep data loaded and to clean attributes added by language detection.
class BenchBlob < Linguist::FileBlob
  attr_accessor :data
  attr_accessor :fullpath

  def initialize(path, base_path = nil)
    super
    @data = File.read(@fullpath)
  end

  # Drop memoized detection state so every pass measures a cold call.
  def clean
    @_mime_type = nil
    @detect_encoding = nil
    @lines = nil
  end
end

# Recursively collect one BenchBlob per file under root.
def get_samples(root)
  samples = Array.new
  Dir.foreach(root) do |file|
    path = File.join(root, file)
    if file == "." or file == ".."
      next
    elsif File.directory?(path)
      get_samples(path).each do |blob|
        samples << blob
      end
    else
      samples << BenchBlob.new(path)
    end
  end
  return samples
end

# Report the total time of running the given detection call (the block) over
# all samples, `iterations` times, cleaning each blob between calls.
# The report label format matches what benchmark/parser/main.go expects:
# "<function>_TOTAL <iterations>".
# (The original bound the first report to an unused local `time`; removed.)
def bench_total(samples, function_name, iterations)
  Benchmark.bmbm do |bm|
    bm.report(function_name + '_TOTAL ' + iterations.to_s) do
      iterations.times do
        samples.each do |blob|
          yield blob
          blob.clean
        end
      end
    end
  end
end

samples = get_samples('.linguist/samples')
languages = Linguist::Language.all

bench_total(samples, 'GetLanguage()', iterations) { |blob| Linguist::detect(blob) }
bench_total(samples, 'Classify()', iterations) { |blob| Linguist::Classifier.classify(Linguist::Samples.cache, blob.data) }
bench_total(samples, 'GetLanguagesByModeline()', iterations) { |blob| Linguist::Strategy::Modeline.call(blob, languages) }
bench_total(samples, 'GetLanguagesByFilename()', iterations) { |blob| Linguist::Strategy::Filename.call(blob, languages) }
bench_total(samples, 'GetLanguagesByShebang()', iterations) { |blob| Linguist::Shebang.call(blob, languages) }
bench_total(samples, 'GetLanguagesByExtension()', iterations) { |blob| Linguist::Strategy::Extension.call(blob, languages) }
bench_total(samples, 'GetLanguagesByContent()', iterations) { |blob| Linguist::Heuristics.call(blob, languages) }

5
benchmark/parse.sh Executable file
View File

@@ -0,0 +1,5 @@
#!/bin/sh
# Convert the raw .bench outputs into CSVs, then build the distribution CSVs.
# Both steps run benchmark/parser/main.go; the second runs from the csv
# directory so the parser finds the per-sample CSVs the first step produced.
cd benchmark/output && go run ../parser/main.go -outdir ../csv && \
cd ../csv && go run ../parser/main.go -distribution

386
benchmark/parser/main.go Normal file
View File

@@ -0,0 +1,386 @@
package main
import (
"bufio"
"bytes"
"encoding/csv"
"flag"
"fmt"
"io/ioutil"
"log"
"math"
"os"
"path/filepath"
"runtime"
"sort"
"strconv"
"strings"
)
const (
	// Names of the detection functions benchmarked; they appear verbatim
	// in the bench output lines and in the generated CSV rows.
	getLanguageFunc = "GetLanguage()"
	classifyFunc    = "Classify()"
	modelineFunc    = "GetLanguagesByModeline()"
	filenameFunc    = "GetLanguagesByFilename()"
	shebangFunc     = "GetLanguagesByShebang()"
	extensionFunc   = "GetLanguagesByExtension()"
	contentFunc     = "GetLanguagesByContent()"

	// Raw benchmark output files consumed by this parser.
	enryTotalBench       = "enry_total.bench"
	enrySamplesBench     = "enry_samples.bench"
	linguistTotalBench   = "linguist_total.bench"
	linguistSamplesBench = "linguist_samples.bench"

	// CSV files generated by default.
	enryTotalCSV       = "enry-total.csv"
	enrySamplesCSV     = "enry-samples.csv"
	linguistTotalCSV   = "linguist-total.csv"
	linguistSamplesCSV = "linguist-samples.csv"

	// CSV files generated when the -distribution flag is set.
	enryDistributionCSV     = "enry-distribution.csv"
	linguistDistributionCSV = "linguist-distribution.csv"
)

var (
	// Command-line flags, set in main.
	distribution bool
	outDir       string

	// Canonical ordering of function names in every generated CSV.
	enryFunctions = []string{getLanguageFunc, classifyFunc, modelineFunc, filenameFunc, shebangFunc, extensionFunc, contentFunc}
	// Histogram buckets for the distribution CSVs (see arrangeByTime).
	distributionIntervals = []string{"1us-10us", "10us-100us", "100us-1ms", "1ms-10ms", "10ms-100ms"}
)
// main parses the command-line flags, then generates either the
// distribution CSVs (-distribution) or the benchmark-result CSVs (default).
func main() {
	// Fixed typo in the help text: the generated file is
	// "enry-distribution.csv" (see enryDistributionCSV above).
	flag.BoolVar(&distribution, "distribution", false, "generate enry-distribution.csv and linguist-distribution.csv")
	flag.StringVar(&outDir, "outdir", "", "path to leave csv files")
	flag.Parse()

	if distribution {
		generateDistributionCSV()
		return
	}

	generateCSV()
}
// generateDistributionCSV turns the per-sample CSVs into per-interval
// distribution CSVs, one for enry and one for linguist. Errors are logged
// and the remaining inputs are still processed.
func generateDistributionCSV() {
	CSVFiles := []struct {
		in   string
		out  string
		tool string
	}{
		{in: enrySamplesCSV, out: enryDistributionCSV, tool: "enry"},
		{in: linguistSamplesCSV, out: linguistDistributionCSV, tool: "linguist"},
	}

	for _, CSVFile := range CSVFiles {
		// readCSV closes each file as soon as it is parsed; the original
		// used `defer f.Close()` inside this loop, which kept every file
		// open until the function returned.
		CSVSamples, err := readCSV(CSVFile.in)
		if err != nil {
			log.Println(err)
			continue
		}
		// Skip the header row; buildDistribution only wants data rows.
		CSVDistribution, err := buildDistribution(CSVSamples[1:], CSVFile.tool)
		if err != nil {
			log.Println(err)
			continue
		}
		if err := writeCSV(CSVDistribution, filepath.Join(outDir, CSVFile.out)); err != nil {
			log.Println(err)
			continue
		}
	}
}

// readCSV opens, fully parses, and closes a CSV file.
func readCSV(path string) ([][]string, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()
	return csv.NewReader(f).ReadAll()
}
// buildDistribution counts how many GetLanguage() sample rows fall into each
// time interval and returns the counts as CSV rows (header first) in the
// fixed distributionIntervals order. It also prints a summary to stdout.
func buildDistribution(CSVSamples [][]string, tool string) ([][]string, error) {
	count := make(map[string]int, len(distributionIntervals))
	for _, row := range CSVSamples {
		// Only the top-level GetLanguage() rows enter the distribution.
		if row[1] != getLanguageFunc {
			continue
		}
		// The last column is the ns/op value produced by getAverage.
		num, err := strconv.ParseFloat(row[len(row)-1], 64)
		if err != nil {
			return nil, err
		}
		arrangeByTime(count, num)
	}

	CSVDistribution := make([][]string, 0, len(count)+1)
	firstLine := []string{"timeInterval", tool, "numberOfFiles"}
	CSVDistribution = append(CSVDistribution, firstLine)
	for _, interval := range distributionIntervals {
		number := strconv.FormatInt(int64(count[interval]), 10)
		row := []string{interval, tool, number}
		CSVDistribution = append(CSVDistribution, row)
	}

	printDistributionInfo(count, tool)
	return CSVDistribution, nil
}
// printDistributionInfo writes a human-readable summary of the interval
// counts for tool to stdout: total file count, absolute counts per
// interval, and the percentage each interval represents.
func printDistributionInfo(count map[string]int, tool string) {
	var total int
	for _, n := range count {
		total += n
	}
	fmt.Println(tool, "files", total)

	fmt.Println("Distribution")
	for _, bucket := range distributionIntervals {
		fmt.Println("\t", bucket, count[bucket])
	}

	fmt.Println("Percentage")
	for _, bucket := range distributionIntervals {
		share := (float64(count[bucket]) / float64(total)) * 100.00
		fmt.Printf("\t %s %f%%\n", bucket, share)
	}
	fmt.Printf("\n\n")
}
// arrangeByTime increments the histogram bucket whose (lower, upper] bound
// in nanoseconds contains num. Values at or below 1us or above 100ms fall
// outside every bucket and are not counted (same as the original switch).
func arrangeByTime(count map[string]int, num float64) {
	// Bucket i covers (bounds[i], bounds[i+1]] nanoseconds.
	bounds := []float64{1000.00, 10000.00, 100000.00, 1000000.00, 10000000.00, 100000000.00}
	for i, interval := range distributionIntervals {
		if num > bounds[i] && num <= bounds[i+1] {
			count[interval]++
			return
		}
	}
}
func writeCSV(CSVData [][]string, outPath string) error {
out, err := os.Create(outPath)
if err != nil {
return err
}
w := csv.NewWriter(out)
w.WriteAll(CSVData)
if err := w.Error(); err != nil {
return err
}
return nil
}
// parse is the signature shared by parseTotal and parseSamples:
// raw bench output in, CSV rows out.
type parse func(data []byte, tool string) ([][]string, error)

// generateCSV converts each known .bench file into its CSV counterpart in
// outDir. Missing or malformed inputs are logged and skipped so the other
// files are still generated.
func generateCSV() {
	bmFiles := []struct {
		in    string
		out   string
		tool  string
		parse parse
	}{
		{in: enryTotalBench, out: enryTotalCSV, tool: "enry", parse: parseTotal},
		{in: linguistTotalBench, out: linguistTotalCSV, tool: "linguist", parse: parseTotal},
		{in: enrySamplesBench, out: enrySamplesCSV, tool: "enry", parse: parseSamples},
		{in: linguistSamplesBench, out: linguistSamplesCSV, tool: "linguist", parse: parseSamples},
	}

	for _, bmFile := range bmFiles {
		buf, err := ioutil.ReadFile(bmFile.in)
		if err != nil {
			log.Println(err)
			continue
		}
		info, err := bmFile.parse(buf, bmFile.tool)
		if err != nil {
			log.Println(err)
			continue
		}
		if err := writeCSV(info, filepath.Join(outDir, bmFile.out)); err != nil {
			log.Println(err)
			continue
		}
	}
}
// parseTotal extracts the "_TOTAL" result lines from raw bench output and
// returns them as CSV rows headed by function/tool/iterations/ns-op.
func parseTotal(data []byte, tool string) ([][]string, error) {
	const totalLine = "_TOTAL"
	parsedInfo := map[string][]string{}
	buf := bufio.NewScanner(bytes.NewReader(data))
	for buf.Scan() {
		line := buf.Text()
		if strings.Contains(line, totalLine) {
			split := strings.Fields(line)
			row, err := getRow(split, tool)
			if err != nil {
				return nil, err
			}
			// Keyed by function name (row[0]); a later line for the same
			// function overwrites an earlier one.
			parsedInfo[row[0]] = row
		}
	}
	if err := buf.Err(); err != nil {
		return nil, err
	}

	firstLine := []string{"function", "tool", "iterations", "ns/op"}
	return prepareInfoForCSV(parsedInfo, firstLine), nil
}
// getRow builds one CSV row [function, tool, iterations, ns-per-op] from a
// whitespace-split bench output line.
// NOTE(review): if line[0] matches none of enryFunctions the function column
// is silently omitted and the remaining columns shift left — this assumes
// callers only pass lines containing a known function name; verify.
func getRow(line []string, tool string) ([]string, error) {
	row := make([]string, 0, 3)
	for _, function := range enryFunctions {
		if strings.Contains(line[0], function) {
			row = append(row, function)
			break
		}
	}
	row = append(row, tool)

	// Second field of a bench line is the iteration count.
	iterations := line[1]
	row = append(row, iterations)

	average, err := getAverage(line)
	if err != nil {
		return nil, err
	}
	row = append(row, average)

	return row, nil
}
// getAverage returns the per-iteration time for one bench line as a string.
// Go bench lines already carry ns/op in the third field; Ruby Benchmark
// lines end with the total wall time in parentheses, which is converted to
// nanoseconds and divided by the iteration count in the second field.
func getAverage(line []string) (string, error) {
	last := line[len(line)-1]
	if !strings.HasSuffix(last, ")") {
		// Go format: ns/op is already the third column.
		return line[2], nil
	}

	// Ruby format: "(<seconds>)" — strip the parentheses and convert.
	totalSeconds, err := strconv.ParseFloat(strings.Trim(last, "() "), 64)
	if err != nil {
		return "", err
	}
	iters, err := strconv.ParseFloat(line[1], 64)
	if err != nil {
		return "", err
	}

	perOpNs := (totalSeconds * math.Pow10(9)) / iters
	return fmt.Sprintf("%d", int(perOpNs)), nil
}
// prepareInfoForCSV flattens parsedInfo into CSV rows in the canonical
// enryFunctions order, prefixed by firstLine when it is non-empty.
// NOTE(review): a function absent from parsedInfo contributes a nil row.
func prepareInfoForCSV(parsedInfo map[string][]string, firstLine []string) [][]string {
	info := createInfoWithFirstLine(firstLine, len(parsedInfo))
	for _, function := range enryFunctions {
		info = append(info, parsedInfo[function])
	}
	return info
}
// createInfoWithFirstLine allocates a row slice with capacity for
// sliceLength data rows; when firstLine is non-empty it is placed first
// and the capacity is extended by one to hold it.
func createInfoWithFirstLine(firstLine []string, sliceLength int) [][]string {
	if len(firstLine) == 0 {
		return make([][]string, 0, sliceLength)
	}
	info := make([][]string, 0, sliceLength+1)
	return append(info, firstLine)
}
// enryFuncs maps each benchmarked function name to its CSV row
// (nil until a bench line for that function has been parsed).
type enryFuncs map[string][]string

// newEnryFuncs returns an enryFuncs with every known function present
// and its row still nil, so iteration order helpers see all keys.
func newEnryFuncs() enryFuncs {
	return enryFuncs{
		getLanguageFunc: nil,
		classifyFunc:    nil,
		modelineFunc:    nil,
		filenameFunc:    nil,
		shebangFunc:     nil,
		extensionFunc:   nil,
		contentFunc:     nil,
	}
}
// parseSamples extracts the per-sample ("SAMPLE_") result lines from raw
// bench output, groups them per sample file, and returns them as CSV rows
// headed by file/function/tool/iterations/ns-op.
func parseSamples(data []byte, tool string) ([][]string, error) {
	const sampleLine = "SAMPLE_"
	parsedInfo := map[string]enryFuncs{}
	buf := bufio.NewScanner(bytes.NewReader(data))
	for buf.Scan() {
		line := buf.Text()
		if strings.Contains(line, sampleLine) {
			split := strings.Fields(line)
			name := getSampleName(split[0])
			if _, ok := parsedInfo[name]; !ok {
				parsedInfo[name] = newEnryFuncs()
			}

			row := make([]string, 0, 4)
			row = append(row, name)
			r, err := getRow(split, tool)
			if err != nil {
				return nil, err
			}
			row = append(row, r...)

			// row[1] is the function name appended by getRow.
			function := row[1]
			parsedInfo[name][function] = row
		}
	}
	if err := buf.Err(); err != nil {
		return nil, err
	}

	firstLine := []string{"file", "function", "tool", "iterations", "ns/op"}
	return prepareSamplesInfoForCSV(parsedInfo, firstLine), nil
}
// getSampleName extracts the sample path from a bench name: everything
// after the "SAMPLE_" marker, minus the "-<GOMAXPROCS>" suffix that the Go
// benchmark runner appends to sub-benchmark names.
func getSampleName(s string) string {
	const marker = "SAMPLE_"
	name := s[strings.Index(s, marker)+len(marker):]
	procSuffix := fmt.Sprintf("-%d", runtime.GOMAXPROCS(-1))
	return strings.TrimSuffix(name, procSuffix)
}
// prepareSamplesInfoForCSV flattens the per-sample map into CSV rows:
// samples sorted by path, functions in canonical order within each sample,
// prefixed by firstLine when non-empty.
func prepareSamplesInfoForCSV(parsedInfo map[string]enryFuncs, firstLine []string) [][]string {
	info := createInfoWithFirstLine(firstLine, len(parsedInfo)*len(enryFunctions))
	orderedKeys := sortKeys(parsedInfo)
	for _, path := range orderedKeys {
		// nil firstLine: only the header of the whole file carries one.
		sampleInfo := prepareInfoForCSV(parsedInfo[path], nil)
		info = append(info, sampleInfo...)
	}
	return info
}
// sortKeys returns the sample paths of parsedInfo in ascending order, so
// CSV output is deterministic despite map iteration order.
func sortKeys(parsedInfo map[string]enryFuncs) []string {
	names := make([]string, 0, len(parsedInfo))
	for name := range parsedInfo {
		names = append(names, name)
	}
	sort.Strings(names)
	return names
}

View File

@@ -0,0 +1,19 @@
# Render a clustered histogram of the processing-time distribution as a
# large JPEG under benchmark/histogram/.
set terminal jpeg large font arial size 1920,1080
set output 'benchmark/histogram/distribution.jpg'

# Input CSVs (produced by benchmark/parser) are comma separated.
set datafile separator comma
set key under
set style data histogram
set style histogram clustered gap 1 title offset 1,1
set style fill solid noborder
set boxwidth 0.95
set grid y
set bmargin 12
set autoscale
set title "Number of files whose processed time was inside time interval"

# Column 3 holds the file count; column 1 labels the time interval bucket.
plot newhistogram, 'benchmark/csv/enry-distribution.csv' using 3:xtic(1) title "enry", 'benchmark/csv/linguist-distribution.csv' using 3 title "linguist"
unset output

4
benchmark/plot-histogram.sh Executable file
View File

@@ -0,0 +1,4 @@
#!/bin/sh
# Render the benchmark distribution histogram (see plot-histogram.gp for
# the gnuplot script and its input/output paths).
gnuplot benchmark/plot-histogram.gp

4
benchmark/run-benchmark.sh Executable file
View File

@@ -0,0 +1,4 @@
#!/bin/sh
# Run the whole-corpus benchmarks: the Go (enry) benchmarks first, then the
# Ruby linguist totals script, writing raw outputs for benchmark/parse.sh.
go test -run NONE -bench=. -benchtime=120s -timeout=100h >benchmark/output/enry_total.bench && \
benchmark/linguist-total.rb 5 >benchmark/output/linguist_total.bench

4
benchmark/run.sh Executable file
View File

@@ -0,0 +1,4 @@
#!/bin/sh
# Full pipeline: total benchmarks, slow per-sample benchmarks, CSV parsing,
# then histogram plotting. Stops at the first failing stage.
benchmark/run-benchmark.sh && make benchmarks-slow && \
benchmark/parse.sh && benchmark/plot-histogram.sh

194
benchmark_test.go Normal file
View File

@@ -0,0 +1,194 @@
package enry
import (
"flag"
"io/ioutil"
"log"
"os"
"path/filepath"
"testing"
)
// samplesDir is the linguist checkout's sample corpus used by all benchmarks.
const samplesDir = ".linguist/samples"

// sample is one corpus file loaded fully into memory, so disk I/O never
// shows up in the benchmark timings.
type sample struct {
	filename string
	content  []byte
}

var (
	// slow enables the per-sample benchmarks (registered as -slow in TestMain).
	slow bool
	// overcomeLanguage / overcomeLanguages are package-level sinks that
	// benchmark results are written to — presumably to keep the measured
	// calls from being optimized away; TODO confirm.
	overcomeLanguage  string
	overcomeLanguages []string
	// samples holds the whole corpus, loaded once in TestMain.
	samples []*sample
)
// TestMain registers the -slow flag and preloads the sample corpus before
// any benchmark runs; it aborts the whole run if loading fails.
func TestMain(m *testing.M) {
	flag.BoolVar(&slow, "slow", false, "run benchmarks per sample for strategies too")
	flag.Parse()

	var err error
	samples, err = getSamples(samplesDir)
	if err != nil {
		log.Fatal(err)
	}

	os.Exit(m.Run())
}
// getSamples walks dir and loads every regular file fully into memory,
// returning one sample per file. Walk errors and read errors abort the walk
// and are returned alongside whatever was collected so far.
func getSamples(dir string) ([]*sample, error) {
	collected := make([]*sample, 0, 2000)
	walkFn := func(path string, info os.FileInfo, err error) error {
		switch {
		case err != nil:
			return err
		case info.IsDir():
			return nil
		}
		data, err := ioutil.ReadFile(path)
		if err != nil {
			return err
		}
		collected = append(collected, &sample{
			filename: path,
			content:  data,
		})
		return nil
	}
	err := filepath.Walk(dir, walkFn)
	return collected, err
}
// BenchmarkGetLanguageTotal times GetLanguage over the whole corpus per
// iteration. Skipped in -slow mode, where the per-sample variants run instead.
func BenchmarkGetLanguageTotal(b *testing.B) {
	if slow {
		b.SkipNow()
	}

	var o string
	b.Run("GetLanguage()_TOTAL", func(b *testing.B) {
		for n := 0; n < b.N; n++ {
			for _, sample := range samples {
				o = GetLanguage(sample.filename, sample.content)
			}
		}
		// Store the last result in the package sink so the call stays
		// observable — presumably to defeat dead-code elimination; confirm.
		overcomeLanguage = o
	})
}
// BenchmarkClassifyTotal times the classifier alone over the whole corpus
// per iteration. Skipped in -slow mode.
func BenchmarkClassifyTotal(b *testing.B) {
	if slow {
		b.SkipNow()
	}

	var o []string
	b.Run("Classify()_TOTAL", func(b *testing.B) {
		for n := 0; n < b.N; n++ {
			for _, sample := range samples {
				o = DefaultClassifier.Classify(sample.content, nil)
			}
			// Sink assignment; sits inside the b.N loop here, while
			// BenchmarkGetLanguageTotal assigns after it — same effect.
			overcomeLanguages = o
		}
	})
}
// BenchmarkStrategiesTotal times each individual detection strategy over the
// whole corpus per iteration. Skipped in -slow mode.
func BenchmarkStrategiesTotal(b *testing.B) {
	if slow {
		b.SkipNow()
	}

	benchmarks := []struct {
		name       string
		strategy   Strategy
		candidates []string // never populated: every strategy runs with nil candidates
	}{
		{name: "GetLanguagesByModeline()_TOTAL", strategy: GetLanguagesByModeline},
		{name: "GetLanguagesByFilename()_TOTAL", strategy: GetLanguagesByFilename},
		{name: "GetLanguagesByShebang()_TOTAL", strategy: GetLanguagesByShebang},
		{name: "GetLanguagesByExtension()_TOTAL", strategy: GetLanguagesByExtension},
		{name: "GetLanguagesByContent()_TOTAL", strategy: GetLanguagesByContent},
	}

	var o []string
	for _, benchmark := range benchmarks {
		b.Run(benchmark.name, func(b *testing.B) {
			for n := 0; n < b.N; n++ {
				for _, sample := range samples {
					o = benchmark.strategy(sample.filename, sample.content, benchmark.candidates)
				}
				// Sink assignment to keep the result observable.
				overcomeLanguages = o
			}
		})
	}
}
// BenchmarkGetLanguagePerSample times GetLanguage once per corpus file as a
// separate sub-benchmark ("…_SAMPLE_<path>", the names benchmark/parser
// expects). Only runs in -slow mode.
func BenchmarkGetLanguagePerSample(b *testing.B) {
	if !slow {
		b.SkipNow()
	}

	var o string
	for _, sample := range samples {
		b.Run("GetLanguage()_SAMPLE_"+sample.filename, func(b *testing.B) {
			for n := 0; n < b.N; n++ {
				o = GetLanguage(sample.filename, sample.content)
			}
			// Sink assignment to keep the result observable.
			overcomeLanguage = o
		})
	}
}
// BenchmarkClassifyPerSample times the classifier once per corpus file as a
// separate sub-benchmark. Only runs in -slow mode.
func BenchmarkClassifyPerSample(b *testing.B) {
	if !slow {
		b.SkipNow()
	}

	var o []string
	for _, sample := range samples {
		b.Run("Classify()_SAMPLE_"+sample.filename, func(b *testing.B) {
			for n := 0; n < b.N; n++ {
				o = DefaultClassifier.Classify(sample.content, nil)
			}
			// Sink assignment to keep the result observable.
			overcomeLanguages = o
		})
	}
}
// BenchmarkStrategiesPerSample times each detection strategy once per corpus
// file as separate sub-benchmarks. Only runs in -slow mode.
func BenchmarkStrategiesPerSample(b *testing.B) {
	if !slow {
		b.SkipNow()
	}

	benchmarks := []struct {
		name       string
		strategy   Strategy
		candidates []string // never populated: every strategy runs with nil candidates
	}{
		{name: "GetLanguagesByModeline()_SAMPLE_", strategy: GetLanguagesByModeline},
		{name: "GetLanguagesByFilename()_SAMPLE_", strategy: GetLanguagesByFilename},
		{name: "GetLanguagesByShebang()_SAMPLE_", strategy: GetLanguagesByShebang},
		{name: "GetLanguagesByExtension()_SAMPLE_", strategy: GetLanguagesByExtension},
		{name: "GetLanguagesByContent()_SAMPLE_", strategy: GetLanguagesByContent},
	}

	var o []string
	for _, benchmark := range benchmarks {
		for _, sample := range samples {
			b.Run(benchmark.name+sample.filename, func(b *testing.B) {
				for n := 0; n < b.N; n++ {
					o = benchmark.strategy(sample.filename, sample.content, benchmark.candidates)
				}
				// Sink assignment to keep the result observable.
				overcomeLanguages = o
			})
		}
	}
}

View File

@@ -3,7 +3,6 @@ package enry
import (
"bytes"
"fmt"
"testing"
"github.com/stretchr/testify/assert"
)
@@ -80,20 +79,3 @@ func (s *EnryTestSuite) TestIsBinary() {
assert.Equal(s.T(), is, test.expected, fmt.Sprintf("%v: is = %v, expected: %v", test.name, is, test.expected))
}
}
const (
htmlPath = "some/random/dir/file.html"
jsPath = "some/random/dir/file.js"
)
func BenchmarkVendor(b *testing.B) {
for i := 0; i < b.N; i++ {
_ = IsVendor(htmlPath)
}
}
func BenchmarkVendorJS(b *testing.B) {
for i := 0; i < b.N; i++ {
_ = IsVendor(jsPath)
}
}