From b2b61c2a8c86b09dc9a1c9642e918b70a2c90af9 Mon Sep 17 00:00:00 2001
From: Alexander Bezzubov <bzz@apache.org>
Date: Wed, 3 Apr 2019 15:40:23 +0200
Subject: [PATCH 1/9] gen: refactoring, renaming vars for readability

This does not change the logic of the generator;
it only renames/moves some vars for readability.

Signed-off-by: Alexander Bezzubov <bzz@apache.org>
---
 .../code-generator/generator/samplesfreq.go   | 35 +++++++++----------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/internal/code-generator/generator/samplesfreq.go b/internal/code-generator/generator/samplesfreq.go
index 7b734b0..25fe431 100644
--- a/internal/code-generator/generator/samplesfreq.go
+++ b/internal/code-generator/generator/samplesfreq.go
@@ -7,7 +7,6 @@ import (
 	"io/ioutil"
 	"log"
 	"math"
-	"os"
 	"path/filepath"
 	"sort"
 	"strconv"
@@ -41,7 +40,7 @@ func Frequencies(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit st
 }
 
 func getFrequencies(samplesDir string) (*samplesFrequencies, error) {
-	entries, err := ioutil.ReadDir(samplesDir)
+	langDirs, err := ioutil.ReadDir(samplesDir)
 	if err != nil {
 		return nil, err
 	}
@@ -52,13 +51,14 @@ func getFrequencies(samplesDir string) (*samplesFrequencies, error) {
 	var tokens = make(map[string]map[string]int)
 	var languageTokens = make(map[string]int)
 
-	for _, entry := range entries {
-		if !entry.IsDir() {
+	for _, langDir := range langDirs {
+		if !langDir.IsDir() {
 			log.Println(err)
 			continue
 		}
 
-		samples, err := getSamples(samplesDir, entry)
+		lang := langDir.Name()
+		samples, err := getSamplesFrom(filepath.Join(samplesDir, lang))
 		if err != nil {
 			log.Println(err)
 		}
@@ -73,7 +73,6 @@ func getFrequencies(samplesDir string) (*samplesFrequencies, error) {
 			continue
 		}
 
-		lang := entry.Name()
 		languageTotal += len(samples)
 		languages[lang] = len(samples)
 		tokensTotal += len(samplesTokens)
@@ -93,22 +92,23 @@ func getFrequencies(samplesDir string) (*samplesFrequencies, error) {
 	}, nil
 }
 
-func getSamples(samplesDir string, langDir os.FileInfo) ([]string, error) {
-	const samplesSubDir = "filenames"
-	samples := []string{}
-	path := filepath.Join(samplesDir, langDir.Name())
-	entries, err := ioutil.ReadDir(path)
+func getSamplesFrom(samplesLangDir string) ([]string, error) {
+	const samplesLangFilesDir = "filenames"
+	var samples []string
+	sampleFiles, err := ioutil.ReadDir(samplesLangDir)
 	if err != nil {
 		return nil, err
 	}
 
-	for _, entry := range entries {
-		if entry.Mode().IsRegular() {
-			samples = append(samples, filepath.Join(path, entry.Name()))
+	for _, sampleFile := range sampleFiles {
+		filename := filepath.Join(samplesLangDir, sampleFile.Name())
+		if sampleFile.Mode().IsRegular() {
+			samples = append(samples, filename)
+			continue
 		}
 
-		if entry.IsDir() && entry.Name() == samplesSubDir {
-			subSamples, err := getSubSamples(samplesDir, langDir.Name(), entry)
+		if sampleFile.IsDir() && sampleFile.Name() == samplesLangFilesDir {
+			subSamples, err := getSubSamplesFrom(filename)
 			if err != nil {
 				return nil, err
 			}
@@ -121,9 +121,8 @@ func getSamples(samplesDir string, langDir os.FileInfo) ([]string, error) {
 	return samples, nil
 }
 
-func getSubSamples(samplesDir, langDir string, subLangDir os.FileInfo) ([]string, error) {
+func getSubSamplesFrom(path string) ([]string, error) {
 	subSamples := []string{}
-	path := filepath.Join(samplesDir, langDir, subLangDir.Name())
 	entries, err := ioutil.ReadDir(path)
 	if err != nil {
 		return nil, err

From df01124e1877e4f90f8938ad672bd4611d0ad9e0 Mon Sep 17 00:00:00 2001
From: Alexander Bezzubov <bzz@apache.org>
Date: Wed, 3 Apr 2019 16:07:14 +0200
Subject: [PATCH 2/9] doc: better wording in API godoc

Signed-off-by: Alexander Bezzubov <bzz@apache.org>
---
 common.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/common.go b/common.go
index 3486274..b88c339 100644
--- a/common.go
+++ b/common.go
@@ -26,7 +26,7 @@ var DefaultStrategies = []Strategy{
 	GetLanguagesByClassifier,
 }
 
-// DefaultClassifier is a naive Bayes classifier based on Linguist samples.
+// DefaultClassifier is a Naive Bayes classifier trained on Linguist samples.
 var DefaultClassifier Classifier = &classifier{
 	languagesLogProbabilities: data.LanguagesLogProbabilities,
 	tokensLogProbabilities:    data.TokensLogProbabilities,
@@ -390,8 +390,8 @@ func getDotIndexes(filename string) []int {
 	return dots
 }
 
-// GetLanguagesByContent returns a slice of possible languages for the given content.
-// It complies with the signature to be a Strategy type.
+// GetLanguagesByContent returns a slice of languages for the given content.
+// It is a Strategy that uses a content-based regexp heuristics and a filename extension.
 func GetLanguagesByContent(filename string, content []byte, _ []string) []string {
 	if filename == "" {
 		return nil

From 88810fed12a5264c82a147fa30c1eaa2e7bfa015 Mon Sep 17 00:00:00 2001
From: Alexander Bezzubov <bzz@apache.org>
Date: Wed, 3 Apr 2019 16:21:10 +0200
Subject: [PATCH 3/9] cli: mimic linguist output by default

This includes the following main changes:

 - default: print only Programming and Markup types
   as Linguist does
 - `-prog` option replaced with `-all`, to allow for
   previous behavior
 - always use GetLanguage as the main source of truth;
   this fixes #204, and perf will be restored under #212

Signed-off-by: Alexander Bezzubov <bzz@apache.org>
---
 cmd/enry/main.go | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/cmd/enry/main.go b/cmd/enry/main.go
index b67d2aa..139a6c0 100644
--- a/cmd/enry/main.go
+++ b/cmd/enry/main.go
@@ -29,7 +29,7 @@ func main() {
 	breakdownFlag := flag.Bool("breakdown", false, "")
 	jsonFlag := flag.Bool("json", false, "")
 	showVersion := flag.Bool("version", false, "Show the enry version information")
-	onlyProg := flag.Bool("prog", false, "Only show programming file types in output")
+	allLangs := flag.Bool("all", false, "Show not only the files with programming languages (default) but all languages instead")
 	countMode := flag.String("mode", "file", "the method used to count file size. Available options are: file, line and byte")
 	limitKB := flag.Int64("limit", 16*1024, "Analyse first N KB of the file (-1 means no limit)")
 	flag.Parse()
@@ -96,24 +96,25 @@ func main() {
 			return nil
 		}
 
-		language, ok := enry.GetLanguageByExtension(path)
-		if !ok {
-			if language, ok = enry.GetLanguageByFilename(path); !ok {
-				content, err := readFile(path, limit)
-				if err != nil {
-					log.Println(err)
-					return nil
-				}
-
-				language = enry.GetLanguage(filepath.Base(path), content)
-				if language == enry.OtherLanguage {
-					return nil
-				}
-			}
+		//TODO(bzz): provide API that mimics lingust CLI output for
+		// running ByExtension & ByFilename
+		// reading the file, if that did not work
+		// GetLanguage([]Strategy)
+		content, err := readFile(path, limit)
+		if err != nil {
+			log.Println(err)
+			return nil
 		}
 
-		// If we are displaying only prog. and language is not prog. skip it.
-		if *onlyProg && enry.GetLanguageType(language) != enry.Programming {
+		language := enry.GetLanguage(filepath.Base(path), content)
+		if language == enry.OtherLanguage {
+			return nil
+		}
+
+		// If we are displaying only prog, skip it
+		if !*allLangs &&
+			enry.GetLanguageType(language) != enry.Programming &&
+			enry.GetLanguageType(language) != enry.Markup {
 			return nil
 		}
 

From c9f1793a78198109297b496fdd1d42d9699b1dfa Mon Sep 17 00:00:00 2001
From: Alexander Bezzubov <bzz@apache.org>
Date: Wed, 3 Apr 2019 17:35:03 +0200
Subject: [PATCH 4/9] doc: update godoc and README with supported features

Signed-off-by: Alexander Bezzubov <bzz@apache.org>
---
 README.md        | 13 +++++++++++--
 cmd/enry/main.go |  5 ++++-
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 3fb2c8d..3b93a86 100644
--- a/README.md
+++ b/README.md
@@ -188,9 +188,18 @@ as a set for the tests, the following issues were found:
 
 * [Heuristics for ".es" extension](https://github.com/github/linguist/blob/e761f9b013e5b61161481fcb898b59721ee40e3d/lib/linguist/heuristics.yml#L103) in JavaScript could not be parsed, due to unsupported backreference in RE2 regexp engine
 
-* As of (Linguist v5.3.2)[https://github.com/github/linguist/releases/tag/v5.3.2] it is using [flex-based scanner in C for tokenization](https://github.com/github/linguist/pull/3846). Enry stil uses [extract_token](https://github.com/github/linguist/pull/3846/files#diff-d5179df0b71620e3fac4535cd1368d15L60) regex-based algorithm. Tracked under https://github.com/src-d/enry/issues/193
+* As of (Linguist v5.3.2)[https://github.com/github/linguist/releases/tag/v5.3.2] it is using [flex-based scanner in C for tokenization](https://github.com/github/linguist/pull/3846). Enry stil uses [extract_token](https://github.com/github/linguist/pull/3846/files#diff-d5179df0b71620e3fac4535cd1368d15L60) regex-based algorithm. [#193](https://github.com/src-d/enry/issues/193)
 
-* Bayesian classifier cann't distinguish "SQL" vs "PLpgSQL". Tracked under https://github.com/src-d/enry/issues/194
+* Bayesian classifier cann't distinguish "SQL" vs "PLpgSQL. [#194](https://github.com/src-d/enry/issues/194)
+
+* Dection of [generated files](https://github.com/github/linguist/blob/bf95666fc15e49d556f2def4d0a85338423c25f3/lib/linguist/generated.rb#L53) is not supported yet.
+ (Thus they are not exclued from CLI output) [#213](https://github.com/src-d/enry/issues/213)
+
+* XML detection strategy is not implemented. [#192](https://github.com/src-d/enry/issues/192)
+
+* Overriding languaes and types though `.gitattributes` is not yet supported. [#18](https://github.com/src-d/enry/issues/18)
+
+* enry CLI output does NOT exclude `.gitignore`ed files and submodel dirs as linguist does
 
 `enry` [CLI tool](#cli) does not require a full Git repository to be present in filesystem in order to report languages.
 
diff --git a/cmd/enry/main.go b/cmd/enry/main.go
index 139a6c0..1b7e4aa 100644
--- a/cmd/enry/main.go
+++ b/cmd/enry/main.go
@@ -85,6 +85,7 @@ func main() {
 
 		if enry.IsVendor(relativePath) || enry.IsDotFile(relativePath) ||
 			enry.IsDocumentation(relativePath) || enry.IsConfiguration(relativePath) {
+			//TODO(bzz): skip enry.IsGeneratedPath() after https://github.com/src-d/enry/issues/213
 			if f.IsDir() {
 				return filepath.SkipDir
 			}
@@ -105,13 +106,15 @@ func main() {
 			log.Println(err)
 			return nil
 		}
+		//TODO(bzz): skip enry.IsGeneratedContent() after https://github.com/src-d/enry/issues/213
 
 		language := enry.GetLanguage(filepath.Base(path), content)
 		if language == enry.OtherLanguage {
 			return nil
 		}
 
-		// If we are displaying only prog, skip it
+		// If we are not asked to display all, do as
+		// https://github.com/github/linguist/blob/bf95666fc15e49d556f2def4d0a85338423c25f3/lib/linguist/blob_helper.rb#L382
 		if !*allLangs &&
 			enry.GetLanguageType(language) != enry.Programming &&
 			enry.GetLanguageType(language) != enry.Markup {

From 94e8598d3d05c08def60e7dea0a3404f797a5010 Mon Sep 17 00:00:00 2001
From: Alexander Bezzubov <bzz@apache.org>
Date: Thu, 4 Apr 2019 15:27:12 +0200
Subject: [PATCH 5/9] doc: update TravisCI links

Signed-off-by: Alexander Bezzubov <bzz@apache.org>
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 3b93a86..7c2f7bb 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# enry [![GoDoc](https://godoc.org/gopkg.in/src-d/enry.v1?status.svg)](https://godoc.org/gopkg.in/src-d/enry.v1) [![Build Status](https://travis-ci.org/src-d/enry.svg?branch=master)](https://travis-ci.org/src-d/enry) [![codecov](https://codecov.io/gh/src-d/enry/branch/master/graph/badge.svg)](https://codecov.io/gh/src-d/enry)
+# enry [![GoDoc](https://godoc.org/gopkg.in/src-d/enry.v1?status.svg)](https://godoc.org/gopkg.in/src-d/enry.v1) [![Build Status](https://travis-ci.com/src-d/enry.svg?branch=master)](https://travis-ci.com/src-d/enry) [![codecov](https://codecov.io/gh/src-d/enry/branch/master/graph/badge.svg)](https://codecov.io/gh/src-d/enry)
 
 File programming language detector and toolbox to ignore binary or vendored files. *enry*, started as a port to _Go_ of the original [linguist](https://github.com/github/linguist) _Ruby_ library, that has an improved *2x performance*.
 

From b6027d6d0c4f31f63f1527cc7270a838585ec2dc Mon Sep 17 00:00:00 2001
From: Alexander Bezzubov <bzz@apache.org>
Date: Thu, 4 Apr 2019 22:02:30 +0200
Subject: [PATCH 6/9] cli: mode=byte by default + fix file reading

Signed-off-by: Alexander Bezzubov <bzz@apache.org>
---
 cmd/enry/main.go | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/cmd/enry/main.go b/cmd/enry/main.go
index 1b7e4aa..20095ec 100644
--- a/cmd/enry/main.go
+++ b/cmd/enry/main.go
@@ -30,7 +30,7 @@ func main() {
 	jsonFlag := flag.Bool("json", false, "")
 	showVersion := flag.Bool("version", false, "Show the enry version information")
 	allLangs := flag.Bool("all", false, "Show not only the files with programming languages (default) but all languages instead")
-	countMode := flag.String("mode", "file", "the method used to count file size. Available options are: file, line and byte")
+	countMode := flag.String("mode", "byte", "the method used to count file size. Available options are: file, line and byte")
 	limitKB := flag.Int64("limit", 16*1024, "Analyse first N KB of the file (-1 means no limit)")
 	flag.Parse()
 	limit := (*limitKB) * 1024
@@ -136,11 +136,11 @@ func main() {
 	case *jsonFlag && *breakdownFlag:
 		printBreakDown(out, &buf)
 	case *breakdownFlag:
-		printPercents(out, &buf, *countMode)
+		printPercents(root, out, &buf, *countMode)
 		buf.WriteByte('\n')
 		printBreakDown(out, &buf)
 	default:
-		printPercents(out, &buf, *countMode)
+		printPercents(root, out, &buf, *countMode)
 	}
 
 	fmt.Print(buf.String())
@@ -182,9 +182,9 @@ func (e filelistError) Error() string {
 	return fmt.Sprintf("Could not process the following files:\n%s", strings.Join(e, "\n"))
 }
 
-func printPercents(fSummary map[string][]string, buff *bytes.Buffer, mode string) {
+func printPercents(root string, fSummary map[string][]string, buff *bytes.Buffer, mode string) {
 	// Select the way we quantify 'amount' of code.
-	var reducer func([]string) (float64, filelistError)
+	var reducer func(string, []string) (float64, filelistError)
 	switch mode {
 	case "file":
 		reducer = fileCountValues
@@ -204,7 +204,8 @@ func printPercents(fSummary map[string][]string, buff *bytes.Buffer, mode string
 		fileValues      = make(map[string]float64)
 	)
 	for fType, files := range fSummary {
-		val, err := reducer(files)
+		//FIXME(bzz): all files here have relative paths
+		val, err := reducer(root, files)
 		if err != nil {
 			unreadableFiles = append(unreadableFiles, err...)
 		}
@@ -229,25 +230,25 @@ func printPercents(fSummary map[string][]string, buff *bytes.Buffer, mode string
 	}
 }
 
-func fileCountValues(files []string) (float64, filelistError) {
+func fileCountValues(_ string, files []string) (float64, filelistError) {
 	return float64(len(files)), nil
 }
 
-func lineCountValues(files []string) (float64, filelistError) {
+func lineCountValues(root string, files []string) (float64, filelistError) {
 	var filesErr filelistError
 	var t float64
 	for _, fName := range files {
-		l, _ := getLines(fName, nil)
+		l, _ := getLines(filepath.Join(root, fName), nil)
 		t += float64(l)
 	}
 	return t, filesErr
 }
 
-func byteCountValues(files []string) (float64, filelistError) {
+func byteCountValues(root string, files []string) (float64, filelistError) {
 	var filesErr filelistError
 	var t float64
 	for _, fName := range files {
-		f, err := os.Open(fName)
+		f, err := os.Open(filepath.Join(root, fName))
 		if err != nil {
 			filesErr = append(filesErr, fName)
 			continue

From 416afb45fcdb8606cbfd0e9bc0f81ce889f8d8d3 Mon Sep 17 00:00:00 2001
From: Alexander Bezzubov <bzz@apache.org>
Date: Mon, 8 Apr 2019 15:58:46 +0200
Subject: [PATCH 7/9] doc: better wording in 'divergences from linguist'
 section

Signed-off-by: Alexander Bezzubov <bzz@apache.org>
---
 README.md | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 7c2f7bb..b36d8b9 100644
--- a/README.md
+++ b/README.md
@@ -183,25 +183,28 @@ To run the tests,
 Divergences from linguist
 ------------
 
+`enry` [CLI tool](#cli) does *not* require a full Git repository to be present in the filesystem in order to report languages.
+
 Using [linguist/samples](https://github.com/github/linguist/tree/master/samples)
 as a set for the tests, the following issues were found:
 
 * [Heuristics for ".es" extension](https://github.com/github/linguist/blob/e761f9b013e5b61161481fcb898b59721ee40e3d/lib/linguist/heuristics.yml#L103) in JavaScript could not be parsed, due to unsupported backreference in RE2 regexp engine
 
-* As of (Linguist v5.3.2)[https://github.com/github/linguist/releases/tag/v5.3.2] it is using [flex-based scanner in C for tokenization](https://github.com/github/linguist/pull/3846). Enry stil uses [extract_token](https://github.com/github/linguist/pull/3846/files#diff-d5179df0b71620e3fac4535cd1368d15L60) regex-based algorithm. [#193](https://github.com/src-d/enry/issues/193)
+* As of (Linguist v5.3.2)[https://github.com/github/linguist/releases/tag/v5.3.2] it is using [flex-based scanner in C for tokenization](https://github.com/github/linguist/pull/3846). Enry stil uses [extract_token](https://github.com/github/linguist/pull/3846/files#diff-d5179df0b71620e3fac4535cd1368d15L60) regex-based algorithm. See [#193](https://github.com/src-d/enry/issues/193).
 
-* Bayesian classifier cann't distinguish "SQL" vs "PLpgSQL. [#194](https://github.com/src-d/enry/issues/194)
+* Bayesian classifier can't distinguish "SQL" from "PLpgSQL. See [#194](https://github.com/src-d/enry/issues/194).
 
-* Dection of [generated files](https://github.com/github/linguist/blob/bf95666fc15e49d556f2def4d0a85338423c25f3/lib/linguist/generated.rb#L53) is not supported yet.
- (Thus they are not exclued from CLI output) [#213](https://github.com/src-d/enry/issues/213)
+* Detection of [generated files](https://github.com/github/linguist/blob/bf95666fc15e49d556f2def4d0a85338423c25f3/lib/linguist/generated.rb#L53) is not supported yet.
+ (Thus they are not excluded from CLI output). See [#213](https://github.com/src-d/enry/issues/213).
 
-* XML detection strategy is not implemented. [#192](https://github.com/src-d/enry/issues/192)
+* XML detection strategy is not implemented. See [#192](https://github.com/src-d/enry/issues/192).
 
-* Overriding languaes and types though `.gitattributes` is not yet supported. [#18](https://github.com/src-d/enry/issues/18)
+* Overriding languages and types through `.gitattributes` is not yet supported. See [#18](https://github.com/src-d/enry/issues/18).
 
-* enry CLI output does NOT exclude `.gitignore`ed files and submodel dirs as linguist does
+* `enry` CLI output does NOT exclude `.gitignore`ed files and git submodules, as linguist does
+
+In all the cases above that have an issue number - we plan to update enry to match Linguist behaviour.
 
-`enry` [CLI tool](#cli) does not require a full Git repository to be present in filesystem in order to report languages.
 
 Benchmarks
 ------------

From bdb5603f28eee0c96782d4e4520f9e9644a5fb3f Mon Sep 17 00:00:00 2001
From: Alexander Bezzubov <bzz@apache.org>
Date: Mon, 8 Apr 2019 16:07:10 +0200
Subject: [PATCH 8/9] Address code review feedback

Signed-off-by: Alexander Bezzubov <bzz@apache.org>
---
 cmd/enry/main.go                              | 19 ++++++++-----------
 common.go                                     |  2 +-
 .../code-generator/generator/samplesfreq.go   | 11 +++++------
 3 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/cmd/enry/main.go b/cmd/enry/main.go
index 20095ec..9ad255d 100644
--- a/cmd/enry/main.go
+++ b/cmd/enry/main.go
@@ -29,7 +29,7 @@ func main() {
 	breakdownFlag := flag.Bool("breakdown", false, "")
 	jsonFlag := flag.Bool("json", false, "")
 	showVersion := flag.Bool("version", false, "Show the enry version information")
-	allLangs := flag.Bool("all", false, "Show not only the files with programming languages (default) but all languages instead")
+	allLangs := flag.Bool("all", false, "Show all files, including those identifed as non-programming languages")
 	countMode := flag.String("mode", "byte", "the method used to count file size. Available options are: file, line and byte")
 	limitKB := flag.Int64("limit", 16*1024, "Analyse first N KB of the file (-1 means no limit)")
 	flag.Parse()
@@ -85,7 +85,7 @@ func main() {
 
 		if enry.IsVendor(relativePath) || enry.IsDotFile(relativePath) ||
 			enry.IsDocumentation(relativePath) || enry.IsConfiguration(relativePath) {
-			//TODO(bzz): skip enry.IsGeneratedPath() after https://github.com/src-d/enry/issues/213
+			// TODO(bzz): skip enry.IsGeneratedPath() after https://github.com/src-d/enry/issues/213
 			if f.IsDir() {
 				return filepath.SkipDir
 			}
@@ -97,16 +97,16 @@ func main() {
 			return nil
 		}
 
-		//TODO(bzz): provide API that mimics lingust CLI output for
-		// running ByExtension & ByFilename
-		// reading the file, if that did not work
-		// GetLanguage([]Strategy)
+		// TODO(bzz): provide API that mimics lingust CLI output for
+		// - running ByExtension & ByFilename
+		// - reading the file, if that did not work
+		// - GetLanguage([]Strategy)
 		content, err := readFile(path, limit)
 		if err != nil {
 			log.Println(err)
 			return nil
 		}
-		//TODO(bzz): skip enry.IsGeneratedContent() after https://github.com/src-d/enry/issues/213
+		// TODO(bzz): skip enry.IsGeneratedContent() as well, after https://github.com/src-d/enry/issues/213
 
 		language := enry.GetLanguage(filepath.Base(path), content)
 		if language == enry.OtherLanguage {
@@ -184,7 +184,7 @@ func (e filelistError) Error() string {
 
 func printPercents(root string, fSummary map[string][]string, buff *bytes.Buffer, mode string) {
 	// Select the way we quantify 'amount' of code.
-	var reducer func(string, []string) (float64, filelistError)
+	reducer := fileCountValues
 	switch mode {
 	case "file":
 		reducer = fileCountValues
@@ -192,8 +192,6 @@ func printPercents(root string, fSummary map[string][]string, buff *bytes.Buffer
 		reducer = lineCountValues
 	case "byte":
 		reducer = byteCountValues
-	default:
-		reducer = fileCountValues
 	}
 
 	// Reduce the list of files to a quantity of file type.
@@ -204,7 +202,6 @@ func printPercents(root string, fSummary map[string][]string, buff *bytes.Buffer
 		fileValues      = make(map[string]float64)
 	)
 	for fType, files := range fSummary {
-		//FIXME(bzz): all files here have relative paths
 		val, err := reducer(root, files)
 		if err != nil {
 			unreadableFiles = append(unreadableFiles, err...)
diff --git a/common.go b/common.go
index b88c339..567dd96 100644
--- a/common.go
+++ b/common.go
@@ -391,7 +391,7 @@ func getDotIndexes(filename string) []int {
 }
 
 // GetLanguagesByContent returns a slice of languages for the given content.
-// It is a Strategy that uses a content-based regexp heuristics and a filename extension.
+// It is a Strategy that uses content-based regexp heuristics and a filename extension.
 func GetLanguagesByContent(filename string, content []byte, _ []string) []string {
 	if filename == "" {
 		return nil
diff --git a/internal/code-generator/generator/samplesfreq.go b/internal/code-generator/generator/samplesfreq.go
index 25fe431..fc166d9 100644
--- a/internal/code-generator/generator/samplesfreq.go
+++ b/internal/code-generator/generator/samplesfreq.go
@@ -53,12 +53,11 @@ func getFrequencies(samplesDir string) (*samplesFrequencies, error) {
 
 	for _, langDir := range langDirs {
 		if !langDir.IsDir() {
-			log.Println(err)
 			continue
 		}
 
 		lang := langDir.Name()
-		samples, err := getSamplesFrom(filepath.Join(samplesDir, lang))
+		samples, err := readSamples(filepath.Join(samplesDir, lang))
 		if err != nil {
 			log.Println(err)
 		}
@@ -92,14 +91,14 @@ func getFrequencies(samplesDir string) (*samplesFrequencies, error) {
 	}, nil
 }
 
-func getSamplesFrom(samplesLangDir string) ([]string, error) {
+func readSamples(samplesLangDir string) ([]string, error) {
 	const samplesLangFilesDir = "filenames"
-	var samples []string
 	sampleFiles, err := ioutil.ReadDir(samplesLangDir)
 	if err != nil {
 		return nil, err
 	}
 
+	var samples []string
 	for _, sampleFile := range sampleFiles {
 		filename := filepath.Join(samplesLangDir, sampleFile.Name())
 		if sampleFile.Mode().IsRegular() {
@@ -108,7 +107,7 @@ func getSamplesFrom(samplesLangDir string) ([]string, error) {
 		}
 
 		if sampleFile.IsDir() && sampleFile.Name() == samplesLangFilesDir {
-			subSamples, err := getSubSamplesFrom(filename)
+			subSamples, err := readSubSamples(filename)
 			if err != nil {
 				return nil, err
 			}
@@ -121,7 +120,7 @@ func getSamplesFrom(samplesLangDir string) ([]string, error) {
 	return samples, nil
 }
 
-func getSubSamplesFrom(path string) ([]string, error) {
+func readSubSamples(path string) ([]string, error) {
 	subSamples := []string{}
 	entries, err := ioutil.ReadDir(path)
 	if err != nil {

From bad147cb7286fe16cfab4f82c1ffb5791c4a722d Mon Sep 17 00:00:00 2001
From: "M. J. Fromberger" <michael.j.fromberger@gmail.com>
Date: Mon, 8 Apr 2019 18:00:43 +0200
Subject: [PATCH 9/9] Update README.md

Co-Authored-By: bzz <bzz@users.noreply.github.com>
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b36d8b9..66e2156 100644
--- a/README.md
+++ b/README.md
@@ -190,7 +190,7 @@ as a set for the tests, the following issues were found:
 
 * [Heuristics for ".es" extension](https://github.com/github/linguist/blob/e761f9b013e5b61161481fcb898b59721ee40e3d/lib/linguist/heuristics.yml#L103) in JavaScript could not be parsed, due to unsupported backreference in RE2 regexp engine
 
-* As of (Linguist v5.3.2)[https://github.com/github/linguist/releases/tag/v5.3.2] it is using [flex-based scanner in C for tokenization](https://github.com/github/linguist/pull/3846). Enry stil uses [extract_token](https://github.com/github/linguist/pull/3846/files#diff-d5179df0b71620e3fac4535cd1368d15L60) regex-based algorithm. See [#193](https://github.com/src-d/enry/issues/193).
+* As of [Linguist v5.3.2](https://github.com/github/linguist/releases/tag/v5.3.2) it is using [flex-based scanner in C for tokenization](https://github.com/github/linguist/pull/3846). Enry still uses [extract_token](https://github.com/github/linguist/pull/3846/files#diff-d5179df0b71620e3fac4535cd1368d15L60) regex-based algorithm. See [#193](https://github.com/src-d/enry/issues/193).
 
 * Bayesian classifier can't distinguish "SQL" from "PLpgSQL. See [#194](https://github.com/src-d/enry/issues/194).