go: remove Classifier from API

Further reduces the public API surface by hiding the unused Classifier API for providing pre-trained classifier weights. Signed-off-by: Alexander Bezzubov <bzz@apache.org>
2025-09-18 19:28:12 +00:00 · 2019-10-29 17:56:13 +01:00
parent 3f0c4e182b
commit fa097f4ed4
4 changed files with 21 additions and 21 deletions
--- a/benchmark_test.go
+++ b/benchmark_test.go
@@ -140,7 +140,7 @@ func BenchmarkClassifyTotal(b *testing.B) {
 	b.Run("Classify()_TOTAL", func(b *testing.B) {
 		for n := 0; n < b.N; n++ {
 			for _, sample := range samples {
-				o = defaultClassifier.Classify(sample.content, nil)
+				o = defaultClassifier.classify(sample.content, nil)
 			}
 			overcomeLanguages = o
@@ -195,7 +195,7 @@ func BenchmarkClassifyPerSample(b *testing.B) {
 	for _, sample := range samples {
 		b.Run("Classify()_SAMPLE_"+sample.filename, func(b *testing.B) {
 			for n := 0; n < b.N; n++ {
-				o = defaultClassifier.Classify(sample.content, nil)
+				o = defaultClassifier.classify(sample.content, nil)
 			}
 			overcomeLanguages = o
--- a/classifier.go
+++ b/classifier.go
@@ -7,13 +7,13 @@ import (
 	"github.com/src-d/enry/v2/internal/tokenizer"
 )
-// Classifier is the interface in charge to detect the possible languages of the given content based on a set of
+// classifier is the interface in charge of detecting the possible languages of the given content based on a set of
 // candidates. Candidates is a map which can be used to assign weights to languages dynamically.
-type Classifier interface {
+type classifier interface {
-	Classify(content []byte, candidates map[string]float64) (languages []string)
+	classify(content []byte, candidates map[string]float64) (languages []string)
 }
-type classifier struct {
+type naiveBayes struct {
 	languagesLogProbabilities map[string]float64
 	tokensLogProbabilities    map[string]map[string]float64
 	tokensTotal               float64
@@ -24,8 +24,8 @@ type scoredLanguage struct {
 	score    float64
 }
-// Classify returns a sorted slice of possible languages sorted by decreasing language's probability
+// classify returns a slice of possible languages sorted by decreasing probability
-func (c *classifier) Classify(content []byte, candidates map[string]float64) []string {
+func (c *naiveBayes) classify(content []byte, candidates map[string]float64) []string {
 	var languages map[string]float64
 	if len(candidates) == 0 {
@@ -73,7 +73,7 @@ func sortLanguagesByScore(scoredLangs []*scoredLanguage) []string {
 	return sortedLanguages
 }
-func (c *classifier) knownLangs() map[string]float64 {
+func (c *naiveBayes) knownLangs() map[string]float64 {
 	langs := make(map[string]float64, len(c.languagesLogProbabilities))
 	for lang := range c.languagesLogProbabilities {
 		langs[lang]++
@@ -82,7 +82,7 @@ func (c *classifier) knownLangs() map[string]float64 {
 	return langs
 }
-func (c *classifier) tokensLogProbability(tokens []string, language string) float64 {
+func (c *naiveBayes) tokensLogProbability(tokens []string, language string) float64 {
 	var sum float64
 	for _, token := range tokens {
 		sum += c.tokenProbability(token, language)
@@ -91,7 +91,7 @@ func (c *classifier) tokensLogProbability(tokens []string, language string) floa
 	return sum
 }
-func (c *classifier) tokenProbability(token, language string) float64 {
+func (c *naiveBayes) tokenProbability(token, language string) float64 {
 	tokenProb, ok := c.tokensLogProbabilities[language][token]
 	if !ok {
 		tokenProb = math.Log(1.000000 / c.tokensTotal)
--- a/common.go
+++ b/common.go
@@ -27,7 +27,7 @@ var DefaultStrategies = []Strategy{
 }
 // defaultClassifier is a Naive Bayes classifier trained on Linguist samples.
-var defaultClassifier Classifier = &classifier{
+var defaultClassifier classifier = &naiveBayes{
 	languagesLogProbabilities: data.LanguagesLogProbabilities,
 	tokensLogProbabilities:    data.TokensLogProbabilities,
 	tokensTotal:               data.TokensTotal,
@@ -108,10 +108,10 @@ func getFirstLanguageAndSafe(languages []string) (language string, safe bool) {
 	return
 }
-// GetLanguageBySpecificClassifier returns the most probably language for the given content using
+// getLanguageBySpecificClassifier returns the most probable language for the given content using the given
 // classifier to detect language.
-func GetLanguageBySpecificClassifier(content []byte, candidates []string, classifier Classifier) (language string, safe bool) {
+func getLanguageBySpecificClassifier(content []byte, candidates []string, classifier classifier) (language string, safe bool) {
-	languages := GetLanguagesBySpecificClassifier(content, candidates, classifier)
+	languages := getLanguagesBySpecificClassifier(content, candidates, classifier)
 	return getFirstLanguageAndSafe(languages)
 }
@@ -420,17 +420,17 @@ func GetLanguagesByClassifier(filename string, content []byte, candidates []stri
 		return nil
 	}
-	return GetLanguagesBySpecificClassifier(content, candidates, defaultClassifier)
+	return getLanguagesBySpecificClassifier(content, candidates, defaultClassifier)
 }
-// GetLanguagesBySpecificClassifier returns a slice of possible languages. It takes in a Classifier to be used.
+// getLanguagesBySpecificClassifier returns a slice of possible languages. It takes in a classifier to be used.
-func GetLanguagesBySpecificClassifier(content []byte, candidates []string, classifier Classifier) (languages []string) {
+func getLanguagesBySpecificClassifier(content []byte, candidates []string, classifier classifier) (languages []string) {
 	mapCandidates := make(map[string]float64)
 	for _, candidate := range candidates {
 		mapCandidates[candidate]++
 	}
-	return classifier.Classify(content, mapCandidates)
+	return classifier.classify(content, mapCandidates)
 }
 // GetLanguageExtensions returns the different extensions being used by the language.
--- a/common_test.go
+++ b/common_test.go
@@ -332,7 +332,7 @@ func (s *EnryTestSuite) TestGetLanguagesBySpecificClassifier() {
 		name       string
 		filename   string
 		candidates []string
-		classifier Classifier
+		classifier classifier
 		expected   string
 	}{
 		{name: "TestGetLanguagesByClassifier_1", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"python", "ruby", "c", "c++"}, classifier: defaultClassifier, expected: "C"},
@@ -348,7 +348,7 @@ func (s *EnryTestSuite) TestGetLanguagesBySpecificClassifier() {
 		content, err := ioutil.ReadFile(test.filename)
 		assert.NoError(s.T(), err)
-		languages := GetLanguagesBySpecificClassifier(content, test.candidates, test.classifier)
+		languages := getLanguagesBySpecificClassifier(content, test.candidates, test.classifier)
 		var language string
 		if len(languages) == 0 {
 			language = OtherLanguage