mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-05-24 08:18:52 -03:00
go: reduce API surface
Don't export defaultClassifier Signed-off-by: Alexander Bezzubov <bzz@apache.org>
This commit is contained in:
parent
a4c166cc04
commit
3f0c4e182b
@ -140,7 +140,7 @@ func BenchmarkClassifyTotal(b *testing.B) {
|
|||||||
b.Run("Classify()_TOTAL", func(b *testing.B) {
|
b.Run("Classify()_TOTAL", func(b *testing.B) {
|
||||||
for n := 0; n < b.N; n++ {
|
for n := 0; n < b.N; n++ {
|
||||||
for _, sample := range samples {
|
for _, sample := range samples {
|
||||||
o = DefaultClassifier.Classify(sample.content, nil)
|
o = defaultClassifier.Classify(sample.content, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
overcomeLanguages = o
|
overcomeLanguages = o
|
||||||
@ -195,7 +195,7 @@ func BenchmarkClassifyPerSample(b *testing.B) {
|
|||||||
for _, sample := range samples {
|
for _, sample := range samples {
|
||||||
b.Run("Classify()_SAMPLE_"+sample.filename, func(b *testing.B) {
|
b.Run("Classify()_SAMPLE_"+sample.filename, func(b *testing.B) {
|
||||||
for n := 0; n < b.N; n++ {
|
for n := 0; n < b.N; n++ {
|
||||||
o = DefaultClassifier.Classify(sample.content, nil)
|
o = defaultClassifier.Classify(sample.content, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
overcomeLanguages = o
|
overcomeLanguages = o
|
||||||
|
10
common.go
10
common.go
@ -26,8 +26,8 @@ var DefaultStrategies = []Strategy{
|
|||||||
GetLanguagesByClassifier,
|
GetLanguagesByClassifier,
|
||||||
}
|
}
|
||||||
|
|
||||||
// DefaultClassifier is a Naive Bayes classifier trained on Linguist samples.
|
// defaultClassifier is a Naive Bayes classifier trained on Linguist samples.
|
||||||
var DefaultClassifier Classifier = &classifier{
|
var defaultClassifier Classifier = &classifier{
|
||||||
languagesLogProbabilities: data.LanguagesLogProbabilities,
|
languagesLogProbabilities: data.LanguagesLogProbabilities,
|
||||||
tokensLogProbabilities: data.TokensLogProbabilities,
|
tokensLogProbabilities: data.TokensLogProbabilities,
|
||||||
tokensTotal: data.TokensTotal,
|
tokensTotal: data.TokensTotal,
|
||||||
@ -92,7 +92,7 @@ func GetLanguageByContent(filename string, content []byte) (language string, saf
|
|||||||
}
|
}
|
||||||
|
|
||||||
// GetLanguageByClassifier returns the most probable language detected for the given content. It uses
|
// GetLanguageByClassifier returns the most probable language detected for the given content. It uses
|
||||||
// DefaultClassifier, if no candidates are provided it returns OtherLanguage.
|
// defaultClassifier, if no candidates are provided it returns OtherLanguage.
|
||||||
func GetLanguageByClassifier(content []byte, candidates []string) (language string, safe bool) {
|
func GetLanguageByClassifier(content []byte, candidates []string) (language string, safe bool) {
|
||||||
return getLanguageByStrategy(GetLanguagesByClassifier, "", content, candidates)
|
return getLanguageByStrategy(GetLanguagesByClassifier, "", content, candidates)
|
||||||
}
|
}
|
||||||
@ -413,14 +413,14 @@ func GetLanguagesByContent(filename string, content []byte, _ []string) []string
|
|||||||
return heuristic.Match(content)
|
return heuristic.Match(content)
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetLanguagesByClassifier uses DefaultClassifier as a Classifier and returns a sorted slice of possible languages ordered by
|
// GetLanguagesByClassifier uses defaultClassifier as a Classifier and returns a sorted slice of possible languages ordered by
|
||||||
// decreasing language's probability. If there are no candidates it returns nil. It complies with the signature to be a Strategy type.
|
// decreasing language's probability. If there are no candidates it returns nil. It complies with the signature to be a Strategy type.
|
||||||
func GetLanguagesByClassifier(filename string, content []byte, candidates []string) (languages []string) {
|
func GetLanguagesByClassifier(filename string, content []byte, candidates []string) (languages []string) {
|
||||||
if len(candidates) == 0 {
|
if len(candidates) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return GetLanguagesBySpecificClassifier(content, candidates, DefaultClassifier)
|
return GetLanguagesBySpecificClassifier(content, candidates, defaultClassifier)
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetLanguagesBySpecificClassifier returns a slice of possible languages. It takes in a Classifier to be used.
|
// GetLanguagesBySpecificClassifier returns a slice of possible languages. It takes in a Classifier to be used.
|
||||||
|
@ -335,13 +335,13 @@ func (s *EnryTestSuite) TestGetLanguagesBySpecificClassifier() {
|
|||||||
classifier Classifier
|
classifier Classifier
|
||||||
expected string
|
expected string
|
||||||
}{
|
}{
|
||||||
{name: "TestGetLanguagesByClassifier_1", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"python", "ruby", "c", "c++"}, classifier: DefaultClassifier, expected: "C"},
|
{name: "TestGetLanguagesByClassifier_1", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"python", "ruby", "c", "c++"}, classifier: defaultClassifier, expected: "C"},
|
||||||
{name: "TestGetLanguagesByClassifier_2", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: nil, classifier: DefaultClassifier, expected: "C"},
|
{name: "TestGetLanguagesByClassifier_2", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: nil, classifier: defaultClassifier, expected: "C"},
|
||||||
{name: "TestGetLanguagesByClassifier_3", filename: filepath.Join(s.samplesDir, "C/main.c"), candidates: []string{}, classifier: DefaultClassifier, expected: "C"},
|
{name: "TestGetLanguagesByClassifier_3", filename: filepath.Join(s.samplesDir, "C/main.c"), candidates: []string{}, classifier: defaultClassifier, expected: "C"},
|
||||||
{name: "TestGetLanguagesByClassifier_4", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"python", "ruby", "c++"}, classifier: DefaultClassifier, expected: "C++"},
|
{name: "TestGetLanguagesByClassifier_4", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"python", "ruby", "c++"}, classifier: defaultClassifier, expected: "C++"},
|
||||||
{name: "TestGetLanguagesByClassifier_5", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"ruby"}, classifier: DefaultClassifier, expected: "Ruby"},
|
{name: "TestGetLanguagesByClassifier_5", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"ruby"}, classifier: defaultClassifier, expected: "Ruby"},
|
||||||
{name: "TestGetLanguagesByClassifier_6", filename: filepath.Join(s.samplesDir, "Python/django-models-base.py"), candidates: []string{"python", "ruby", "c", "c++"}, classifier: DefaultClassifier, expected: "Python"},
|
{name: "TestGetLanguagesByClassifier_6", filename: filepath.Join(s.samplesDir, "Python/django-models-base.py"), candidates: []string{"python", "ruby", "c", "c++"}, classifier: defaultClassifier, expected: "Python"},
|
||||||
{name: "TestGetLanguagesByClassifier_7", filename: os.DevNull, candidates: nil, classifier: DefaultClassifier, expected: "XML"},
|
{name: "TestGetLanguagesByClassifier_7", filename: os.DevNull, candidates: nil, classifier: defaultClassifier, expected: "XML"},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, test := range test {
|
for _, test := range test {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user