test: cover GetLanguageByContent confusing edge cases

And clarify documentation wording, based on discussion
at https://github.com/go-enry/go-enry/issues/145

test plan:
 * go test -run '^Test_EnryTestSuite$' -testify.m '^(TestGetLanguageByContent)$' ./...
This commit is contained in:
Alex Bezzubov 2022-11-29 22:42:23 +01:00
parent a243a1fde8
commit 86cae02425
2 changed files with 20 additions and 1 deletions

View File

@ -61,7 +61,7 @@ To make a guess only based on the content of the file or a text snippet, use
### By file
The most accurate guess would be one when both, the file name and the content are available:
The most accurate guess would be when both a file name and its content are available:
- `GetLanguagesByContent` only uses file extension and a set of regexp-based content heuristics.
- `GetLanguages` uses the full set of matching strategies and is expected to be most accurate.

View File

@ -371,6 +371,25 @@ println("The shell script says ",vm.arglist.concat(" "));`
}
}
// TestGetLanguageByContent pins down the result of GetLanguageByContent for a
// handful of edge cases that are easy to misread: an empty input, a file name
// whose extension is handled without consulting the content, and a header file
// whose language is decided by the content.
func (s *enryTestSuite) TestGetLanguageByContent() {
	type contentCase struct {
		name     string
		filename string
		content  []byte
		expected string
	}

	cases := []contentCase{
		// Nothing to go on: no file name and no content.
		{
			name:     "TestGetLanguageByContent_0",
			filename: "",
			expected: "",
		},
		// Empty result is expected here, as .cpp is unambiguous.
		{
			name:     "TestGetLanguageByContent_1",
			filename: "foo.cpp",
			content:  []byte("int main() { return 0; }"),
			expected: "",
		},
		// C, as the content does not match any of the heuristics for C++ or Objective-C.
		{
			name:     "TestGetLanguageByContent_2",
			filename: "foo.h",
			content:  []byte("int main() { return 0; }"),
			expected: "C",
		},
		// '#include <string>' matches the regex heuristic.
		{
			name:     "TestGetLanguageByContent_3",
			filename: "foo.h",
			content:  []byte("#include <string>\n int main() { return 0; }"),
			expected: "C++",
		},
	}

	for i := range cases {
		tc := cases[i]
		// Only the detected language is asserted; the second return value is ignored.
		got, _ := GetLanguageByContent(tc.filename, tc.content)
		failureMsg := fmt.Sprintf("%v: languages = %v, expected: %v", tc.name, got, tc.expected)
		assert.Equal(s.T(), tc.expected, got, failureMsg)
	}
}
func (s *enryTestSuite) TestGetLanguagesByExtension() {
tests := []struct {
name string