2022-11-23 20:49:30 +00:00
|
|
|
package enry
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"io/ioutil"
|
|
|
|
"os"
|
|
|
|
"path/filepath"
|
|
|
|
"testing"
|
|
|
|
|
|
|
|
"github.com/go-enry/go-enry/v2/data"
|
2023-03-04 18:57:11 +00:00
|
|
|
"github.com/go-enry/go-enry/v2/regex"
|
2022-11-23 20:49:30 +00:00
|
|
|
"github.com/stretchr/testify/suite"
|
|
|
|
)
|
|
|
|
|
|
|
|
type linguistCorpusSuite struct {
|
|
|
|
enryBaseTestSuite
|
|
|
|
}
|
|
|
|
|
|
|
|
func Test_EnryOnLinguistCorpus(t *testing.T) {
|
|
|
|
suite.Run(t, new(linguistCorpusSuite))
|
|
|
|
}
|
|
|
|
|
|
|
|
// First part of the test_blob.rb#test_language
|
|
|
|
// https://github.com/github/linguist/blob/59b2d88b2242e6062384e5fb876668cc30ead951/test/test_blob.rb#L258
|
|
|
|
func (s *linguistCorpusSuite) TestLinguistSamples() {
|
|
|
|
const filenamesDir = "filenames"
|
|
|
|
var cornerCases = map[string]bool{
|
2023-09-22 11:19:04 +00:00
|
|
|
"textobj-rubyblock.vba": true, // unsupported negative lookahead RE syntax (https://github.com/github/linguist/blob/8083cb5a89cee2d99f5a988f165994d0243f0d1e/lib/linguist/heuristics.yml#L521)
|
2022-11-23 20:49:30 +00:00
|
|
|
// .es and .ice fail heuristics parsing, but do not fail any tests
|
2023-03-03 22:17:32 +00:00
|
|
|
// 'Adblock Filter List' hack https://github.com/github/linguist/blob/bf853f1c663903e3ee35935189760191f1c45e1c/lib/linguist/heuristics.yml#L680-L702
|
|
|
|
"Imperial Units Remover.txt": true,
|
|
|
|
"abp-filters-anti-cv.txt": true,
|
|
|
|
"anti-facebook.txt": true,
|
|
|
|
"fake-news.txt": true,
|
|
|
|
"test_rules.txt": true,
|
2023-09-22 11:38:49 +00:00
|
|
|
// backreference in .plist heuristics for "XML Property List" language https://github.com/go-enry/go-enry/pull/169#discussion_r1319889500
|
|
|
|
// upsteam fix comming in https://github.com/go-enry/go-enry/pull/169#issuecomment-1708840755
|
|
|
|
"ff-man.plist": true,
|
|
|
|
"info.min.plist": true,
|
|
|
|
"info.plist": true,
|
|
|
|
"man.plist": true,
|
2022-11-23 20:49:30 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
var total, failed, ok, other int
|
|
|
|
var expected string
|
|
|
|
filepath.Walk(s.samplesDir, func(path string, f os.FileInfo, err error) error {
|
|
|
|
if f.IsDir() {
|
|
|
|
if f.Name() != filenamesDir {
|
|
|
|
expected, _ = data.LanguageByAlias(f.Name())
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
filename := filepath.Base(path)
|
|
|
|
content, _ := ioutil.ReadFile(path)
|
|
|
|
|
|
|
|
total++
|
2023-03-03 22:14:06 +00:00
|
|
|
got := GetLanguage(filename, content)
|
|
|
|
if got == OtherLanguage {
|
|
|
|
got = "Other"
|
2022-11-23 20:49:30 +00:00
|
|
|
other++
|
|
|
|
}
|
|
|
|
|
2023-03-03 22:14:06 +00:00
|
|
|
if expected == got {
|
2022-11-23 20:49:30 +00:00
|
|
|
ok++
|
|
|
|
} else {
|
|
|
|
failed++
|
|
|
|
}
|
|
|
|
|
2023-03-03 22:14:06 +00:00
|
|
|
errMsg := fmt.Sprintf("file: %q\texpected: %q\tgot: %q\n", path, expected, got)
|
2022-11-23 20:49:30 +00:00
|
|
|
if _, ok := cornerCases[filename]; ok {
|
2023-09-22 11:18:10 +00:00
|
|
|
s.T().Logf(fmt.Sprintf("\t[corner case] %s", errMsg))
|
2022-11-23 20:49:30 +00:00
|
|
|
} else {
|
2023-03-03 22:14:06 +00:00
|
|
|
s.Equal(expected, got, errMsg)
|
2022-11-23 20:49:30 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
})
|
2023-09-22 11:18:10 +00:00
|
|
|
s.T().Logf("\ttotal files: %d, ok: %d, failed: %d, other: %d\n", total, ok, failed, other)
|
2022-11-23 20:49:30 +00:00
|
|
|
}
|
2023-03-04 18:57:11 +00:00
|
|
|
|
|
|
|
// Second part of the test_blob.rb#test_language
|
|
|
|
// https://github.com/github/linguist/blob/59b2d88b2242e6062384e5fb876668cc30ead951/test/test_blob.rb#L275
|
|
|
|
func (s *linguistCorpusSuite) TestLinguistGenericFixtures() {
|
|
|
|
if regex.Name != regex.Oniguruma {
|
|
|
|
s.T().Skip("requires Ruby regexp support")
|
|
|
|
}
|
|
|
|
const filenamesDir = "filenames"
|
|
|
|
|
|
|
|
var total, failed, ok, multiple int
|
|
|
|
var expected string
|
|
|
|
filepath.Walk(filepath.Join(s.testFixturesDir, "Generic"), func(path string, f os.FileInfo, err error) error {
|
|
|
|
if f.IsDir() {
|
|
|
|
if f.Name() != filenamesDir {
|
|
|
|
expected, _ = data.LanguageByAlias(f.Name())
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
filename := filepath.Base(path)
|
|
|
|
content, _ := ioutil.ReadFile(path)
|
|
|
|
|
|
|
|
total++
|
|
|
|
result := GetLanguagesByContent(filename, content, nil)
|
|
|
|
|
|
|
|
var obtained, status string
|
|
|
|
switch len(result) {
|
|
|
|
case 0:
|
|
|
|
obtained = ""
|
|
|
|
case 1:
|
|
|
|
obtained = result[0]
|
|
|
|
default:
|
|
|
|
obtained = ""
|
|
|
|
multiple++
|
|
|
|
}
|
|
|
|
|
|
|
|
if expected == obtained {
|
|
|
|
status = "ok"
|
|
|
|
ok++
|
|
|
|
} else {
|
|
|
|
status = "failed"
|
|
|
|
failed++
|
|
|
|
}
|
|
|
|
s.Equal(expected, obtained, fmt.Sprintf("%s\texpected: %s\tobtained: %s\tstatus: %s\n", filename, expected, obtained, status))
|
|
|
|
return nil
|
|
|
|
})
|
|
|
|
s.T().Logf("\t\ttotal files: %d, ok: %d, failed: %d, multiple: %d\n", total, ok, failed, multiple)
|
|
|
|
}
|