refactoring: add separate test suite for linguist samples/fixtures

This commit is contained in:
Alex Bezzubov 2022-11-23 21:49:30 +01:00
parent bb7a81ede4
commit a93364ec79
2 changed files with 116 additions and 90 deletions

View File

@ -19,6 +19,7 @@ import (
const linguistURL = "https://github.com/github/linguist.git"
const linguistClonedEnvVar = "ENRY_TEST_REPO"
// Not part of the test suite, because the benchmark does not use testify.
func maybeCloneLinguist() (string, bool, error) {
var err error
linguistTmpDir := os.Getenv(linguistClonedEnvVar)
@ -57,7 +58,7 @@ func maybeCloneLinguist() (string, bool, error) {
return linguistTmpDir, isCleanupNeeded, nil
}
type EnryTestSuite struct {
type enryBaseTestSuite struct {
suite.Suite
tmpLinguistDir string
isCleanupNeeded bool
@ -65,7 +66,31 @@ type EnryTestSuite struct {
testFixturesDir string
}
func (s *EnryTestSuite) TestRegexpEdgeCases() {
// SetupSuite obtains a linguist checkout through maybeCloneLinguist and
// derives the samples and test-fixtures directories from it. If obtaining
// the checkout fails, require.NoError stops the setup.
func (s *enryBaseTestSuite) SetupSuite() {
	dir, cleanup, err := maybeCloneLinguist()
	// Store the results before checking err so the fields reflect whatever
	// maybeCloneLinguist returned, even on failure.
	s.tmpLinguistDir, s.isCleanupNeeded = dir, cleanup
	require.NoError(s.T(), err)

	s.samplesDir = filepath.Join(dir, "samples")
	s.testFixturesDir = filepath.Join(dir, "test", "fixtures")
}
// TearDownSuite removes the linguist directory, but only when the suite
// flagged it as needing cleanup. A removal error is reported as a test
// failure.
func (s *enryBaseTestSuite) TearDownSuite() {
	if !s.isCleanupNeeded {
		return
	}
	assert.NoError(s.T(), os.RemoveAll(s.tmpLinguistDir))
}
// enryTestSuite groups the enry tests declared on it in this file. It embeds
// enryBaseTestSuite and adds no fields of its own.
type enryTestSuite struct {
enryBaseTestSuite
}
// Test_EnryTestSuite is the `go test` entry point that runs enryTestSuite.
func Test_EnryTestSuite(t *testing.T) {
	suite.Run(t, &enryTestSuite{})
}
func (s *enryTestSuite) TestRegexpEdgeCases() {
var regexpEdgeCases = []struct {
lang string
filename string
@ -92,27 +117,7 @@ func (s *EnryTestSuite) TestRegexpEdgeCases() {
}
}
// Test_EnryTestSuite is the `go test` entry point that runs EnryTestSuite.
func Test_EnryTestSuite(t *testing.T) {
	suite.Run(t, &EnryTestSuite{})
}
// SetupSuite obtains a linguist checkout through maybeCloneLinguist and
// derives the samples and test-fixtures directories from it.
//
// A failure to obtain the checkout stops the setup: without it the derived
// paths would point at nothing useful, and continuing would only produce
// misleading follow-up failures.
func (s *EnryTestSuite) SetupSuite() {
	var err error
	s.tmpLinguistDir, s.isCleanupNeeded, err = maybeCloneLinguist()
	// Require (rather than assert) so that setup does not continue with an
	// unusable directory.
	s.Require().NoError(err)

	s.samplesDir = filepath.Join(s.tmpLinguistDir, "samples")
	s.testFixturesDir = filepath.Join(s.tmpLinguistDir, "test", "fixtures")
}
// TearDownSuite removes the linguist directory, but only when the suite
// flagged it as needing cleanup. A removal error is reported as a test
// failure.
func (s *EnryTestSuite) TearDownSuite() {
	if !s.isCleanupNeeded {
		return
	}
	assert.NoError(s.T(), os.RemoveAll(s.tmpLinguistDir))
}
func (s *EnryTestSuite) TestGetLanguage() {
func (s *enryTestSuite) TestGetLanguage() {
tests := []struct {
name string
filename string
@ -134,7 +139,7 @@ func (s *EnryTestSuite) TestGetLanguage() {
}
}
func (s *EnryTestSuite) TestGetLanguages() {
func (s *enryTestSuite) TestGetLanguages() {
tests := []struct {
name string
filename string
@ -166,7 +171,7 @@ func (s *EnryTestSuite) TestGetLanguages() {
}
}
func (s *EnryTestSuite) TestGetLanguagesByModelineLinguist() {
func (s *enryTestSuite) TestGetLanguagesByModelineLinguist() {
var modelinesDir = filepath.Join(s.tmpLinguistDir, "test", "fixtures", "Data", "Modelines")
tests := []struct {
@ -226,7 +231,7 @@ func (s *EnryTestSuite) TestGetLanguagesByModelineLinguist() {
}
}
func (s *EnryTestSuite) TestGetLanguagesByModeline() {
func (s *enryTestSuite) TestGetLanguagesByModeline() {
const (
wrongVim = `# vim: set syntax=ruby ft =python filetype=perl :`
rightVim = `/* vim: set syntax=python ft =python filetype=python */`
@ -253,7 +258,7 @@ func (s *EnryTestSuite) TestGetLanguagesByModeline() {
}
}
func (s *EnryTestSuite) TestGetLanguagesByFilename() {
func (s *enryTestSuite) TestGetLanguagesByFilename() {
tests := []struct {
name string
filename string
@ -281,7 +286,7 @@ func (s *EnryTestSuite) TestGetLanguagesByFilename() {
}
}
func (s *EnryTestSuite) TestGetLanguagesByShebang() {
func (s *enryTestSuite) TestGetLanguagesByShebang() {
const (
multilineExecHack = `#!/bin/sh
# Next line is comment in Tcl, but not in sh... \
@ -366,7 +371,7 @@ println("The shell script says ",vm.arglist.concat(" "));`
}
}
func (s *EnryTestSuite) TestGetLanguagesByExtension() {
func (s *enryTestSuite) TestGetLanguagesByExtension() {
tests := []struct {
name string
filename string
@ -387,7 +392,7 @@ func (s *EnryTestSuite) TestGetLanguagesByExtension() {
}
}
func (s *EnryTestSuite) TestGetLanguagesByManpage() {
func (s *enryTestSuite) TestGetLanguagesByManpage() {
tests := []struct {
name string
filename string
@ -411,7 +416,7 @@ func (s *EnryTestSuite) TestGetLanguagesByManpage() {
}
}
func (s *EnryTestSuite) TestGetLanguagesByXML() {
func (s *enryTestSuite) TestGetLanguagesByXML() {
tests := []struct {
name string
filename string
@ -434,7 +439,7 @@ func (s *EnryTestSuite) TestGetLanguagesByXML() {
}
}
func (s *EnryTestSuite) TestGetLanguagesByClassifier() {
func (s *enryTestSuite) TestGetLanguagesByClassifier() {
test := []struct {
name string
filename string
@ -471,7 +476,7 @@ func (s *EnryTestSuite) TestGetLanguagesByClassifier() {
}
}
func (s *EnryTestSuite) TestGetLanguagesBySpecificClassifier() {
func (s *enryTestSuite) TestGetLanguagesBySpecificClassifier() {
test := []struct {
name string
filename string
@ -504,7 +509,7 @@ func (s *EnryTestSuite) TestGetLanguagesBySpecificClassifier() {
}
}
func (s *EnryTestSuite) TestGetLanguageExtensions() {
func (s *enryTestSuite) TestGetLanguageExtensions() {
tests := []struct {
name string
language string
@ -521,7 +526,7 @@ func (s *EnryTestSuite) TestGetLanguageExtensions() {
}
}
func (s *EnryTestSuite) TestGetLanguageType() {
func (s *enryTestSuite) TestGetLanguageType() {
tests := []struct {
name string
language string
@ -544,7 +549,7 @@ func (s *EnryTestSuite) TestGetLanguageType() {
}
}
func (s *EnryTestSuite) TestGetLanguageGroup() {
func (s *enryTestSuite) TestGetLanguageGroup() {
tests := []struct {
name string
language string
@ -562,7 +567,7 @@ func (s *EnryTestSuite) TestGetLanguageGroup() {
}
}
func (s *EnryTestSuite) TestGetLanguageByAlias() {
func (s *enryTestSuite) TestGetLanguageByAlias() {
tests := []struct {
name string
alias string
@ -588,57 +593,7 @@ func (s *EnryTestSuite) TestGetLanguageByAlias() {
}
}
// TestLinguistCorpus runs GetLanguage on every file under the linguist
// samples directory and compares the result with the expected language.
//
// The expected language is resolved with data.LanguageByAlias from the name
// of the most recently visited directory; a directory named "filenames" does
// not change it, so its files keep the language of the enclosing directory.
// Files listed in cornerCases are only logged, never asserted on.
func (s *EnryTestSuite) TestLinguistCorpus() {
	const filenamesDir = "filenames"
	var cornerCases = map[string]bool{
		"drop_stuff.sql":        true, // https://github.com/src-d/enry/issues/194
		"textobj-rubyblock.vba": true, // Because of unsupported negative lookahead RE syntax (https://github.com/github/linguist/blob/8083cb5a89cee2d99f5a988f165994d0243f0d1e/lib/linguist/heuristics.yml#L521)
		// .es and .ice fail heuristics parsing, but do not fail any tests
	}

	var total, failed, ok, other int
	var expected string
	walkErr := filepath.Walk(s.samplesDir, func(path string, f os.FileInfo, err error) error {
		if err != nil {
			// f can be nil when the walk reports an error (e.g. the samples
			// directory is missing); stop instead of dereferencing it.
			return err
		}
		if f.IsDir() {
			if f.Name() != filenamesDir {
				expected, _ = data.LanguageByAlias(f.Name())
			}
			return nil
		}

		filename := filepath.Base(path)
		content, err := ioutil.ReadFile(path)
		if !assert.NoError(s.T(), err, "reading sample %s", path) {
			// An unreadable sample is reported and skipped rather than
			// silently classified from empty content.
			return nil
		}

		total++
		obtained := GetLanguage(filename, content)
		if obtained == OtherLanguage {
			obtained = "Other"
			other++
		}

		var status string
		if expected == obtained {
			status = "ok"
			ok++
		} else {
			status = "failed"
			failed++
		}

		// Named isCornerCase to avoid shadowing the ok counter above.
		if _, isCornerCase := cornerCases[filename]; isCornerCase {
			s.T().Logf("\t\t[considered corner case] %s\texpected: %s\tobtained: %s\tstatus: %s\n", filename, expected, obtained, status)
		} else {
			assert.Equal(s.T(), expected, obtained, fmt.Sprintf("%s\texpected: %s\tobtained: %s\tstatus: %s\n", filename, expected, obtained, status))
		}
		return nil
	})
	assert.NoError(s.T(), walkErr, "walking samples dir %s", s.samplesDir)

	s.T().Logf("\t\ttotal files: %d, ok: %d, failed: %d, other: %d\n", total, ok, failed, other)
}
func (s *EnryTestSuite) TestGetLanguageID() {
func (s *enryTestSuite) TestGetLanguageID() {
tests := []struct {
name string
language string
@ -661,7 +616,7 @@ func (s *EnryTestSuite) TestGetLanguageID() {
}
}
func (s *EnryTestSuite) TestGetLanguageInfo() {
func (s *enryTestSuite) TestGetLanguageInfo() {
tests := []struct {
name string
language string
@ -688,7 +643,7 @@ func (s *EnryTestSuite) TestGetLanguageInfo() {
}
}
func (s *EnryTestSuite) TestGetLanguageInfoByID() {
func (s *enryTestSuite) TestGetLanguageInfoByID() {
tests := []struct {
name string
id int

71
linguist_corpus_test.go Normal file
View File

@ -0,0 +1,71 @@
package enry
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"testing"
"github.com/go-enry/go-enry/v2/data"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
)
// linguistCorpusSuite holds the tests that run enry against the linguist
// samples. It embeds enryBaseTestSuite and adds no fields of its own.
type linguistCorpusSuite struct {
enryBaseTestSuite
}
// Test_EnryOnLinguistCorpus is the `go test` entry point that runs
// linguistCorpusSuite.
func Test_EnryOnLinguistCorpus(t *testing.T) {
	suite.Run(t, &linguistCorpusSuite{})
}
// TestLinguistSamples runs GetLanguage on every file under the linguist
// samples directory and compares the result with the expected language.
//
// First part of the test_blob.rb#test_language
// https://github.com/github/linguist/blob/59b2d88b2242e6062384e5fb876668cc30ead951/test/test_blob.rb#L258
//
// The expected language is resolved with data.LanguageByAlias from the name
// of the most recently visited directory; a directory named "filenames" does
// not change it, so its files keep the language of the enclosing directory.
// Files listed in cornerCases are only logged, never asserted on.
func (s *linguistCorpusSuite) TestLinguistSamples() {
	const filenamesDir = "filenames"
	var cornerCases = map[string]bool{
		"drop_stuff.sql":        true, // https://github.com/src-d/enry/issues/194
		"textobj-rubyblock.vba": true, // Because of unsupported negative lookahead RE syntax (https://github.com/github/linguist/blob/8083cb5a89cee2d99f5a988f165994d0243f0d1e/lib/linguist/heuristics.yml#L521)
		// .es and .ice fail heuristics parsing, but do not fail any tests
	}

	var total, failed, ok, other int
	var expected string
	walkErr := filepath.Walk(s.samplesDir, func(path string, f os.FileInfo, err error) error {
		if err != nil {
			// f can be nil when the walk reports an error (e.g. the samples
			// directory is missing); stop instead of dereferencing it.
			return err
		}
		if f.IsDir() {
			if f.Name() != filenamesDir {
				expected, _ = data.LanguageByAlias(f.Name())
			}
			return nil
		}

		filename := filepath.Base(path)
		content, err := ioutil.ReadFile(path)
		if !assert.NoError(s.T(), err, "reading sample %s", path) {
			// An unreadable sample is reported and skipped rather than
			// silently classified from empty content.
			return nil
		}

		total++
		obtained := GetLanguage(filename, content)
		if obtained == OtherLanguage {
			obtained = "Other"
			other++
		}

		var status string
		if expected == obtained {
			status = "ok"
			ok++
		} else {
			status = "failed"
			failed++
		}

		// Named isCornerCase to avoid shadowing the ok counter above.
		if _, isCornerCase := cornerCases[filename]; isCornerCase {
			s.T().Logf("\t\t[considered corner case] %s\texpected: %s\tobtained: %s\tstatus: %s\n", filename, expected, obtained, status)
		} else {
			assert.Equal(s.T(), expected, obtained, fmt.Sprintf("%s\texpected: %s\tobtained: %s\tstatus: %s\n", filename, expected, obtained, status))
		}
		return nil
	})
	assert.NoError(s.T(), walkErr, "walking samples dir %s", s.samplesDir)

	s.T().Logf("\t\ttotal files: %d, ok: %d, failed: %d, other: %d\n", total, ok, failed, other)
}