Merge pull request #144 from go-enry/refactoring-tests

Refactoring tests
This commit is contained in:
Alex 2022-12-01 22:06:00 +01:00 committed by GitHub
commit a243a1fde8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 222 additions and 223 deletions

View File

@ -6,11 +6,8 @@ import (
"io/ioutil"
"log"
"os"
"os/exec"
"path/filepath"
"testing"
"github.com/go-enry/go-enry/v2/data"
)
type sample struct {
@ -23,22 +20,21 @@ var (
overcomeLanguage string
overcomeLanguages []string
samples []*sample
samplesDir string
cloned bool
)
func TestMain(m *testing.M) {
flag.BoolVar(&slow, "slow", false, "run benchmarks per sample for strategies too")
flag.Parse()
if err := cloneLinguist(linguistURL); err != nil {
tmpLinguistDir, cleanupNeeded, err := maybeCloneLinguist()
if err != nil {
log.Fatal(err)
}
if cloned {
defer os.RemoveAll(filepath.Dir(samplesDir))
if cleanupNeeded {
defer os.RemoveAll(tmpLinguistDir)
}
var err error
samplesDir := filepath.Join(tmpLinguistDir, "samples")
samples, err = getSamples(samplesDir)
if err != nil {
log.Fatal(err)
@ -47,47 +43,6 @@ func TestMain(m *testing.M) {
os.Exit(m.Run())
}
// cloneLinguist makes a Linguist working tree available for the benchmarks and
// checks it out at data.LinguistCommit.
//
// When the environment variable named by linguistClonedEnvVar is set, its value
// is used as the path of an existing clone. Otherwise linguistURL is cloned into
// a fresh temporary directory. The package-level variables cloned and samplesDir
// are set as a side effect: cloned reports whether a temporary clone was made,
// samplesDir points at the "samples" directory inside the repository.
//
// It returns the first error encountered.
func cloneLinguist(linguistURL string) error {
	repoLinguist := os.Getenv(linguistClonedEnvVar)
	cloned = repoLinguist == ""
	if cloned {
		var err error
		repoLinguist, err = ioutil.TempDir("", "linguist-")
		if err != nil {
			return err
		}
	}

	samplesDir = filepath.Join(repoLinguist, "samples")

	if cloned {
		clone := exec.Command("git", "clone", linguistURL, repoLinguist)
		if err := clone.Run(); err != nil {
			return err
		}
	}

	// Run git inside the repository through Cmd.Dir instead of os.Chdir: the
	// working directory is process-wide state, and the previous Chdir round
	// trip left it pointing at the repository whenever the checkout failed.
	checkout := exec.Command("git", "checkout", data.LinguistCommit)
	checkout.Dir = repoLinguist
	return checkout.Run()
}
func getSamples(dir string) ([]*sample, error) {
samples := make([]*sample, 0, 2000)
err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {

View File

@ -38,7 +38,7 @@ var defaultClassifier classifier = &naiveBayes{
}
// GetLanguage applies a sequence of strategies based on the given filename and content
// to find out the most probably language to return.
// to find out the most probable language to return.
func GetLanguage(filename string, content []byte) (language string) {
languages := GetLanguages(filename, content)
return firstLanguage(languages)
@ -508,28 +508,6 @@ func GetLanguageExtensions(language string) []string {
return data.ExtensionsByLanguage[language]
}
// GetLanguageID returns the ID for the language. IDs are assigned by GitHub.
// The input must be the canonical language name. Aliases are not supported.
//
// NOTE: The zero value (0) is a valid language ID, so this API mimics the Go
// map API. Use the second return value to check if the language was found.
func GetLanguageID(language string) (int, bool) {
id, ok := data.IDByLanguage[language]
return id, ok
}
// Type represent language's type. Either data, programming, markup, prose, or unknown.
type Type int
// Type's values.
const (
Unknown Type = Type(data.TypeUnknown)
Data = Type(data.TypeData)
Programming = Type(data.TypeProgramming)
Markup = Type(data.TypeMarkup)
Prose = Type(data.TypeProse)
)
// GetLanguageType returns the type of the given language.
func GetLanguageType(language string) (langType Type) {
intType, ok := data.LanguagesType[language]
@ -540,6 +518,15 @@ func GetLanguageType(language string) (langType Type) {
return langType
}
// GetLanguageGroup looks the language up in data.LanguagesGroup and returns its
// group. Languages without an entry yield the empty string.
func GetLanguageGroup(language string) string {
	// A lookup of a missing key yields the zero value, which for a string is
	// exactly the "no group" result.
	return data.LanguagesGroup[language]
}
// GetLanguageByAlias returns either the language related to the given alias and ok set to true
// or OtherLanguage and ok set to false if the alias is not recognized.
func GetLanguageByAlias(alias string) (lang string, ok bool) {
@ -551,13 +538,14 @@ func GetLanguageByAlias(alias string) (lang string, ok bool) {
return
}
// GetLanguageGroup returns language group or empty string if language does not have group.
func GetLanguageGroup(language string) string {
if group, ok := data.LanguagesGroup[language]; ok {
return group
}
return ""
// GetLanguageID returns the ID for the language. IDs are assigned by GitHub.
// The input must be the canonical language name. Aliases are not supported.
//
// NOTE: The zero value (0) is a valid language ID, so this API mimics the Go
// map API. Use the second return value to check if the language was found.
func GetLanguageID(language string) (int, bool) {
id, ok := data.IDByLanguage[language]
return id, ok
}
// GetLanguageInfo returns the LanguageInfo for a given language name, or an error if not found.

View File

@ -19,15 +19,78 @@ import (
const linguistURL = "https://github.com/github/linguist.git"
const linguistClonedEnvVar = "ENRY_TEST_REPO"
type EnryTestSuite struct {
// maybeCloneLinguist makes a Linguist working tree available and checks it out
// at data.LinguistCommit.
//
// When the environment variable named by linguistClonedEnvVar is set, its value
// is used as the path of an existing clone. Otherwise the repository at
// linguistURL is cloned (with --depth 100) into a fresh temporary directory.
//
// It returns the repository directory, whether the caller has to remove that
// directory afterwards (true only when it was created here), and the first
// error encountered. Once the temporary directory exists, the directory and the
// cleanup flag are returned even on error so the caller can still clean up.
//
// It is a plain function, not a part of the test suite, as the benchmark does
// not use testify.
func maybeCloneLinguist() (string, bool, error) {
	linguistTmpDir := os.Getenv(linguistClonedEnvVar)
	isCleanupNeeded := false

	if linguistTmpDir == "" {
		var err error
		linguistTmpDir, err = ioutil.TempDir("", "linguist-")
		if err != nil {
			return "", false, err
		}
		isCleanupNeeded = true

		clone := exec.Command("git", "clone", "--depth", "100", linguistURL, linguistTmpDir)
		if err := clone.Run(); err != nil {
			return linguistTmpDir, isCleanupNeeded, err
		}
	}

	// Run git inside the repository through Cmd.Dir instead of os.Chdir: the
	// working directory is process-wide state, and the previous Chdir round
	// trip left it pointing at the repository whenever the checkout failed.
	checkout := exec.Command("git", "checkout", data.LinguistCommit)
	checkout.Dir = linguistTmpDir
	if err := checkout.Run(); err != nil {
		return linguistTmpDir, isCleanupNeeded, err
	}

	return linguistTmpDir, isCleanupNeeded, nil
}
type enryBaseTestSuite struct {
suite.Suite
tmpLinguist string
needToClone bool
tmpLinguistDir string
isCleanupNeeded bool
samplesDir string
testFixturesDir string
}
func (s *EnryTestSuite) TestRegexpEdgeCases() {
// SetupSuite obtains a Linguist checkout through maybeCloneLinguist and records
// on the suite the repository directory, whether it must be removed on teardown,
// and the derived samples and test-fixtures directories.
func (s *enryBaseTestSuite) SetupSuite() {
	repoDir, cleanup, cloneErr := maybeCloneLinguist()
	// Store the results before asserting, so they are set even if the clone failed.
	s.tmpLinguistDir, s.isCleanupNeeded = repoDir, cleanup
	require.NoError(s.T(), cloneErr)

	s.samplesDir = filepath.Join(repoDir, "samples")
	s.testFixturesDir = filepath.Join(repoDir, "test", "fixtures")
}
// TearDownSuite removes the Linguist directory, but only when the suite was
// flagged as needing cleanup; the removal is required to succeed.
func (s *enryBaseTestSuite) TearDownSuite() {
	if !s.isCleanupNeeded {
		return
	}
	require.NoError(s.T(), os.RemoveAll(s.tmpLinguistDir))
}
// enryTestSuite is the receiver of the enry test methods. It embeds
// enryBaseTestSuite, so it gets that type's fields and its
// SetupSuite/TearDownSuite methods.
type enryTestSuite struct {
enryBaseTestSuite
}
// Test_EnryTestSuite is the `go test` entry point: it hands a fresh
// enryTestSuite to suite.Run.
func Test_EnryTestSuite(t *testing.T) {
suite.Run(t, new(enryTestSuite))
}
func (s *enryTestSuite) TestRegexpEdgeCases() {
var regexpEdgeCases = []struct {
lang string
filename string
@ -41,7 +104,7 @@ func (s *EnryTestSuite) TestRegexpEdgeCases() {
}
for _, r := range regexpEdgeCases {
filename := filepath.Join(s.tmpLinguist, "samples", r.lang, r.filename)
filename := filepath.Join(s.tmpLinguistDir, "samples", r.lang, r.filename)
content, err := ioutil.ReadFile(filename)
require.NoError(s.T(), err)
@ -54,51 +117,7 @@ func (s *EnryTestSuite) TestRegexpEdgeCases() {
}
}
func Test_EnryTestSuite(t *testing.T) {
suite.Run(t, new(EnryTestSuite))
}
func (s *EnryTestSuite) SetupSuite() {
var err error
s.tmpLinguist = os.Getenv(linguistClonedEnvVar)
s.needToClone = s.tmpLinguist == ""
if s.needToClone {
s.tmpLinguist, err = ioutil.TempDir("", "linguist-")
require.NoError(s.T(), err)
s.T().Logf("Cloning Linguist repo to '%s' as %s was not set\n",
s.tmpLinguist, linguistClonedEnvVar)
cmd := exec.Command("git", "clone", linguistURL, s.tmpLinguist)
err = cmd.Run()
require.NoError(s.T(), err)
}
s.samplesDir = filepath.Join(s.tmpLinguist, "samples")
s.T().Logf("using samples from %s", s.samplesDir)
s.testFixturesDir = filepath.Join(s.tmpLinguist, "test", "fixtures")
s.T().Logf("using test fixtures from %s", s.samplesDir)
cwd, err := os.Getwd()
assert.NoError(s.T(), err)
err = os.Chdir(s.tmpLinguist)
assert.NoError(s.T(), err)
cmd := exec.Command("git", "checkout", data.LinguistCommit)
err = cmd.Run()
assert.NoError(s.T(), err)
err = os.Chdir(cwd)
assert.NoError(s.T(), err)
}
func (s *EnryTestSuite) TearDownSuite() {
if s.needToClone {
err := os.RemoveAll(s.tmpLinguist)
assert.NoError(s.T(), err)
}
}
func (s *EnryTestSuite) TestGetLanguage() {
func (s *enryTestSuite) TestGetLanguage() {
tests := []struct {
name string
filename string
@ -120,7 +139,7 @@ func (s *EnryTestSuite) TestGetLanguage() {
}
}
func (s *EnryTestSuite) TestGetLanguages() {
func (s *enryTestSuite) TestGetLanguages() {
tests := []struct {
name string
filename string
@ -152,8 +171,8 @@ func (s *EnryTestSuite) TestGetLanguages() {
}
}
func (s *EnryTestSuite) TestGetLanguagesByModelineLinguist() {
var modelinesDir = filepath.Join(s.tmpLinguist, "test", "fixtures", "Data", "Modelines")
func (s *enryTestSuite) TestGetLanguagesByModelineLinguist() {
var modelinesDir = filepath.Join(s.tmpLinguistDir, "test", "fixtures", "Data", "Modelines")
tests := []struct {
name string
@ -212,7 +231,7 @@ func (s *EnryTestSuite) TestGetLanguagesByModelineLinguist() {
}
}
func (s *EnryTestSuite) TestGetLanguagesByModeline() {
func (s *enryTestSuite) TestGetLanguagesByModeline() {
const (
wrongVim = `# vim: set syntax=ruby ft =python filetype=perl :`
rightVim = `/* vim: set syntax=python ft =python filetype=python */`
@ -239,7 +258,7 @@ func (s *EnryTestSuite) TestGetLanguagesByModeline() {
}
}
func (s *EnryTestSuite) TestGetLanguagesByFilename() {
func (s *enryTestSuite) TestGetLanguagesByFilename() {
tests := []struct {
name string
filename string
@ -267,7 +286,7 @@ func (s *EnryTestSuite) TestGetLanguagesByFilename() {
}
}
func (s *EnryTestSuite) TestGetLanguagesByShebang() {
func (s *enryTestSuite) TestGetLanguagesByShebang() {
const (
multilineExecHack = `#!/bin/sh
# Next line is comment in Tcl, but not in sh... \
@ -352,7 +371,7 @@ println("The shell script says ",vm.arglist.concat(" "));`
}
}
func (s *EnryTestSuite) TestGetLanguagesByExtension() {
func (s *enryTestSuite) TestGetLanguagesByExtension() {
tests := []struct {
name string
filename string
@ -373,7 +392,7 @@ func (s *EnryTestSuite) TestGetLanguagesByExtension() {
}
}
func (s *EnryTestSuite) TestGetLanguagesByManpage() {
func (s *enryTestSuite) TestGetLanguagesByManpage() {
tests := []struct {
name string
filename string
@ -397,7 +416,7 @@ func (s *EnryTestSuite) TestGetLanguagesByManpage() {
}
}
func (s *EnryTestSuite) TestGetLanguagesByXML() {
func (s *enryTestSuite) TestGetLanguagesByXML() {
tests := []struct {
name string
filename string
@ -420,7 +439,7 @@ func (s *EnryTestSuite) TestGetLanguagesByXML() {
}
}
func (s *EnryTestSuite) TestGetLanguagesByClassifier() {
func (s *enryTestSuite) TestGetLanguagesByClassifier() {
test := []struct {
name string
filename string
@ -457,7 +476,7 @@ func (s *EnryTestSuite) TestGetLanguagesByClassifier() {
}
}
func (s *EnryTestSuite) TestGetLanguagesBySpecificClassifier() {
func (s *enryTestSuite) TestGetLanguagesBySpecificClassifier() {
test := []struct {
name string
filename string
@ -490,7 +509,7 @@ func (s *EnryTestSuite) TestGetLanguagesBySpecificClassifier() {
}
}
func (s *EnryTestSuite) TestGetLanguageExtensions() {
func (s *enryTestSuite) TestGetLanguageExtensions() {
tests := []struct {
name string
language string
@ -507,7 +526,7 @@ func (s *EnryTestSuite) TestGetLanguageExtensions() {
}
}
func (s *EnryTestSuite) TestGetLanguageType() {
func (s *enryTestSuite) TestGetLanguageType() {
tests := []struct {
name string
language string
@ -530,7 +549,7 @@ func (s *EnryTestSuite) TestGetLanguageType() {
}
}
func (s *EnryTestSuite) TestGetLanguageGroup() {
func (s *enryTestSuite) TestGetLanguageGroup() {
tests := []struct {
name string
language string
@ -548,7 +567,7 @@ func (s *EnryTestSuite) TestGetLanguageGroup() {
}
}
func (s *EnryTestSuite) TestGetLanguageByAlias() {
func (s *enryTestSuite) TestGetLanguageByAlias() {
tests := []struct {
name string
alias string
@ -574,57 +593,7 @@ func (s *EnryTestSuite) TestGetLanguageByAlias() {
}
}
func (s *EnryTestSuite) TestLinguistCorpus() {
const filenamesDir = "filenames"
var cornerCases = map[string]bool{
"drop_stuff.sql": true, // https://github.com/src-d/enry/issues/194
"textobj-rubyblock.vba": true, // Because of unsupported negative lookahead RE syntax (https://github.com/github/linguist/blob/8083cb5a89cee2d99f5a988f165994d0243f0d1e/lib/linguist/heuristics.yml#L521)
// .es and .ice fail heuristics parsing, but do not fail any tests
}
var total, failed, ok, other int
var expected string
filepath.Walk(s.samplesDir, func(path string, f os.FileInfo, err error) error {
if f.IsDir() {
if f.Name() != filenamesDir {
expected, _ = data.LanguageByAlias(f.Name())
}
return nil
}
filename := filepath.Base(path)
content, _ := ioutil.ReadFile(path)
total++
obtained := GetLanguage(filename, content)
if obtained == OtherLanguage {
obtained = "Other"
other++
}
var status string
if expected == obtained {
status = "ok"
ok++
} else {
status = "failed"
failed++
}
if _, ok := cornerCases[filename]; ok {
s.T().Logf("\t\t[considered corner case] %s\texpected: %s\tobtained: %s\tstatus: %s\n", filename, expected, obtained, status)
} else {
assert.Equal(s.T(), expected, obtained, fmt.Sprintf("%s\texpected: %s\tobtained: %s\tstatus: %s\n", filename, expected, obtained, status))
}
return nil
})
s.T().Logf("\t\ttotal files: %d, ok: %d, failed: %d, other: %d\n", total, ok, failed, other)
}
func (s *EnryTestSuite) TestGetLanguageID() {
func (s *enryTestSuite) TestGetLanguageID() {
tests := []struct {
name string
language string
@ -647,7 +616,7 @@ func (s *EnryTestSuite) TestGetLanguageID() {
}
}
func (s *EnryTestSuite) TestGetLanguageInfo() {
func (s *enryTestSuite) TestGetLanguageInfo() {
tests := []struct {
name string
language string
@ -674,7 +643,7 @@ func (s *EnryTestSuite) TestGetLanguageInfo() {
}
}
func (s *EnryTestSuite) TestGetLanguageInfoByID() {
func (s *enryTestSuite) TestGetLanguageInfoByID() {
tests := []struct {
name string
id int

14
enry.go
View File

@ -14,3 +14,17 @@
package enry // import "github.com/go-enry/go-enry/v2"
//go:generate make code-generate
import "github.com/go-enry/go-enry/v2/data"
// Type represents the type of a language: data, programming, markup, prose,
// or unknown.
type Type int
// Values a Type can take. Each one is a conversion of the corresponding
// Type* constant from the data package.
const (
Unknown Type = Type(data.TypeUnknown)
Data = Type(data.TypeData)
Programming = Type(data.TypeProgramming)
Markup = Type(data.TypeMarkup)
Prose = Type(data.TypeProse)
)

View File

@ -97,9 +97,9 @@ var (
type GeneratorTestSuite struct {
suite.Suite
tmpLinguistDir string
isLinguistCloned bool
testCases []testCase
tmpLinguistDir string
isCleanupNeeded bool
testCases []testCase
}
type testCase struct {
@ -122,27 +122,31 @@ func Test_GeneratorTestSuite(t *testing.T) {
func (s *GeneratorTestSuite) maybeCloneLinguist() {
var err error
s.tmpLinguistDir = os.Getenv(linguistClonedEnvVar)
s.isLinguistCloned = s.tmpLinguistDir != ""
if !s.isLinguistCloned {
isLinguistCloned := s.tmpLinguistDir != ""
if !isLinguistCloned {
s.tmpLinguistDir, err = ioutil.TempDir("", "linguist-")
assert.NoError(s.T(), err)
cmd := exec.Command("git", "clone", linguistURL, s.tmpLinguistDir)
require.NoError(s.T(), err)
s.T().Logf("Cloning Linguist repo to '%s' as %s was not set\n",
s.tmpLinguistDir, linguistClonedEnvVar)
cmd := exec.Command("git", "clone", "--depth", "100", linguistURL, s.tmpLinguistDir)
err = cmd.Run()
assert.NoError(s.T(), err)
cwd, err := os.Getwd()
assert.NoError(s.T(), err)
err = os.Chdir(s.tmpLinguistDir)
assert.NoError(s.T(), err)
cmd = exec.Command("git", "checkout", commit)
err = cmd.Run()
assert.NoError(s.T(), err)
err = os.Chdir(cwd)
assert.NoError(s.T(), err)
require.NoError(s.T(), err)
s.isCleanupNeeded = true
}
cwd, err := os.Getwd()
require.NoError(s.T(), err)
err = os.Chdir(s.tmpLinguistDir)
require.NoError(s.T(), err)
cmd := exec.Command("git", "checkout", commit)
err = cmd.Run()
require.NoError(s.T(), err)
err = os.Chdir(cwd)
require.NoError(s.T(), err)
}
func (s *GeneratorTestSuite) SetupSuite() {
@ -280,11 +284,9 @@ func (s *GeneratorTestSuite) SetupSuite() {
}
func (s *GeneratorTestSuite) TearDownSuite() {
if s.isLinguistCloned {
if s.isCleanupNeeded {
err := os.RemoveAll(s.tmpLinguistDir)
if err != nil {
s.T().Logf("Failed to clean up %s after the test.\n", s.tmpLinguistDir)
}
assert.NoError(s.T(), err)
}
}

71
linguist_corpus_test.go Normal file
View File

@ -0,0 +1,71 @@
package enry
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"testing"
"github.com/go-enry/go-enry/v2/data"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
)
// linguistCorpusSuite is the receiver of the tests that run against the
// Linguist corpus. It embeds enryBaseTestSuite and reads the samples directory
// from it.
type linguistCorpusSuite struct {
enryBaseTestSuite
}
// Test_EnryOnLinguistCorpus is the `go test` entry point: it hands a fresh
// linguistCorpusSuite to suite.Run.
func Test_EnryOnLinguistCorpus(t *testing.T) {
suite.Run(t, new(linguistCorpusSuite))
}
// TestLinguistSamples walks s.samplesDir and checks that GetLanguage detects,
// for every file, the language named by the directory the file lives in.
//
// First part of the test_blob.rb#test_language
// https://github.com/github/linguist/blob/59b2d88b2242e6062384e5fb876668cc30ead951/test/test_blob.rb#L258
//
// Files listed in cornerCases are only logged, never asserted. A failure to
// walk the tree or to read a sample is reported as a test failure instead of
// being silently ignored.
func (s *linguistCorpusSuite) TestLinguistSamples() {
	const filenamesDir = "filenames"
	var cornerCases = map[string]bool{
		"drop_stuff.sql":        true, // https://github.com/src-d/enry/issues/194
		"textobj-rubyblock.vba": true, // Because of unsupported negative lookahead RE syntax (https://github.com/github/linguist/blob/8083cb5a89cee2d99f5a988f165994d0243f0d1e/lib/linguist/heuristics.yml#L521)
		// .es and .ice fail heuristics parsing, but do not fail any tests
	}

	var total, failed, ok, other int
	var expected string
	walkErr := filepath.Walk(s.samplesDir, func(path string, f os.FileInfo, err error) error {
		if err != nil {
			// f may be nil when Walk reports an error, so it must not be used.
			return err
		}

		if f.IsDir() {
			// A directory name is resolved to the expected language; a
			// "filenames" directory leaves the current expectation untouched.
			if f.Name() != filenamesDir {
				expected, _ = data.LanguageByAlias(f.Name())
			}

			return nil
		}

		filename := filepath.Base(path)
		content, err := ioutil.ReadFile(path)
		if !assert.NoError(s.T(), err, "reading sample %s", path) {
			// Without content there is nothing meaningful to classify.
			return nil
		}

		total++
		obtained := GetLanguage(filename, content)
		if obtained == OtherLanguage {
			obtained = "Other"
			other++
		}

		var status string
		if expected == obtained {
			status = "ok"
			ok++
		} else {
			status = "failed"
			failed++
		}

		if _, isCornerCase := cornerCases[filename]; isCornerCase {
			s.T().Logf("\t\t[considered corner case] %s\texpected: %s\tobtained: %s\tstatus: %s\n", filename, expected, obtained, status)
		} else {
			assert.Equal(s.T(), expected, obtained, fmt.Sprintf("%s\texpected: %s\tobtained: %s\tstatus: %s\n", filename, expected, obtained, status))
		}

		return nil
	})
	assert.NoError(s.T(), walkErr)

	s.T().Logf("\t\ttotal files: %d, ok: %d, failed: %d, other: %d\n", total, ok, failed, other)
}