mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-07-16 12:51:10 +00:00
Merge pull request #144 from go-enry/refactoring-tests
Refactoring tests
This commit is contained in:
@@ -6,11 +6,8 @@ import (
|
|||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/go-enry/go-enry/v2/data"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type sample struct {
|
type sample struct {
|
||||||
@@ -23,22 +20,21 @@ var (
|
|||||||
overcomeLanguage string
|
overcomeLanguage string
|
||||||
overcomeLanguages []string
|
overcomeLanguages []string
|
||||||
samples []*sample
|
samples []*sample
|
||||||
samplesDir string
|
|
||||||
cloned bool
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestMain(m *testing.M) {
|
func TestMain(m *testing.M) {
|
||||||
flag.BoolVar(&slow, "slow", false, "run benchmarks per sample for strategies too")
|
flag.BoolVar(&slow, "slow", false, "run benchmarks per sample for strategies too")
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
if err := cloneLinguist(linguistURL); err != nil {
|
tmpLinguistDir, cleanupNeeded, err := maybeCloneLinguist()
|
||||||
|
if err != nil {
|
||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
if cloned {
|
if cleanupNeeded {
|
||||||
defer os.RemoveAll(filepath.Dir(samplesDir))
|
defer os.RemoveAll(tmpLinguistDir)
|
||||||
}
|
}
|
||||||
|
|
||||||
var err error
|
samplesDir := filepath.Join(tmpLinguistDir, "samples")
|
||||||
samples, err = getSamples(samplesDir)
|
samples, err = getSamples(samplesDir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
@@ -47,47 +43,6 @@ func TestMain(m *testing.M) {
|
|||||||
os.Exit(m.Run())
|
os.Exit(m.Run())
|
||||||
}
|
}
|
||||||
|
|
||||||
func cloneLinguist(linguistURL string) error {
|
|
||||||
repoLinguist := os.Getenv(linguistClonedEnvVar)
|
|
||||||
cloned = repoLinguist == ""
|
|
||||||
if cloned {
|
|
||||||
var err error
|
|
||||||
repoLinguist, err = ioutil.TempDir("", "linguist-")
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
samplesDir = filepath.Join(repoLinguist, "samples")
|
|
||||||
|
|
||||||
if cloned {
|
|
||||||
cmd := exec.Command("git", "clone", linguistURL, repoLinguist)
|
|
||||||
if err := cmd.Run(); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
cwd, err := os.Getwd()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if err = os.Chdir(repoLinguist); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
cmd := exec.Command("git", "checkout", data.LinguistCommit)
|
|
||||||
if err := cmd.Run(); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if err = os.Chdir(cwd); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func getSamples(dir string) ([]*sample, error) {
|
func getSamples(dir string) ([]*sample, error) {
|
||||||
samples := make([]*sample, 0, 2000)
|
samples := make([]*sample, 0, 2000)
|
||||||
err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
|
err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
|
||||||
|
48
common.go
48
common.go
@@ -38,7 +38,7 @@ var defaultClassifier classifier = &naiveBayes{
|
|||||||
}
|
}
|
||||||
|
|
||||||
// GetLanguage applies a sequence of strategies based on the given filename and content
|
// GetLanguage applies a sequence of strategies based on the given filename and content
|
||||||
// to find out the most probably language to return.
|
// to find out the most probable language to return.
|
||||||
func GetLanguage(filename string, content []byte) (language string) {
|
func GetLanguage(filename string, content []byte) (language string) {
|
||||||
languages := GetLanguages(filename, content)
|
languages := GetLanguages(filename, content)
|
||||||
return firstLanguage(languages)
|
return firstLanguage(languages)
|
||||||
@@ -508,28 +508,6 @@ func GetLanguageExtensions(language string) []string {
|
|||||||
return data.ExtensionsByLanguage[language]
|
return data.ExtensionsByLanguage[language]
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetLanguageID returns the ID for the language. IDs are assigned by GitHub.
|
|
||||||
// The input must be the canonical language name. Aliases are not supported.
|
|
||||||
//
|
|
||||||
// NOTE: The zero value (0) is a valid language ID, so this API mimics the Go
|
|
||||||
// map API. Use the second return value to check if the language was found.
|
|
||||||
func GetLanguageID(language string) (int, bool) {
|
|
||||||
id, ok := data.IDByLanguage[language]
|
|
||||||
return id, ok
|
|
||||||
}
|
|
||||||
|
|
||||||
// Type represent language's type. Either data, programming, markup, prose, or unknown.
|
|
||||||
type Type int
|
|
||||||
|
|
||||||
// Type's values.
|
|
||||||
const (
|
|
||||||
Unknown Type = Type(data.TypeUnknown)
|
|
||||||
Data = Type(data.TypeData)
|
|
||||||
Programming = Type(data.TypeProgramming)
|
|
||||||
Markup = Type(data.TypeMarkup)
|
|
||||||
Prose = Type(data.TypeProse)
|
|
||||||
)
|
|
||||||
|
|
||||||
// GetLanguageType returns the type of the given language.
|
// GetLanguageType returns the type of the given language.
|
||||||
func GetLanguageType(language string) (langType Type) {
|
func GetLanguageType(language string) (langType Type) {
|
||||||
intType, ok := data.LanguagesType[language]
|
intType, ok := data.LanguagesType[language]
|
||||||
@@ -540,6 +518,15 @@ func GetLanguageType(language string) (langType Type) {
|
|||||||
return langType
|
return langType
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetLanguageGroup returns language group or empty string if language does not have group.
|
||||||
|
func GetLanguageGroup(language string) string {
|
||||||
|
if group, ok := data.LanguagesGroup[language]; ok {
|
||||||
|
return group
|
||||||
|
}
|
||||||
|
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
// GetLanguageByAlias returns either the language related to the given alias and ok set to true
|
// GetLanguageByAlias returns either the language related to the given alias and ok set to true
|
||||||
// or Otherlanguage and ok set to false if the alias is not recognized.
|
// or Otherlanguage and ok set to false if the alias is not recognized.
|
||||||
func GetLanguageByAlias(alias string) (lang string, ok bool) {
|
func GetLanguageByAlias(alias string) (lang string, ok bool) {
|
||||||
@@ -551,13 +538,14 @@ func GetLanguageByAlias(alias string) (lang string, ok bool) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetLanguageGroup returns language group or empty string if language does not have group.
|
// GetLanguageID returns the ID for the language. IDs are assigned by GitHub.
|
||||||
func GetLanguageGroup(language string) string {
|
// The input must be the canonical language name. Aliases are not supported.
|
||||||
if group, ok := data.LanguagesGroup[language]; ok {
|
//
|
||||||
return group
|
// NOTE: The zero value (0) is a valid language ID, so this API mimics the Go
|
||||||
}
|
// map API. Use the second return value to check if the language was found.
|
||||||
|
func GetLanguageID(language string) (int, bool) {
|
||||||
return ""
|
id, ok := data.IDByLanguage[language]
|
||||||
|
return id, ok
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetLanguageInfo returns the LanguageInfo for a given language name, or an error if not found.
|
// GetLanguageInfo returns the LanguageInfo for a given language name, or an error if not found.
|
||||||
|
205
common_test.go
205
common_test.go
@@ -19,15 +19,78 @@ import (
|
|||||||
const linguistURL = "https://github.com/github/linguist.git"
|
const linguistURL = "https://github.com/github/linguist.git"
|
||||||
const linguistClonedEnvVar = "ENRY_TEST_REPO"
|
const linguistClonedEnvVar = "ENRY_TEST_REPO"
|
||||||
|
|
||||||
type EnryTestSuite struct {
|
// not a part of the test Suite as benchmark does not use testify
|
||||||
|
func maybeCloneLinguist() (string, bool, error) {
|
||||||
|
var err error
|
||||||
|
linguistTmpDir := os.Getenv(linguistClonedEnvVar)
|
||||||
|
isCleanupNeeded := false
|
||||||
|
isLinguistCloned := linguistTmpDir != ""
|
||||||
|
if !isLinguistCloned {
|
||||||
|
linguistTmpDir, err = ioutil.TempDir("", "linguist-")
|
||||||
|
if err != nil {
|
||||||
|
return "", false, err
|
||||||
|
}
|
||||||
|
|
||||||
|
isCleanupNeeded = true
|
||||||
|
cmd := exec.Command("git", "clone", "--depth", "100", linguistURL, linguistTmpDir)
|
||||||
|
if err := cmd.Run(); err != nil {
|
||||||
|
return linguistTmpDir, isCleanupNeeded, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cwd, err := os.Getwd()
|
||||||
|
if err != nil {
|
||||||
|
return linguistTmpDir, isCleanupNeeded, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = os.Chdir(linguistTmpDir); err != nil {
|
||||||
|
return linguistTmpDir, isCleanupNeeded, err
|
||||||
|
}
|
||||||
|
|
||||||
|
cmd := exec.Command("git", "checkout", data.LinguistCommit)
|
||||||
|
if err := cmd.Run(); err != nil {
|
||||||
|
return linguistTmpDir, isCleanupNeeded, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = os.Chdir(cwd); err != nil {
|
||||||
|
return linguistTmpDir, isCleanupNeeded, err
|
||||||
|
}
|
||||||
|
return linguistTmpDir, isCleanupNeeded, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type enryBaseTestSuite struct {
|
||||||
suite.Suite
|
suite.Suite
|
||||||
tmpLinguist string
|
tmpLinguistDir string
|
||||||
needToClone bool
|
isCleanupNeeded bool
|
||||||
samplesDir string
|
samplesDir string
|
||||||
testFixturesDir string
|
testFixturesDir string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *EnryTestSuite) TestRegexpEdgeCases() {
|
func (s *enryBaseTestSuite) SetupSuite() {
|
||||||
|
var err error
|
||||||
|
s.tmpLinguistDir, s.isCleanupNeeded, err = maybeCloneLinguist()
|
||||||
|
require.NoError(s.T(), err)
|
||||||
|
|
||||||
|
s.samplesDir = filepath.Join(s.tmpLinguistDir, "samples")
|
||||||
|
s.testFixturesDir = filepath.Join(s.tmpLinguistDir, "test", "fixtures")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *enryBaseTestSuite) TearDownSuite() {
|
||||||
|
if s.isCleanupNeeded {
|
||||||
|
err := os.RemoveAll(s.tmpLinguistDir)
|
||||||
|
require.NoError(s.T(), err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type enryTestSuite struct {
|
||||||
|
enryBaseTestSuite
|
||||||
|
}
|
||||||
|
|
||||||
|
func Test_EnryTestSuite(t *testing.T) {
|
||||||
|
suite.Run(t, new(enryTestSuite))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *enryTestSuite) TestRegexpEdgeCases() {
|
||||||
var regexpEdgeCases = []struct {
|
var regexpEdgeCases = []struct {
|
||||||
lang string
|
lang string
|
||||||
filename string
|
filename string
|
||||||
@@ -41,7 +104,7 @@ func (s *EnryTestSuite) TestRegexpEdgeCases() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, r := range regexpEdgeCases {
|
for _, r := range regexpEdgeCases {
|
||||||
filename := filepath.Join(s.tmpLinguist, "samples", r.lang, r.filename)
|
filename := filepath.Join(s.tmpLinguistDir, "samples", r.lang, r.filename)
|
||||||
|
|
||||||
content, err := ioutil.ReadFile(filename)
|
content, err := ioutil.ReadFile(filename)
|
||||||
require.NoError(s.T(), err)
|
require.NoError(s.T(), err)
|
||||||
@@ -54,51 +117,7 @@ func (s *EnryTestSuite) TestRegexpEdgeCases() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func Test_EnryTestSuite(t *testing.T) {
|
func (s *enryTestSuite) TestGetLanguage() {
|
||||||
suite.Run(t, new(EnryTestSuite))
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *EnryTestSuite) SetupSuite() {
|
|
||||||
var err error
|
|
||||||
s.tmpLinguist = os.Getenv(linguistClonedEnvVar)
|
|
||||||
s.needToClone = s.tmpLinguist == ""
|
|
||||||
if s.needToClone {
|
|
||||||
s.tmpLinguist, err = ioutil.TempDir("", "linguist-")
|
|
||||||
require.NoError(s.T(), err)
|
|
||||||
s.T().Logf("Cloning Linguist repo to '%s' as %s was not set\n",
|
|
||||||
s.tmpLinguist, linguistClonedEnvVar)
|
|
||||||
cmd := exec.Command("git", "clone", linguistURL, s.tmpLinguist)
|
|
||||||
err = cmd.Run()
|
|
||||||
require.NoError(s.T(), err)
|
|
||||||
}
|
|
||||||
s.samplesDir = filepath.Join(s.tmpLinguist, "samples")
|
|
||||||
s.T().Logf("using samples from %s", s.samplesDir)
|
|
||||||
|
|
||||||
s.testFixturesDir = filepath.Join(s.tmpLinguist, "test", "fixtures")
|
|
||||||
s.T().Logf("using test fixtures from %s", s.samplesDir)
|
|
||||||
|
|
||||||
cwd, err := os.Getwd()
|
|
||||||
assert.NoError(s.T(), err)
|
|
||||||
|
|
||||||
err = os.Chdir(s.tmpLinguist)
|
|
||||||
assert.NoError(s.T(), err)
|
|
||||||
|
|
||||||
cmd := exec.Command("git", "checkout", data.LinguistCommit)
|
|
||||||
err = cmd.Run()
|
|
||||||
assert.NoError(s.T(), err)
|
|
||||||
|
|
||||||
err = os.Chdir(cwd)
|
|
||||||
assert.NoError(s.T(), err)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *EnryTestSuite) TearDownSuite() {
|
|
||||||
if s.needToClone {
|
|
||||||
err := os.RemoveAll(s.tmpLinguist)
|
|
||||||
assert.NoError(s.T(), err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *EnryTestSuite) TestGetLanguage() {
|
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
filename string
|
filename string
|
||||||
@@ -120,7 +139,7 @@ func (s *EnryTestSuite) TestGetLanguage() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *EnryTestSuite) TestGetLanguages() {
|
func (s *enryTestSuite) TestGetLanguages() {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
filename string
|
filename string
|
||||||
@@ -152,8 +171,8 @@ func (s *EnryTestSuite) TestGetLanguages() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *EnryTestSuite) TestGetLanguagesByModelineLinguist() {
|
func (s *enryTestSuite) TestGetLanguagesByModelineLinguist() {
|
||||||
var modelinesDir = filepath.Join(s.tmpLinguist, "test", "fixtures", "Data", "Modelines")
|
var modelinesDir = filepath.Join(s.tmpLinguistDir, "test", "fixtures", "Data", "Modelines")
|
||||||
|
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
@@ -212,7 +231,7 @@ func (s *EnryTestSuite) TestGetLanguagesByModelineLinguist() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *EnryTestSuite) TestGetLanguagesByModeline() {
|
func (s *enryTestSuite) TestGetLanguagesByModeline() {
|
||||||
const (
|
const (
|
||||||
wrongVim = `# vim: set syntax=ruby ft =python filetype=perl :`
|
wrongVim = `# vim: set syntax=ruby ft =python filetype=perl :`
|
||||||
rightVim = `/* vim: set syntax=python ft =python filetype=python */`
|
rightVim = `/* vim: set syntax=python ft =python filetype=python */`
|
||||||
@@ -239,7 +258,7 @@ func (s *EnryTestSuite) TestGetLanguagesByModeline() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *EnryTestSuite) TestGetLanguagesByFilename() {
|
func (s *enryTestSuite) TestGetLanguagesByFilename() {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
filename string
|
filename string
|
||||||
@@ -267,7 +286,7 @@ func (s *EnryTestSuite) TestGetLanguagesByFilename() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *EnryTestSuite) TestGetLanguagesByShebang() {
|
func (s *enryTestSuite) TestGetLanguagesByShebang() {
|
||||||
const (
|
const (
|
||||||
multilineExecHack = `#!/bin/sh
|
multilineExecHack = `#!/bin/sh
|
||||||
# Next line is comment in Tcl, but not in sh... \
|
# Next line is comment in Tcl, but not in sh... \
|
||||||
@@ -352,7 +371,7 @@ println("The shell script says ",vm.arglist.concat(" "));`
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *EnryTestSuite) TestGetLanguagesByExtension() {
|
func (s *enryTestSuite) TestGetLanguagesByExtension() {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
filename string
|
filename string
|
||||||
@@ -373,7 +392,7 @@ func (s *EnryTestSuite) TestGetLanguagesByExtension() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *EnryTestSuite) TestGetLanguagesByManpage() {
|
func (s *enryTestSuite) TestGetLanguagesByManpage() {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
filename string
|
filename string
|
||||||
@@ -397,7 +416,7 @@ func (s *EnryTestSuite) TestGetLanguagesByManpage() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *EnryTestSuite) TestGetLanguagesByXML() {
|
func (s *enryTestSuite) TestGetLanguagesByXML() {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
filename string
|
filename string
|
||||||
@@ -420,7 +439,7 @@ func (s *EnryTestSuite) TestGetLanguagesByXML() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *EnryTestSuite) TestGetLanguagesByClassifier() {
|
func (s *enryTestSuite) TestGetLanguagesByClassifier() {
|
||||||
test := []struct {
|
test := []struct {
|
||||||
name string
|
name string
|
||||||
filename string
|
filename string
|
||||||
@@ -457,7 +476,7 @@ func (s *EnryTestSuite) TestGetLanguagesByClassifier() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *EnryTestSuite) TestGetLanguagesBySpecificClassifier() {
|
func (s *enryTestSuite) TestGetLanguagesBySpecificClassifier() {
|
||||||
test := []struct {
|
test := []struct {
|
||||||
name string
|
name string
|
||||||
filename string
|
filename string
|
||||||
@@ -490,7 +509,7 @@ func (s *EnryTestSuite) TestGetLanguagesBySpecificClassifier() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *EnryTestSuite) TestGetLanguageExtensions() {
|
func (s *enryTestSuite) TestGetLanguageExtensions() {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
language string
|
language string
|
||||||
@@ -507,7 +526,7 @@ func (s *EnryTestSuite) TestGetLanguageExtensions() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *EnryTestSuite) TestGetLanguageType() {
|
func (s *enryTestSuite) TestGetLanguageType() {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
language string
|
language string
|
||||||
@@ -530,7 +549,7 @@ func (s *EnryTestSuite) TestGetLanguageType() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *EnryTestSuite) TestGetLanguageGroup() {
|
func (s *enryTestSuite) TestGetLanguageGroup() {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
language string
|
language string
|
||||||
@@ -548,7 +567,7 @@ func (s *EnryTestSuite) TestGetLanguageGroup() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *EnryTestSuite) TestGetLanguageByAlias() {
|
func (s *enryTestSuite) TestGetLanguageByAlias() {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
alias string
|
alias string
|
||||||
@@ -574,57 +593,7 @@ func (s *EnryTestSuite) TestGetLanguageByAlias() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *EnryTestSuite) TestLinguistCorpus() {
|
func (s *enryTestSuite) TestGetLanguageID() {
|
||||||
const filenamesDir = "filenames"
|
|
||||||
var cornerCases = map[string]bool{
|
|
||||||
"drop_stuff.sql": true, // https://github.com/src-d/enry/issues/194
|
|
||||||
"textobj-rubyblock.vba": true, // Because of unsupported negative lookahead RE syntax (https://github.com/github/linguist/blob/8083cb5a89cee2d99f5a988f165994d0243f0d1e/lib/linguist/heuristics.yml#L521)
|
|
||||||
// .es and .ice fail heuristics parsing, but do not fail any tests
|
|
||||||
}
|
|
||||||
|
|
||||||
var total, failed, ok, other int
|
|
||||||
var expected string
|
|
||||||
filepath.Walk(s.samplesDir, func(path string, f os.FileInfo, err error) error {
|
|
||||||
if f.IsDir() {
|
|
||||||
if f.Name() != filenamesDir {
|
|
||||||
expected, _ = data.LanguageByAlias(f.Name())
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
filename := filepath.Base(path)
|
|
||||||
content, _ := ioutil.ReadFile(path)
|
|
||||||
|
|
||||||
total++
|
|
||||||
obtained := GetLanguage(filename, content)
|
|
||||||
if obtained == OtherLanguage {
|
|
||||||
obtained = "Other"
|
|
||||||
other++
|
|
||||||
}
|
|
||||||
|
|
||||||
var status string
|
|
||||||
if expected == obtained {
|
|
||||||
status = "ok"
|
|
||||||
ok++
|
|
||||||
} else {
|
|
||||||
status = "failed"
|
|
||||||
failed++
|
|
||||||
}
|
|
||||||
|
|
||||||
if _, ok := cornerCases[filename]; ok {
|
|
||||||
s.T().Logf("\t\t[considered corner case] %s\texpected: %s\tobtained: %s\tstatus: %s\n", filename, expected, obtained, status)
|
|
||||||
} else {
|
|
||||||
assert.Equal(s.T(), expected, obtained, fmt.Sprintf("%s\texpected: %s\tobtained: %s\tstatus: %s\n", filename, expected, obtained, status))
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
})
|
|
||||||
|
|
||||||
s.T().Logf("\t\ttotal files: %d, ok: %d, failed: %d, other: %d\n", total, ok, failed, other)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *EnryTestSuite) TestGetLanguageID() {
|
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
language string
|
language string
|
||||||
@@ -647,7 +616,7 @@ func (s *EnryTestSuite) TestGetLanguageID() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *EnryTestSuite) TestGetLanguageInfo() {
|
func (s *enryTestSuite) TestGetLanguageInfo() {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
language string
|
language string
|
||||||
@@ -674,7 +643,7 @@ func (s *EnryTestSuite) TestGetLanguageInfo() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *EnryTestSuite) TestGetLanguageInfoByID() {
|
func (s *enryTestSuite) TestGetLanguageInfoByID() {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
id int
|
id int
|
||||||
|
14
enry.go
14
enry.go
@@ -14,3 +14,17 @@
|
|||||||
package enry // import "github.com/go-enry/go-enry/v2"
|
package enry // import "github.com/go-enry/go-enry/v2"
|
||||||
|
|
||||||
//go:generate make code-generate
|
//go:generate make code-generate
|
||||||
|
|
||||||
|
import "github.com/go-enry/go-enry/v2/data"
|
||||||
|
|
||||||
|
// Type represent language's type. Either data, programming, markup, prose, or unknown.
|
||||||
|
type Type int
|
||||||
|
|
||||||
|
// Type's values.
|
||||||
|
const (
|
||||||
|
Unknown Type = Type(data.TypeUnknown)
|
||||||
|
Data = Type(data.TypeData)
|
||||||
|
Programming = Type(data.TypeProgramming)
|
||||||
|
Markup = Type(data.TypeMarkup)
|
||||||
|
Prose = Type(data.TypeProse)
|
||||||
|
)
|
||||||
|
@@ -97,9 +97,9 @@ var (
|
|||||||
|
|
||||||
type GeneratorTestSuite struct {
|
type GeneratorTestSuite struct {
|
||||||
suite.Suite
|
suite.Suite
|
||||||
tmpLinguistDir string
|
tmpLinguistDir string
|
||||||
isLinguistCloned bool
|
isCleanupNeeded bool
|
||||||
testCases []testCase
|
testCases []testCase
|
||||||
}
|
}
|
||||||
|
|
||||||
type testCase struct {
|
type testCase struct {
|
||||||
@@ -122,27 +122,31 @@ func Test_GeneratorTestSuite(t *testing.T) {
|
|||||||
func (s *GeneratorTestSuite) maybeCloneLinguist() {
|
func (s *GeneratorTestSuite) maybeCloneLinguist() {
|
||||||
var err error
|
var err error
|
||||||
s.tmpLinguistDir = os.Getenv(linguistClonedEnvVar)
|
s.tmpLinguistDir = os.Getenv(linguistClonedEnvVar)
|
||||||
s.isLinguistCloned = s.tmpLinguistDir != ""
|
isLinguistCloned := s.tmpLinguistDir != ""
|
||||||
if !s.isLinguistCloned {
|
if !isLinguistCloned {
|
||||||
s.tmpLinguistDir, err = ioutil.TempDir("", "linguist-")
|
s.tmpLinguistDir, err = ioutil.TempDir("", "linguist-")
|
||||||
assert.NoError(s.T(), err)
|
require.NoError(s.T(), err)
|
||||||
cmd := exec.Command("git", "clone", linguistURL, s.tmpLinguistDir)
|
|
||||||
|
s.T().Logf("Cloning Linguist repo to '%s' as %s was not set\n",
|
||||||
|
s.tmpLinguistDir, linguistClonedEnvVar)
|
||||||
|
cmd := exec.Command("git", "clone", "--depth", "100", linguistURL, s.tmpLinguistDir)
|
||||||
err = cmd.Run()
|
err = cmd.Run()
|
||||||
assert.NoError(s.T(), err)
|
require.NoError(s.T(), err)
|
||||||
|
s.isCleanupNeeded = true
|
||||||
cwd, err := os.Getwd()
|
|
||||||
assert.NoError(s.T(), err)
|
|
||||||
|
|
||||||
err = os.Chdir(s.tmpLinguistDir)
|
|
||||||
assert.NoError(s.T(), err)
|
|
||||||
|
|
||||||
cmd = exec.Command("git", "checkout", commit)
|
|
||||||
err = cmd.Run()
|
|
||||||
assert.NoError(s.T(), err)
|
|
||||||
|
|
||||||
err = os.Chdir(cwd)
|
|
||||||
assert.NoError(s.T(), err)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cwd, err := os.Getwd()
|
||||||
|
require.NoError(s.T(), err)
|
||||||
|
|
||||||
|
err = os.Chdir(s.tmpLinguistDir)
|
||||||
|
require.NoError(s.T(), err)
|
||||||
|
|
||||||
|
cmd := exec.Command("git", "checkout", commit)
|
||||||
|
err = cmd.Run()
|
||||||
|
require.NoError(s.T(), err)
|
||||||
|
|
||||||
|
err = os.Chdir(cwd)
|
||||||
|
require.NoError(s.T(), err)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *GeneratorTestSuite) SetupSuite() {
|
func (s *GeneratorTestSuite) SetupSuite() {
|
||||||
@@ -280,11 +284,9 @@ func (s *GeneratorTestSuite) SetupSuite() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *GeneratorTestSuite) TearDownSuite() {
|
func (s *GeneratorTestSuite) TearDownSuite() {
|
||||||
if s.isLinguistCloned {
|
if s.isCleanupNeeded {
|
||||||
err := os.RemoveAll(s.tmpLinguistDir)
|
err := os.RemoveAll(s.tmpLinguistDir)
|
||||||
if err != nil {
|
assert.NoError(s.T(), err)
|
||||||
s.T().Logf("Failed to clean up %s after the test.\n", s.tmpLinguistDir)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
71
linguist_corpus_test.go
Normal file
71
linguist_corpus_test.go
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
package enry
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/go-enry/go-enry/v2/data"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/suite"
|
||||||
|
)
|
||||||
|
|
||||||
|
type linguistCorpusSuite struct {
|
||||||
|
enryBaseTestSuite
|
||||||
|
}
|
||||||
|
|
||||||
|
func Test_EnryOnLinguistCorpus(t *testing.T) {
|
||||||
|
suite.Run(t, new(linguistCorpusSuite))
|
||||||
|
}
|
||||||
|
|
||||||
|
// First part of the test_blob.rb#test_language
|
||||||
|
// https://github.com/github/linguist/blob/59b2d88b2242e6062384e5fb876668cc30ead951/test/test_blob.rb#L258
|
||||||
|
func (s *linguistCorpusSuite) TestLinguistSamples() {
|
||||||
|
const filenamesDir = "filenames"
|
||||||
|
var cornerCases = map[string]bool{
|
||||||
|
"drop_stuff.sql": true, // https://github.com/src-d/enry/issues/194
|
||||||
|
"textobj-rubyblock.vba": true, // Because of unsupported negative lookahead RE syntax (https://github.com/github/linguist/blob/8083cb5a89cee2d99f5a988f165994d0243f0d1e/lib/linguist/heuristics.yml#L521)
|
||||||
|
// .es and .ice fail heuristics parsing, but do not fail any tests
|
||||||
|
}
|
||||||
|
|
||||||
|
var total, failed, ok, other int
|
||||||
|
var expected string
|
||||||
|
filepath.Walk(s.samplesDir, func(path string, f os.FileInfo, err error) error {
|
||||||
|
if f.IsDir() {
|
||||||
|
if f.Name() != filenamesDir {
|
||||||
|
expected, _ = data.LanguageByAlias(f.Name())
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
filename := filepath.Base(path)
|
||||||
|
content, _ := ioutil.ReadFile(path)
|
||||||
|
|
||||||
|
total++
|
||||||
|
obtained := GetLanguage(filename, content)
|
||||||
|
if obtained == OtherLanguage {
|
||||||
|
obtained = "Other"
|
||||||
|
other++
|
||||||
|
}
|
||||||
|
|
||||||
|
var status string
|
||||||
|
if expected == obtained {
|
||||||
|
status = "ok"
|
||||||
|
ok++
|
||||||
|
} else {
|
||||||
|
status = "failed"
|
||||||
|
failed++
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, ok := cornerCases[filename]; ok {
|
||||||
|
s.T().Logf("\t\t[considered corner case] %s\texpected: %s\tobtained: %s\tstatus: %s\n", filename, expected, obtained, status)
|
||||||
|
} else {
|
||||||
|
assert.Equal(s.T(), expected, obtained, fmt.Sprintf("%s\texpected: %s\tobtained: %s\tstatus: %s\n", filename, expected, obtained, status))
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
s.T().Logf("\t\ttotal files: %d, ok: %d, failed: %d, other: %d\n", total, ok, failed, other)
|
||||||
|
}
|
Reference in New Issue
Block a user