mirror of
https://github.com/ralsina/tartrazine.git
synced 2024-11-10 05:22:23 +00:00
Merge pull request #151 from go-enry/distinguish-re-syntax
Syntax-aware regexp generation for configurable engines
This commit is contained in:
commit
0e58945703
@ -184,6 +184,8 @@ Parsing [linguist/samples](https://github.com/github/linguist/tree/master/sample
|
|||||||
|
|
||||||
In all the cases above that have an issue number - we plan to update enry to match Linguist behavior.
|
In all the cases above that have an issue number - we plan to update enry to match Linguist behavior.
|
||||||
|
|
||||||
|
> All the issues related to heuristics' regexp syntax incompatibilities with the RE2 engine can be avoided by using `oniguruma` instead (see [instructions](#misc))
|
||||||
|
|
||||||
## Benchmarks
|
## Benchmarks
|
||||||
|
|
||||||
Enry's language detection has been compared with Linguist's on [_linguist/samples_](https://github.com/github/linguist/tree/master/samples).
|
Enry's language detection has been compared with Linguist's on [_linguist/samples_](https://github.com/github/linguist/tree/master/samples).
|
||||||
|
660
data/content.go
660
data/content.go
File diff suppressed because it is too large
Load Diff
@ -3,6 +3,15 @@
|
|||||||
// with colliding extensions, based on regexps from Linguist data.
|
// with colliding extensions, based on regexps from Linguist data.
|
||||||
package rule
|
package rule
|
||||||
|
|
||||||
|
import "github.com/go-enry/go-enry/v2/regex"
|
||||||
|
|
||||||
|
// Matcher checks if the data matches (number of) pattern(s).
|
||||||
|
// Every heuristic rule below implements this interface.
|
||||||
|
// A regexp.Regexp satisfies this interface and can be used instead.
|
||||||
|
type Matcher interface {
|
||||||
|
Match(data []byte) bool
|
||||||
|
}
|
||||||
|
|
||||||
// Heuristic consist of (a number of) rules where each, if matches,
|
// Heuristic consist of (a number of) rules where each, if matches,
|
||||||
// identifies content as belonging to a programming language(s).
|
// identifies content as belonging to a programming language(s).
|
||||||
type Heuristic interface {
|
type Heuristic interface {
|
||||||
@ -10,15 +19,7 @@ type Heuristic interface {
|
|||||||
Languages() []string
|
Languages() []string
|
||||||
}
|
}
|
||||||
|
|
||||||
// Matcher checks if the data matches (number of) pattern.
|
// languages base struct with all the languages that a Matcher identifies.
|
||||||
// Every heuristic rule below implements this interface.
|
|
||||||
// A regexp.Regexp satisfies this interface and can be used instead.
|
|
||||||
type Matcher interface {
|
|
||||||
Match(data []byte) bool
|
|
||||||
}
|
|
||||||
|
|
||||||
// languages struct incapsulate data common to every Matcher: all languages
|
|
||||||
// that it identifies.
|
|
||||||
type languages struct {
|
type languages struct {
|
||||||
langs []string
|
langs []string
|
||||||
}
|
}
|
||||||
@ -33,6 +34,10 @@ func MatchingLanguages(langs ...string) languages {
|
|||||||
return languages{langs}
|
return languages{langs}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func noLanguages() languages {
|
||||||
|
return MatchingLanguages([]string{}...)
|
||||||
|
}
|
||||||
|
|
||||||
// Implements a Heuristic.
|
// Implements a Heuristic.
|
||||||
type or struct {
|
type or struct {
|
||||||
languages
|
languages
|
||||||
@ -40,14 +45,19 @@ type or struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Or rule matches, if a single matching pattern exists.
|
// Or rule matches, if a single matching pattern exists.
|
||||||
// It receives only one pattern as it relies on compile-time optimization that
|
// It receives only one pattern as it relies on optimization that
|
||||||
// represtes union with | inside a single regexp.
|
// represents the union with | inside a single regexp during code generation.
|
||||||
func Or(l languages, r Matcher) Heuristic {
|
func Or(l languages, p Matcher) Heuristic {
|
||||||
return or{l, r}
|
//FIXME(bzz): this will not be the case as only some of the patterns may
|
||||||
|
// be non-RE2 => we shouldn't collate them, so as not to lose the (accuracy of the) whole rule
|
||||||
|
return or{l, p}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Match implements rule.Matcher.
|
// Match implements rule.Matcher.
|
||||||
func (r or) Match(data []byte) bool {
|
func (r or) Match(data []byte) bool {
|
||||||
|
if runOnRE2AndRegexNotAccepted(r.pattern) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
return r.pattern.Match(data)
|
return r.pattern.Match(data)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -65,6 +75,9 @@ func And(l languages, m ...Matcher) Heuristic {
|
|||||||
// Match implements data.Matcher.
|
// Match implements data.Matcher.
|
||||||
func (r and) Match(data []byte) bool {
|
func (r and) Match(data []byte) bool {
|
||||||
for _, p := range r.patterns {
|
for _, p := range r.patterns {
|
||||||
|
if runOnRE2AndRegexNotAccepted(p) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
if !p.Match(data) {
|
if !p.Match(data) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
@ -86,6 +99,9 @@ func Not(l languages, r ...Matcher) Heuristic {
|
|||||||
// Match implements data.Matcher.
|
// Match implements data.Matcher.
|
||||||
func (r not) Match(data []byte) bool {
|
func (r not) Match(data []byte) bool {
|
||||||
for _, p := range r.Patterns {
|
for _, p := range r.Patterns {
|
||||||
|
if runOnRE2AndRegexNotAccepted(p) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
if p.Match(data) {
|
if p.Match(data) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
@ -107,3 +123,11 @@ func Always(l languages) Heuristic {
|
|||||||
func (r always) Match(data []byte) bool {
|
func (r always) Match(data []byte) bool {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks if a regex syntax isn't accepted by RE2 engine.
|
||||||
|
// It's nil by construction from regex.MustCompileRuby but
|
||||||
|
// is used here as a Matcher interface which itself is non-nil.
|
||||||
|
func runOnRE2AndRegexNotAccepted(re Matcher) bool {
|
||||||
|
v, ok := re.(regex.EnryRegexp)
|
||||||
|
return ok && v == nil
|
||||||
|
}
|
||||||
|
@ -1,39 +1,71 @@
|
|||||||
package rule
|
package rule
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"regexp"
|
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/go-enry/go-enry/v2/regex"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
|
|
||||||
const lang = "ActionScript"
|
const lang = "ActionScript"
|
||||||
|
|
||||||
var fixtures = []struct {
|
type fixture struct {
|
||||||
name string
|
name string
|
||||||
rule Heuristic
|
rule Heuristic
|
||||||
numLangs int
|
numLangs int
|
||||||
matching string
|
match string
|
||||||
noMatch string
|
noMatch string
|
||||||
}{
|
}
|
||||||
|
|
||||||
|
var specificFixtures = map[string][]fixture{
|
||||||
|
"": { // cases that don't vary between the engines
|
||||||
{"Always", Always(MatchingLanguages(lang)), 1, "a", ""},
|
{"Always", Always(MatchingLanguages(lang)), 1, "a", ""},
|
||||||
{"Not", Not(MatchingLanguages(lang), regexp.MustCompile(`a`)), 1, "b", "a"},
|
{"Not", Not(MatchingLanguages(lang), regex.MustCompile(`a`)), 1, "b", "a"},
|
||||||
{"And", And(MatchingLanguages(lang), regexp.MustCompile(`a`), regexp.MustCompile(`b`)), 1, "ab", "a"},
|
{"And", And(MatchingLanguages(lang), regex.MustCompile(`a`), regex.MustCompile(`b`)), 1, "ab", "a"},
|
||||||
{"Or", Or(MatchingLanguages(lang), regexp.MustCompile(`a|b`)), 1, "ab", "c"},
|
{"Or", Or(MatchingLanguages(lang), regex.MustCompile(`a|b`)), 1, "ab", "c"},
|
||||||
|
// the results of these depend on the regex engine
|
||||||
|
// {"NilOr", Or(noLanguages(), regex.MustCompileRuby(``)), 0, "", "a"},
|
||||||
|
// {"NilNot", Not(noLanguages(), regex.MustCompileRuby(`a`)), 0, "", "a"},
|
||||||
|
},
|
||||||
|
regex.RE2: {
|
||||||
|
{"NilAnd", And(noLanguages(), regex.MustCompileRuby(`a`), regex.MustCompile(`b`)), 0, "b", "a"},
|
||||||
|
{"NilNot", Not(noLanguages(), regex.MustCompileRuby(`a`), regex.MustCompile(`b`)), 0, "c", "b"},
|
||||||
|
},
|
||||||
|
regex.Oniguruma: {
|
||||||
|
{"NilAnd", And(noLanguages(), regex.MustCompileRuby(`a`), regex.MustCompile(`b`)), 0, "ab", "c"},
|
||||||
|
{"NilNot", Not(noLanguages(), regex.MustCompileRuby(`a`), regex.MustCompile(`b`)), 0, "c", "a"},
|
||||||
|
{"NilOr", Or(noLanguages(), regex.MustCompileRuby(`a`) /*, regexp.MustCompile(`b`)*/), 0, "a", "b"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
func testRulesForEngine(t *testing.T, engine string) {
|
||||||
|
if engine != "" && regex.Name != engine {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, f := range specificFixtures[engine] {
|
||||||
|
t.Run(engine+f.name, func(t *testing.T) {
|
||||||
|
check(t, f)
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRules(t *testing.T) {
|
func TestRules(t *testing.T) {
|
||||||
for _, f := range fixtures {
|
//TODO(bzz): can all be run in parallel
|
||||||
t.Run(f.name, func(t *testing.T) {
|
testRulesForEngine(t, "")
|
||||||
|
testRulesForEngine(t, regex.RE2)
|
||||||
|
testRulesForEngine(t, regex.Oniguruma)
|
||||||
|
}
|
||||||
|
|
||||||
|
func check(t *testing.T, f fixture) {
|
||||||
assert.NotNil(t, f.rule)
|
assert.NotNil(t, f.rule)
|
||||||
assert.NotNil(t, f.rule.Languages())
|
assert.NotNil(t, f.rule.Languages())
|
||||||
assert.Equal(t, f.numLangs, len(f.rule.Languages()))
|
assert.Equal(t, f.numLangs, len(f.rule.Languages()))
|
||||||
assert.Truef(t, f.rule.Match([]byte(f.matching)),
|
if f.match != "" {
|
||||||
"'%s' is expected to .Match() by rule %s%v", f.matching, f.name, f.rule)
|
assert.Truef(t, f.rule.Match([]byte(f.match)),
|
||||||
|
"'%s' is expected to .Match() by rule %s%v", f.match, f.name, f.rule)
|
||||||
|
}
|
||||||
if f.noMatch != "" {
|
if f.noMatch != "" {
|
||||||
assert.Falsef(t, f.rule.Match([]byte(f.noMatch)),
|
assert.Falsef(t, f.rule.Match([]byte(f.noMatch)),
|
||||||
"'%s' is expected NOT to .Match() by rule %s%v", f.noMatch, f.name, f.rule)
|
"'%s' is expected NOT to .Match() by rule %s%v", f.noMatch, f.name, f.rule)
|
||||||
}
|
}
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
14
enry.go
14
enry.go
@ -1,15 +1,15 @@
|
|||||||
/*
|
/*
|
||||||
Package enry implements multiple strategies for programming language identification.
|
Package enry identifies programming languages.
|
||||||
|
|
||||||
Identification is made based on file name and file content using a service
|
Identification is based on file name and content using a series
|
||||||
of strategies to narrow down possible option.
|
of strategies to narrow down possible options.
|
||||||
Each strategy is available as a separate API call, as well as a main enty point
|
Each strategy is available as a separate API call, as well as through the main entry point:
|
||||||
|
|
||||||
GetLanguage(filename string, content []byte) (language string)
|
GetLanguage(filename string, content []byte) (language string)
|
||||||
|
|
||||||
It is a port of the https://github.com/github/linguist from Ruby.
|
It is a port of the https://github.com/github/linguist from Ruby.
|
||||||
Upstream Linguist YAML files are used to generate datastructures for data
|
Upstream Linguist YAML files are used to generate datastructures for data
|
||||||
package.
|
package.
|
||||||
*/
|
*/
|
||||||
package enry // import "github.com/go-enry/go-enry/v2"
|
package enry // import "github.com/go-enry/go-enry/v2"
|
||||||
|
|
||||||
|
2
go.mod
2
go.mod
@ -4,6 +4,6 @@ go 1.14
|
|||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/go-enry/go-oniguruma v1.2.1
|
github.com/go-enry/go-oniguruma v1.2.1
|
||||||
github.com/stretchr/testify v1.3.0
|
github.com/stretchr/testify v1.8.1
|
||||||
gopkg.in/yaml.v2 v2.2.8
|
gopkg.in/yaml.v2 v2.2.8
|
||||||
)
|
)
|
||||||
|
17
go.sum
17
go.sum
@ -1,16 +1,21 @@
|
|||||||
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
|
|
||||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/go-enry/go-oniguruma v1.2.0 h1:oBO9XC1IDT9+AoWW5oFsa/7gFeOPacEqDbyXZKWXuDs=
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
github.com/go-enry/go-oniguruma v1.2.0/go.mod h1:bWDhYP+S6xZQgiRL7wlTScFYBe023B6ilRZbCAD5Hf4=
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/go-enry/go-oniguruma v1.2.1 h1:k8aAMuJfMrqm/56SG2lV9Cfti6tC4x8673aHCcBk+eo=
|
github.com/go-enry/go-oniguruma v1.2.1 h1:k8aAMuJfMrqm/56SG2lV9Cfti6tC4x8673aHCcBk+eo=
|
||||||
github.com/go-enry/go-oniguruma v1.2.1/go.mod h1:bWDhYP+S6xZQgiRL7wlTScFYBe023B6ilRZbCAD5Hf4=
|
github.com/go-enry/go-oniguruma v1.2.1/go.mod h1:bWDhYP+S6xZQgiRL7wlTScFYBe023B6ilRZbCAD5Hf4=
|
||||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4=
|
|
||||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||||
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
|
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||||
|
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||||
|
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||||
|
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
|
||||||
|
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
|
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
|
||||||
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||||
|
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
|
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||||
|
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
|
@ -1,9 +1,8 @@
|
|||||||
package data
|
package data
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"regexp"
|
|
||||||
|
|
||||||
"github.com/go-enry/go-enry/v2/data/rule"
|
"github.com/go-enry/go-enry/v2/data/rule"
|
||||||
|
"github.com/go-enry/go-enry/v2/regex"
|
||||||
)
|
)
|
||||||
|
|
||||||
var ContentHeuristics = map[string]*Heuristics{
|
var ContentHeuristics = map[string]*Heuristics{
|
||||||
@ -27,12 +26,12 @@ var ContentHeuristics = map[string]*Heuristics{
|
|||||||
{{- else if eq .Op "Or" -}}
|
{{- else if eq .Op "Or" -}}
|
||||||
rule.Or(
|
rule.Or(
|
||||||
{{ template "Languages" .Langs -}}
|
{{ template "Languages" .Langs -}}
|
||||||
regexp.MustCompile({{ .Pattern | stringVal }}),
|
{{ template "mustCompile" . }}
|
||||||
),
|
),
|
||||||
{{- else if eq .Op "Not" -}}
|
{{- else if eq .Op "Not" -}}
|
||||||
rule.Not(
|
rule.Not(
|
||||||
{{ template "Languages" .Langs -}}
|
{{ template "Languages" .Langs -}}
|
||||||
regexp.MustCompile({{ .Pattern | stringVal }}),
|
{{ template "mustCompile" . }}
|
||||||
),
|
),
|
||||||
{{- else if eq .Op "Always" -}}
|
{{- else if eq .Op "Always" -}}
|
||||||
rule.Always(
|
rule.Always(
|
||||||
@ -49,3 +48,11 @@ var ContentHeuristics = map[string]*Heuristics{
|
|||||||
rule.MatchingLanguages(""),
|
rule.MatchingLanguages(""),
|
||||||
{{end -}}
|
{{end -}}
|
||||||
{{end}}
|
{{end}}
|
||||||
|
|
||||||
|
{{define "mustCompile" -}}
|
||||||
|
{{ if .IsRE2 -}}
|
||||||
|
regex.MustCompileMultiline({{ .Pattern | stringVal }}),
|
||||||
|
{{- else -}}
|
||||||
|
regex.MustCompileRuby({{ .Pattern | stringVal }}),
|
||||||
|
{{ end -}}
|
||||||
|
{{end}}
|
||||||
|
@ -2,11 +2,21 @@ package data
|
|||||||
|
|
||||||
import "github.com/go-enry/go-enry/v2/regex"
|
import "github.com/go-enry/go-enry/v2/regex"
|
||||||
|
|
||||||
|
{{define "mustCompile" -}}
|
||||||
|
{{ if isRE2 . -}}
|
||||||
|
regex.MustCompile({{ . | stringVal }})
|
||||||
|
{{- else -}}
|
||||||
|
regex.MustCompileRuby({{ . | stringVal }})
|
||||||
|
{{- end -}}
|
||||||
|
{{end}}
|
||||||
|
|
||||||
var VendorMatchers = []regex.EnryRegexp{
|
var VendorMatchers = []regex.EnryRegexp{
|
||||||
{{range $regexp := . -}}
|
{{range $re := . -}}
|
||||||
regex.MustCompile(`{{ $regexp }}`),
|
{{ template "mustCompile" $re }},
|
||||||
{{end -}}
|
{{end -}}
|
||||||
}
|
}
|
||||||
|
|
||||||
// FastVendorMatcher is equivalent to matching any of the VendorMatchers.
|
// FastVendorMatcher is equivalent to matching any of the VendorMatchers.
|
||||||
var FastVendorMatcher = regex.MustCompile(`{{ optimize . }}`)
|
{{with $singleRE := collateAllRegexps . -}}
|
||||||
|
var FastVendorMatcher = {{template "mustCompile" $singleRE}}
|
||||||
|
{{end}}
|
@ -3,7 +3,6 @@
|
|||||||
package generator
|
package generator
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"go/format"
|
"go/format"
|
||||||
"io"
|
"io"
|
||||||
@ -22,12 +21,15 @@ type File func(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit stri
|
|||||||
func formatedWrite(outPath string, source []byte) error {
|
func formatedWrite(outPath string, source []byte) error {
|
||||||
formatedSource, err := format.Source(source)
|
formatedSource, err := format.Source(source)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
err = fmt.Errorf("'go fmt' fails on %v", err)
|
||||||
|
// write the unformatted source to simplify debugging
|
||||||
|
formatedSource = source
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := ioutil.WriteFile(outPath, formatedSource, 0666); err != nil {
|
if err := ioutil.WriteFile(outPath, formatedSource, 0666); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
return nil
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
func executeTemplate(w io.Writer, name, path, commit string, fmap template.FuncMap, data interface{}) error {
|
func executeTemplate(w io.Writer, name, path, commit string, fmap template.FuncMap, data interface{}) error {
|
||||||
@ -40,35 +42,21 @@ func executeTemplate(w io.Writer, name, path, commit string, fmap template.FuncM
|
|||||||
val = strings.ReplaceAll(val, "`", "`+\"`\"+`")
|
val = strings.ReplaceAll(val, "`", "`+\"`\"+`")
|
||||||
return fmt.Sprintf("`%s`", val)
|
return fmt.Sprintf("`%s`", val)
|
||||||
}
|
}
|
||||||
|
|
||||||
const headerTmpl = "header.go.tmpl"
|
|
||||||
headerPath := filepath.Join(filepath.Dir(path), headerTmpl)
|
|
||||||
|
|
||||||
h := template.Must(template.New(headerTmpl).Funcs(template.FuncMap{
|
|
||||||
"getCommit": getCommit,
|
|
||||||
"stringVal": stringVal,
|
|
||||||
}).ParseFiles(headerPath))
|
|
||||||
|
|
||||||
buf := bytes.NewBuffer(nil)
|
|
||||||
if err := h.Execute(buf, data); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if fmap == nil {
|
if fmap == nil {
|
||||||
fmap = make(template.FuncMap)
|
fmap = make(template.FuncMap)
|
||||||
}
|
}
|
||||||
fmap["getCommit"] = getCommit
|
fmap["getCommit"] = getCommit
|
||||||
fmap["stringVal"] = stringVal
|
fmap["stringVal"] = stringVal
|
||||||
|
fmap["isRE2"] = isRE2
|
||||||
|
|
||||||
|
const headerTmpl = "header.go.tmpl"
|
||||||
|
headerPath := filepath.Join(filepath.Dir(path), headerTmpl)
|
||||||
|
|
||||||
|
h := template.Must(template.New(headerTmpl).Funcs(fmap).ParseFiles(headerPath))
|
||||||
|
if err := h.Execute(w, data); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
t := template.Must(template.New(name).Funcs(fmap).ParseFiles(path))
|
t := template.Must(template.New(name).Funcs(fmap).ParseFiles(path))
|
||||||
if err := t.Execute(buf, data); err != nil {
|
return t.Execute(w, data)
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
src, err := format.Source(buf.Bytes())
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
_, err = w.Write(src)
|
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
|
@ -70,25 +70,27 @@ func loadRule(namedPatterns map[string]StringArray, rule *Rule) *LanguagePattern
|
|||||||
subp := loadRule(namedPatterns, r)
|
subp := loadRule(namedPatterns, r)
|
||||||
subPatterns = append(subPatterns, subp)
|
subPatterns = append(subPatterns, subp)
|
||||||
}
|
}
|
||||||
result = &LanguagePattern{"And", rule.Languages, "", subPatterns}
|
result = &LanguagePattern{"And", rule.Languages, "", subPatterns, true}
|
||||||
} else if len(rule.Pattern) != 0 { // OrPattern
|
} else if len(rule.Pattern) != 0 { // OrPattern
|
||||||
conjunction := strings.Join(rule.Pattern, orPipe)
|
// FIXME(bzz): this optimization should only be applied if each pattern isRE2!
|
||||||
pattern := convertToValidRegexp(conjunction)
|
pattern := strings.Join(rule.Pattern, orPipe)
|
||||||
result = &LanguagePattern{"Or", rule.Languages, pattern, nil}
|
|
||||||
|
// TODO(bzz): handle the common case Or(len(Languages)==0) better
|
||||||
|
// e.g. by emitting `rule.Rule(...)` instead of
|
||||||
|
// an (ugly) `rule.Or( rule.MatchingLanguages(""), ... )`
|
||||||
|
result = &LanguagePattern{"Or", rule.Languages, pattern, nil, isRE2(pattern)}
|
||||||
} else if rule.NegativePattern != "" { // NotPattern
|
} else if rule.NegativePattern != "" { // NotPattern
|
||||||
pattern := convertToValidRegexp(rule.NegativePattern)
|
pattern := rule.NegativePattern
|
||||||
result = &LanguagePattern{"Not", rule.Languages, pattern, nil}
|
result = &LanguagePattern{"Not", rule.Languages, pattern, nil, isRE2(pattern)}
|
||||||
} else if rule.NamedPattern != "" { // Named OrPattern
|
} else if rule.NamedPattern != "" { // Named OrPattern
|
||||||
conjunction := strings.Join(namedPatterns[rule.NamedPattern], orPipe)
|
pattern := strings.Join(namedPatterns[rule.NamedPattern], orPipe)
|
||||||
pattern := convertToValidRegexp(conjunction)
|
result = &LanguagePattern{"Or", rule.Languages, pattern, nil, isRE2(pattern)}
|
||||||
result = &LanguagePattern{"Or", rule.Languages, pattern, nil}
|
|
||||||
} else { // AlwaysPattern
|
} else { // AlwaysPattern
|
||||||
result = &LanguagePattern{"Always", rule.Languages, "", nil}
|
result = &LanguagePattern{"Always", rule.Languages, "", nil, true}
|
||||||
}
|
}
|
||||||
|
|
||||||
if isUnsupportedRegexpSyntax(result.Pattern) {
|
if !isRE2(result.Pattern) {
|
||||||
log.Printf("skipping rule: language:'%q', rule:'%q'\n", rule.Languages, result.Pattern)
|
log.Printf("RE2 incompatible syntax for heuristic language:'%s', rule:'%s'\n", rule.Languages, result.Pattern)
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
@ -100,6 +102,7 @@ type LanguagePattern struct {
|
|||||||
Langs []string
|
Langs []string
|
||||||
Pattern string
|
Pattern string
|
||||||
Rules []*LanguagePattern
|
Rules []*LanguagePattern
|
||||||
|
IsRE2 bool
|
||||||
}
|
}
|
||||||
|
|
||||||
type Heuristics struct {
|
type Heuristics struct {
|
||||||
@ -125,7 +128,7 @@ type Patterns struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// StringArray is workaround for parsing named_pattern,
|
// StringArray is workaround for parsing named_pattern,
|
||||||
// wich is sometimes arry and sometimes not.
|
// which is sometimes an array and sometimes is not.
|
||||||
// See https://github.com/go-yaml/yaml/issues/100
|
// See https://github.com/go-yaml/yaml/issues/100
|
||||||
type StringArray []string
|
type StringArray []string
|
||||||
|
|
||||||
@ -173,8 +176,6 @@ func isUnsupportedRegexpSyntax(reg string) bool {
|
|||||||
(strings.HasPrefix(reg, multilinePrefix+`/`) && strings.HasSuffix(reg, `/`))
|
(strings.HasPrefix(reg, multilinePrefix+`/`) && strings.HasSuffix(reg, `/`))
|
||||||
}
|
}
|
||||||
|
|
||||||
// convertToValidRegexp converts Ruby regexp syntax to RE2 equivalent.
|
func isRE2(s string) bool {
|
||||||
// Does not work with Ruby regexp literals.
|
return !isUnsupportedRegexpSyntax(s)
|
||||||
func convertToValidRegexp(rubyRegexp string) string {
|
|
||||||
return multilinePrefix + rubyRegexp
|
|
||||||
}
|
}
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -5,6 +5,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
|
"log"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"text/template"
|
"text/template"
|
||||||
@ -25,6 +26,12 @@ func Vendor(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string)
|
|||||||
return fmt.Errorf("failed to parse YAML %s, %q", fileToParse, err)
|
return fmt.Errorf("failed to parse YAML %s, %q", fileToParse, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for _, re := range regexps {
|
||||||
|
if !isRE2(re) {
|
||||||
|
log.Printf("RE2 incompatible syntax for vendor:'%s'\n", re)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
buf := &bytes.Buffer{}
|
buf := &bytes.Buffer{}
|
||||||
if err := executeVendorTemplate(buf, regexps, tmplPath, tmplName, commit); err != nil {
|
if err := executeVendorTemplate(buf, regexps, tmplPath, tmplName, commit); err != nil {
|
||||||
return err
|
return err
|
||||||
@ -34,34 +41,14 @@ func Vendor(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string)
|
|||||||
}
|
}
|
||||||
|
|
||||||
func executeVendorTemplate(out io.Writer, regexps []string, tmplPath, tmplName, commit string) error {
|
func executeVendorTemplate(out io.Writer, regexps []string, tmplPath, tmplName, commit string) error {
|
||||||
funcs := template.FuncMap{"optimize": collateAllMatchers}
|
funcs := template.FuncMap{"collateAllRegexps": collateAllRegexps}
|
||||||
return executeTemplate(out, tmplName, tmplPath, commit, funcs, regexps)
|
return executeTemplate(out, tmplName, tmplPath, commit, funcs, regexps)
|
||||||
}
|
}
|
||||||
|
|
||||||
func collateAllMatchers(regexps []string) string {
|
// collateAllRegexps collates all regexps into a single large regexp
|
||||||
// We now collate all regexps from VendorMatchers to a single large regexp
|
func collateAllRegexps(regexps []string) string {
|
||||||
// which is at least twice as fast to test than simply iterating & matching.
|
// which is at least twice as fast to test than simply iterating & matching.
|
||||||
//
|
//
|
||||||
// ---
|
|
||||||
//
|
|
||||||
// We could test each matcher from VendorMatchers in turn i.e.
|
|
||||||
//
|
|
||||||
// func IsVendor(filename string) bool {
|
|
||||||
// for _, matcher := range data.VendorMatchers {
|
|
||||||
// if matcher.MatchString(filename) {
|
|
||||||
// return true
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// return false
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// Or naïvely concatentate all these regexps using groups i.e.
|
|
||||||
//
|
|
||||||
// `(regexp1)|(regexp2)|(regexp3)|...`
|
|
||||||
//
|
|
||||||
// However, both of these are relatively slow and don't take advantage
|
|
||||||
// of the inherent structure within our regexps.
|
|
||||||
//
|
|
||||||
// Imperical observation: by looking at the regexps, we only have 3 types.
|
// Imperical observation: by looking at the regexps, we only have 3 types.
|
||||||
// 1. Those that start with `^`
|
// 1. Those that start with `^`
|
||||||
// 2. Those that start with `(^|/)`
|
// 2. Those that start with `(^|/)`
|
||||||
@ -81,8 +68,8 @@ func collateAllMatchers(regexps []string) string {
|
|||||||
|
|
||||||
sort.Strings(regexps)
|
sort.Strings(regexps)
|
||||||
|
|
||||||
|
// Check prefix, group expressions
|
||||||
var caretPrefixed, caretOrSlashPrefixed, theRest []string
|
var caretPrefixed, caretOrSlashPrefixed, theRest []string
|
||||||
// Check prefix, add to the respective group slices
|
|
||||||
for _, re := range regexps {
|
for _, re := range regexps {
|
||||||
if strings.HasPrefix(re, caret) {
|
if strings.HasPrefix(re, caret) {
|
||||||
caretPrefixed = append(caretPrefixed, re[len(caret):])
|
caretPrefixed = append(caretPrefixed, re[len(caret):])
|
||||||
@ -92,6 +79,7 @@ func collateAllMatchers(regexps []string) string {
|
|||||||
theRest = append(theRest, re)
|
theRest = append(theRest, re)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var sb strings.Builder
|
var sb strings.Builder
|
||||||
appendGroupWithCommonPrefix(&sb, "^", caretPrefixed)
|
appendGroupWithCommonPrefix(&sb, "^", caretPrefixed)
|
||||||
sb.WriteString("|")
|
sb.WriteString("|")
|
||||||
|
@ -134,7 +134,7 @@ func main() {
|
|||||||
|
|
||||||
for _, file := range fileList {
|
for _, file := range fileList {
|
||||||
if err := file.generate(file.fileToParse, file.samplesDir, file.outPath, file.tmplPath, file.tmplName, file.commit); err != nil {
|
if err := file.generate(file.fileToParse, file.samplesDir, file.outPath, file.tmplPath, file.tmplName, file.commit); err != nil {
|
||||||
log.Fatalf("error generating template %q to %q: %+v", file.tmplPath, file.outPath, err)
|
log.Fatalf("failed to generate %q from %q - %+v", file.outPath, file.tmplPath, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
//go:build oniguruma
|
||||||
// +build oniguruma
|
// +build oniguruma
|
||||||
|
|
||||||
package regex
|
package regex
|
||||||
@ -6,10 +7,21 @@ import (
|
|||||||
rubex "github.com/go-enry/go-oniguruma"
|
rubex "github.com/go-enry/go-oniguruma"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const Name = Oniguruma
|
||||||
|
|
||||||
type EnryRegexp = *rubex.Regexp
|
type EnryRegexp = *rubex.Regexp
|
||||||
|
|
||||||
func MustCompile(str string) EnryRegexp {
|
func MustCompile(s string) EnryRegexp {
|
||||||
return rubex.MustCompileASCII(str)
|
return rubex.MustCompileASCII(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
// MustCompileMultiline matches in multi-line mode by default with Oniguruma.
|
||||||
|
func MustCompileMultiline(s string) EnryRegexp {
|
||||||
|
return MustCompile(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
func MustCompileRuby(s string) EnryRegexp {
|
||||||
|
return MustCompile(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
func QuoteMeta(s string) string {
|
func QuoteMeta(s string) string {
|
||||||
|
9
regex/regex.go
Normal file
9
regex/regex.go
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
package regex
|
||||||
|
|
||||||
|
// Package regex abstracts regular expression engine
|
||||||
|
// that can be chosen at compile-time by a build tag.
|
||||||
|
|
||||||
|
const (
|
||||||
|
RE2 = "RE2"
|
||||||
|
Oniguruma = "Oniguruma"
|
||||||
|
)
|
@ -1,3 +1,4 @@
|
|||||||
|
//go:build !oniguruma
|
||||||
// +build !oniguruma
|
// +build !oniguruma
|
||||||
|
|
||||||
package regex
|
package regex
|
||||||
@ -6,12 +7,32 @@ import (
|
|||||||
"regexp"
|
"regexp"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const Name = RE2
|
||||||
|
|
||||||
type EnryRegexp = *regexp.Regexp
|
type EnryRegexp = *regexp.Regexp
|
||||||
|
|
||||||
func MustCompile(str string) EnryRegexp {
|
func MustCompile(str string) EnryRegexp {
|
||||||
return regexp.MustCompile(str)
|
return regexp.MustCompile(str)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MustCompileMultiline mimics Ruby defaults for regexp, where ^$ matches begin/end of line.
|
||||||
|
// I.e. it converts Ruby regexp syntax to its RE2 equivalent.
|
||||||
|
func MustCompileMultiline(s string) EnryRegexp {
|
||||||
|
const multilineModeFlag = "(?m)"
|
||||||
|
return regexp.MustCompile(multilineModeFlag + s)
|
||||||
|
}
|
||||||
|
|
||||||
|
// MustCompileRuby is used for expressions with syntax not supported by RE2.
|
||||||
|
// Now it's confusing as we use the result as [data/rule.Matcher] and
|
||||||
|
//
|
||||||
|
// (*Matcher)(nil) != nil
|
||||||
|
//
|
||||||
|
// What is a better way for an expression to indicate unsupported syntax?
|
||||||
|
// e.g. add .IsValidSyntax() to both, Matcher interface and EnryRegexp implementations?
|
||||||
|
func MustCompileRuby(s string) EnryRegexp {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func QuoteMeta(s string) string {
|
func QuoteMeta(s string) string {
|
||||||
return regexp.QuoteMeta(s)
|
return regexp.QuoteMeta(s)
|
||||||
}
|
}
|
||||||
|
27
regex/standard_test.go
Normal file
27
regex/standard_test.go
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
//go:build !oniguruma
|
||||||
|
// +build !oniguruma
|
||||||
|
|
||||||
|
package regex
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestMustCompileMultiline(t *testing.T) {
|
||||||
|
const re = `^\.(.*)!$`
|
||||||
|
want := MustCompileMultiline(re)
|
||||||
|
assert.Equal(t, "(?m)"+re, want.String())
|
||||||
|
|
||||||
|
const s = `.one
|
||||||
|
.two!
|
||||||
|
thre!`
|
||||||
|
if !want.MatchString(s) {
|
||||||
|
t.Fatalf("MustCompileMultiline(`%s`) must match multiline %q\n", re, s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMustCompileRuby(t *testing.T) {
|
||||||
|
assert.Nil(t, MustCompileRuby(``))
|
||||||
|
}
|
14
utils.go
14
utils.go
@ -63,7 +63,21 @@ func IsDotFile(path string) bool {
|
|||||||
|
|
||||||
// IsVendor returns whether or not path is a vendor path.
|
// IsVendor returns whether or not path is a vendor path.
|
||||||
func IsVendor(path string) bool {
|
func IsVendor(path string) bool {
|
||||||
|
// fast path: single collated regex, if the engine supports its syntax
|
||||||
|
if data.FastVendorMatcher != nil {
|
||||||
return data.FastVendorMatcher.MatchString(path)
|
return data.FastVendorMatcher.MatchString(path)
|
||||||
|
}
|
||||||
|
|
||||||
|
// slow path: skip individual rules with unsupported syntax
|
||||||
|
for _, matcher := range data.VendorMatchers {
|
||||||
|
if matcher == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if matcher.MatchString(path) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsTest returns whether or not path is a test path.
|
// IsTest returns whether or not path is a test path.
|
||||||
|
@ -7,57 +7,62 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/go-enry/go-enry/v2/regex"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
//TODO(bzz): port all from test/test_file_blob.rb test_vendored()
|
// TODO(bzz): port all from test/test_file_blob.rb test_vendored()
|
||||||
//https://github.com/github/linguist/blob/86adc140d3e8903980565a2984f5532edf4ae875/test/test_file_blob.rb#L270-L583
|
// https://github.com/github/linguist/blob/86adc140d3e8903980565a2984f5532edf4ae875/test/test_file_blob.rb#L270-L583
|
||||||
var vendorTests = []struct {
|
var vendorTests = []struct {
|
||||||
|
skipOnRE2 bool // some rules are (present in code but) missing at runtime on RE2
|
||||||
path string
|
path string
|
||||||
expected bool
|
expected bool
|
||||||
}{
|
}{
|
||||||
{"cache/", true},
|
{path: "cache/", expected: true},
|
||||||
{"something_cache/", false},
|
{false, "something_cache/", false},
|
||||||
{"random/cache/", true},
|
{false, "random/cache/", true},
|
||||||
{"cache", false},
|
{false, "cache", false},
|
||||||
{"dependencies/", true},
|
{false, "dependencies/", true},
|
||||||
{"Dependencies/", true},
|
{false, "Dependencies/", true},
|
||||||
{"dependency/", false},
|
{false, "dependency/", false},
|
||||||
{"dist/", true},
|
{false, "dist/", true},
|
||||||
{"dist", false},
|
{false, "dist", false},
|
||||||
{"random/dist/", true},
|
{false, "random/dist/", true},
|
||||||
{"random/dist", false},
|
{false, "random/dist", false},
|
||||||
{"deps/", true},
|
{false, "deps/", true},
|
||||||
{"foodeps/", false},
|
{false, "foodeps/", false},
|
||||||
{"configure", true},
|
{false, "configure", true},
|
||||||
{"a/configure", true},
|
{false, "a/configure", true},
|
||||||
{"config.guess", true},
|
{false, "config.guess", true},
|
||||||
{"config.guess/", false},
|
{false, "config.guess/", false},
|
||||||
{".vscode/", true},
|
{false, ".vscode/", true},
|
||||||
{"doc/_build/", true},
|
{false, "doc/_build/", true},
|
||||||
{"a/docs/_build/", true},
|
{false, "a/docs/_build/", true},
|
||||||
{"a/dasdocs/_build-vsdoc.js", true},
|
{false, "a/dasdocs/_build-vsdoc.js", true},
|
||||||
{"a/dasdocs/_build-vsdoc.j", false},
|
{false, "a/dasdocs/_build-vsdoc.j", false},
|
||||||
{"foo/bar", false},
|
{false, "foo/bar", false},
|
||||||
{".sublime-project", true},
|
{false, ".sublime-project", true},
|
||||||
{"foo/vendor/foo", true},
|
{false, "foo/vendor/foo", true},
|
||||||
{"leaflet.draw-src.js", true},
|
{false, "leaflet.draw-src.js", true},
|
||||||
{"foo/bar/MochiKit.js", true},
|
{false, "foo/bar/MochiKit.js", true},
|
||||||
{"foo/bar/dojo.js", true},
|
{false, "foo/bar/dojo.js", true},
|
||||||
{"foo/env/whatever", true},
|
{false, "foo/env/whatever", true},
|
||||||
{"some/python/venv/", false},
|
{false, "some/python/venv/", false},
|
||||||
{"foo/.imageset/bar", true},
|
{false, "foo/.imageset/bar", true},
|
||||||
{"Vagrantfile", true},
|
{false, "Vagrantfile", true},
|
||||||
{"src/bootstrap-custom.js", true},
|
{true, "src/bootstrap-custom.js", true},
|
||||||
// {"/css/bootstrap.rtl.css", true}, // from linguist v7.23
|
// {true, "/css/bootstrap.rtl.css", true}, // from linguist v7.23
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestIsVendor(t *testing.T) {
|
func TestIsVendor(t *testing.T) {
|
||||||
for _, tt := range vendorTests {
|
for _, test := range vendorTests {
|
||||||
t.Run(tt.path, func(t *testing.T) {
|
t.Run(test.path, func(t *testing.T) {
|
||||||
if got := IsVendor(tt.path); got != tt.expected {
|
if got := IsVendor(test.path); got != test.expected {
|
||||||
t.Errorf("IsVendor(%q) = %v, expected %v", tt.path, got, tt.expected)
|
if regex.Name == regex.RE2 && test.skipOnRE2 {
|
||||||
|
return // skip
|
||||||
|
}
|
||||||
|
t.Errorf("IsVendor(%q) = %v, expected %v (usuing %s)", test.path, got, test.expected, regex.Name)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user