mirror of
https://github.com/ralsina/tartrazine.git
synced 2024-11-10 05:22:23 +00:00
heuristics regexp engine configurable #3, adapt IsVendor optimization & tests
Regex collation optimization for IsVendor now fails gracefully. Tests that are affected by non-RE2 syntax are explicitly marked.
This commit is contained in:
parent
8df9e1ecf2
commit
8246efecce
@ -2,15 +2,21 @@ package data
|
||||
|
||||
import "github.com/go-enry/go-enry/v2/regex"
|
||||
|
||||
{{define "mustCompile" -}}
|
||||
{{ if isRE2 . -}}
|
||||
regex.MustCompile({{ . | stringVal }})
|
||||
{{- else -}}
|
||||
regex.MustCompileRuby({{ . | stringVal }})
|
||||
{{- end -}}
|
||||
{{end}}
|
||||
|
||||
var VendorMatchers = []regex.EnryRegexp{
|
||||
{{range $re := . -}}
|
||||
{{ if isRE2 $re -}}
|
||||
regex.MustCompile({{ $re | stringVal }}),
|
||||
{{- else -}}
|
||||
regex.MustCompileRuby({{ $re | stringVal }}),
|
||||
{{ end }}
|
||||
{{ template "mustCompile" $re }},
|
||||
{{end -}}
|
||||
}
|
||||
|
||||
// FastVendorMatcher is equivalent to matching any of the VendorMatchers.
|
||||
var FastVendorMatcher = regex.MustCompile(`{{ optimize . }}`)
|
||||
{{with $singleRE := collateAllRegexps . -}}
|
||||
var FastVendorMatcher = {{template "mustCompile" $singleRE}}
|
||||
{{end}}
|
@ -41,34 +41,14 @@ func Vendor(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string)
|
||||
}
|
||||
|
||||
func executeVendorTemplate(out io.Writer, regexps []string, tmplPath, tmplName, commit string) error {
|
||||
funcs := template.FuncMap{"optimize": collateAllMatchers}
|
||||
funcs := template.FuncMap{"collateAllRegexps": collateAllRegexps}
|
||||
return executeTemplate(out, tmplName, tmplPath, commit, funcs, regexps)
|
||||
}
|
||||
|
||||
func collateAllMatchers(regexps []string) string {
|
||||
// We now collate all regexps from VendorMatchers to a single large regexp
|
||||
// collateAllRegexps all regexps to a single large regexp.
|
||||
func collateAllRegexps(regexps []string) string {
|
||||
// which is at least twice as fast to test than simply iterating & matching.
|
||||
//
|
||||
// ---
|
||||
//
|
||||
// We could test each matcher from VendorMatchers in turn i.e.
|
||||
//
|
||||
// func IsVendor(filename string) bool {
|
||||
// for _, matcher := range data.VendorMatchers {
|
||||
// if matcher.MatchString(filename) {
|
||||
// return true
|
||||
// }
|
||||
// }
|
||||
// return false
|
||||
// }
|
||||
//
|
||||
// Or naïvely concatentate all these regexps using groups i.e.
|
||||
//
|
||||
// `(regexp1)|(regexp2)|(regexp3)|...`
|
||||
//
|
||||
// However, both of these are relatively slow and don't take advantage
|
||||
// of the inherent structure within our regexps.
|
||||
//
|
||||
// Imperical observation: by looking at the regexps, we only have 3 types.
|
||||
// 1. Those that start with `^`
|
||||
// 2. Those that start with `(^|/)`
|
||||
@ -88,12 +68,9 @@ func collateAllMatchers(regexps []string) string {
|
||||
|
||||
sort.Strings(regexps)
|
||||
|
||||
// Check prefix, group expressions
|
||||
var caretPrefixed, caretOrSlashPrefixed, theRest []string
|
||||
// Check prefix, add to the respective group slices
|
||||
for _, re := range regexps {
|
||||
if !isRE2(re) {
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(re, caret) {
|
||||
caretPrefixed = append(caretPrefixed, re[len(caret):])
|
||||
} else if strings.HasPrefix(re, caretOrSlash) {
|
||||
@ -102,6 +79,7 @@ func collateAllMatchers(regexps []string) string {
|
||||
theRest = append(theRest, re)
|
||||
}
|
||||
}
|
||||
|
||||
var sb strings.Builder
|
||||
appendGroupWithCommonPrefix(&sb, "^", caretPrefixed)
|
||||
sb.WriteString("|")
|
||||
|
16
utils.go
16
utils.go
@ -63,7 +63,21 @@ func IsDotFile(path string) bool {
|
||||
|
||||
// IsVendor returns whether or not path is a vendor path.
|
||||
func IsVendor(path string) bool {
|
||||
return data.FastVendorMatcher.MatchString(path)
|
||||
// fast path: single collatated regex, if the engine supports its syntax
|
||||
if data.FastVendorMatcher != nil {
|
||||
return data.FastVendorMatcher.MatchString(path)
|
||||
}
|
||||
|
||||
// slow path: skip individual rules with unsupported syntax
|
||||
for _, matcher := range data.VendorMatchers {
|
||||
if matcher == nil {
|
||||
continue
|
||||
}
|
||||
if matcher.MatchString(path) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// IsTest returns whether or not path is a test path.
|
||||
|
@ -7,57 +7,62 @@ import (
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/go-enry/go-enry/v2/regex"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
//TODO(bzz): port all from test/test_file_blob.rb test_vendored()
|
||||
//https://github.com/github/linguist/blob/86adc140d3e8903980565a2984f5532edf4ae875/test/test_file_blob.rb#L270-L583
|
||||
// TODO(bzz): port all from test/test_file_blob.rb test_vendored()
|
||||
// https://github.com/github/linguist/blob/86adc140d3e8903980565a2984f5532edf4ae875/test/test_file_blob.rb#L270-L583
|
||||
var vendorTests = []struct {
|
||||
path string
|
||||
expected bool
|
||||
skipOnRE2 bool // some rules are (present in code but) missing at runtime on RE2
|
||||
path string
|
||||
expected bool
|
||||
}{
|
||||
{"cache/", true},
|
||||
{"something_cache/", false},
|
||||
{"random/cache/", true},
|
||||
{"cache", false},
|
||||
{"dependencies/", true},
|
||||
{"Dependencies/", true},
|
||||
{"dependency/", false},
|
||||
{"dist/", true},
|
||||
{"dist", false},
|
||||
{"random/dist/", true},
|
||||
{"random/dist", false},
|
||||
{"deps/", true},
|
||||
{"foodeps/", false},
|
||||
{"configure", true},
|
||||
{"a/configure", true},
|
||||
{"config.guess", true},
|
||||
{"config.guess/", false},
|
||||
{".vscode/", true},
|
||||
{"doc/_build/", true},
|
||||
{"a/docs/_build/", true},
|
||||
{"a/dasdocs/_build-vsdoc.js", true},
|
||||
{"a/dasdocs/_build-vsdoc.j", false},
|
||||
{"foo/bar", false},
|
||||
{".sublime-project", true},
|
||||
{"foo/vendor/foo", true},
|
||||
{"leaflet.draw-src.js", true},
|
||||
{"foo/bar/MochiKit.js", true},
|
||||
{"foo/bar/dojo.js", true},
|
||||
{"foo/env/whatever", true},
|
||||
{"some/python/venv/", false},
|
||||
{"foo/.imageset/bar", true},
|
||||
{"Vagrantfile", true},
|
||||
{"src/bootstrap-custom.js", true},
|
||||
// {"/css/bootstrap.rtl.css", true}, // from linguist v7.23
|
||||
{path: "cache/", expected: true},
|
||||
{false, "something_cache/", false},
|
||||
{false, "random/cache/", true},
|
||||
{false, "cache", false},
|
||||
{false, "dependencies/", true},
|
||||
{false, "Dependencies/", true},
|
||||
{false, "dependency/", false},
|
||||
{false, "dist/", true},
|
||||
{false, "dist", false},
|
||||
{false, "random/dist/", true},
|
||||
{false, "random/dist", false},
|
||||
{false, "deps/", true},
|
||||
{false, "foodeps/", false},
|
||||
{false, "configure", true},
|
||||
{false, "a/configure", true},
|
||||
{false, "config.guess", true},
|
||||
{false, "config.guess/", false},
|
||||
{false, ".vscode/", true},
|
||||
{false, "doc/_build/", true},
|
||||
{false, "a/docs/_build/", true},
|
||||
{false, "a/dasdocs/_build-vsdoc.js", true},
|
||||
{false, "a/dasdocs/_build-vsdoc.j", false},
|
||||
{false, "foo/bar", false},
|
||||
{false, ".sublime-project", true},
|
||||
{false, "foo/vendor/foo", true},
|
||||
{false, "leaflet.draw-src.js", true},
|
||||
{false, "foo/bar/MochiKit.js", true},
|
||||
{false, "foo/bar/dojo.js", true},
|
||||
{false, "foo/env/whatever", true},
|
||||
{false, "some/python/venv/", false},
|
||||
{false, "foo/.imageset/bar", true},
|
||||
{false, "Vagrantfile", true},
|
||||
{true, "src/bootstrap-custom.js", true},
|
||||
// {true, "/css/bootstrap.rtl.css", true}, // from linguist v7.23
|
||||
}
|
||||
|
||||
func TestIsVendor(t *testing.T) {
|
||||
for _, tt := range vendorTests {
|
||||
t.Run(tt.path, func(t *testing.T) {
|
||||
if got := IsVendor(tt.path); got != tt.expected {
|
||||
t.Errorf("IsVendor(%q) = %v, expected %v", tt.path, got, tt.expected)
|
||||
for _, test := range vendorTests {
|
||||
t.Run(test.path, func(t *testing.T) {
|
||||
if got := IsVendor(test.path); got != test.expected {
|
||||
if regex.Name == regex.RE2 && test.skipOnRE2 {
|
||||
return // skip
|
||||
}
|
||||
t.Errorf("IsVendor(%q) = %v, expected %v (usuing %s)", test.path, got, test.expected, regex.Name)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user