mirror of
https://github.com/ralsina/tartrazine.git
synced 2024-09-20 07:21:22 +00:00
heuristics regexp engine configurable #3, adapt IsVendor optimization & tests
Regex collation optimization for IsVendor now fails gracefully. Tests that are affected by non-RE2 syntax are explicitly marked.
This commit is contained in:
parent
8df9e1ecf2
commit
8246efecce
@ -2,15 +2,21 @@ package data
|
|||||||
|
|
||||||
import "github.com/go-enry/go-enry/v2/regex"
|
import "github.com/go-enry/go-enry/v2/regex"
|
||||||
|
|
||||||
|
{{define "mustCompile" -}}
|
||||||
|
{{ if isRE2 . -}}
|
||||||
|
regex.MustCompile({{ . | stringVal }})
|
||||||
|
{{- else -}}
|
||||||
|
regex.MustCompileRuby({{ . | stringVal }})
|
||||||
|
{{- end -}}
|
||||||
|
{{end}}
|
||||||
|
|
||||||
var VendorMatchers = []regex.EnryRegexp{
|
var VendorMatchers = []regex.EnryRegexp{
|
||||||
{{range $re := . -}}
|
{{range $re := . -}}
|
||||||
{{ if isRE2 $re -}}
|
{{ template "mustCompile" $re }},
|
||||||
regex.MustCompile({{ $re | stringVal }}),
|
|
||||||
{{- else -}}
|
|
||||||
regex.MustCompileRuby({{ $re | stringVal }}),
|
|
||||||
{{ end }}
|
|
||||||
{{end -}}
|
{{end -}}
|
||||||
}
|
}
|
||||||
|
|
||||||
// FastVendorMatcher is equivalent to matching any of the VendorMatchers.
|
// FastVendorMatcher is equivalent to matching any of the VendorMatchers.
|
||||||
var FastVendorMatcher = regex.MustCompile(`{{ optimize . }}`)
|
{{with $singleRE := collateAllRegexps . -}}
|
||||||
|
var FastVendorMatcher = {{template "mustCompile" $singleRE}}
|
||||||
|
{{end}}
|
@ -41,34 +41,14 @@ func Vendor(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string)
|
|||||||
}
|
}
|
||||||
|
|
||||||
func executeVendorTemplate(out io.Writer, regexps []string, tmplPath, tmplName, commit string) error {
|
func executeVendorTemplate(out io.Writer, regexps []string, tmplPath, tmplName, commit string) error {
|
||||||
funcs := template.FuncMap{"optimize": collateAllMatchers}
|
funcs := template.FuncMap{"collateAllRegexps": collateAllRegexps}
|
||||||
return executeTemplate(out, tmplName, tmplPath, commit, funcs, regexps)
|
return executeTemplate(out, tmplName, tmplPath, commit, funcs, regexps)
|
||||||
}
|
}
|
||||||
|
|
||||||
func collateAllMatchers(regexps []string) string {
|
// collateAllRegexps collates all regexps into a single large regexp,
|
||||||
// We now collate all regexps from VendorMatchers to a single large regexp
|
func collateAllRegexps(regexps []string) string {
|
||||||
// which is at least twice as fast to test than simply iterating & matching.
|
// which is at least twice as fast to test than simply iterating & matching.
|
||||||
//
|
//
|
||||||
// ---
|
|
||||||
//
|
|
||||||
// We could test each matcher from VendorMatchers in turn i.e.
|
|
||||||
//
|
|
||||||
// func IsVendor(filename string) bool {
|
|
||||||
// for _, matcher := range data.VendorMatchers {
|
|
||||||
// if matcher.MatchString(filename) {
|
|
||||||
// return true
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// return false
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// Or naïvely concatentate all these regexps using groups i.e.
|
|
||||||
//
|
|
||||||
// `(regexp1)|(regexp2)|(regexp3)|...`
|
|
||||||
//
|
|
||||||
// However, both of these are relatively slow and don't take advantage
|
|
||||||
// of the inherent structure within our regexps.
|
|
||||||
//
|
|
||||||
// Empirical observation: by looking at the regexps, we only have 3 types.
|
// Empirical observation: by looking at the regexps, we only have 3 types.
|
||||||
// 1. Those that start with `^`
|
// 1. Those that start with `^`
|
||||||
// 2. Those that start with `(^|/)`
|
// 2. Those that start with `(^|/)`
|
||||||
@ -88,12 +68,9 @@ func collateAllMatchers(regexps []string) string {
|
|||||||
|
|
||||||
sort.Strings(regexps)
|
sort.Strings(regexps)
|
||||||
|
|
||||||
|
// Check prefix, group expressions
|
||||||
var caretPrefixed, caretOrSlashPrefixed, theRest []string
|
var caretPrefixed, caretOrSlashPrefixed, theRest []string
|
||||||
// Check prefix, add to the respective group slices
|
|
||||||
for _, re := range regexps {
|
for _, re := range regexps {
|
||||||
if !isRE2(re) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if strings.HasPrefix(re, caret) {
|
if strings.HasPrefix(re, caret) {
|
||||||
caretPrefixed = append(caretPrefixed, re[len(caret):])
|
caretPrefixed = append(caretPrefixed, re[len(caret):])
|
||||||
} else if strings.HasPrefix(re, caretOrSlash) {
|
} else if strings.HasPrefix(re, caretOrSlash) {
|
||||||
@ -102,6 +79,7 @@ func collateAllMatchers(regexps []string) string {
|
|||||||
theRest = append(theRest, re)
|
theRest = append(theRest, re)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var sb strings.Builder
|
var sb strings.Builder
|
||||||
appendGroupWithCommonPrefix(&sb, "^", caretPrefixed)
|
appendGroupWithCommonPrefix(&sb, "^", caretPrefixed)
|
||||||
sb.WriteString("|")
|
sb.WriteString("|")
|
||||||
|
16
utils.go
16
utils.go
@ -63,7 +63,21 @@ func IsDotFile(path string) bool {
|
|||||||
|
|
||||||
// IsVendor returns whether or not path is a vendor path.
|
// IsVendor returns whether or not path is a vendor path.
|
||||||
func IsVendor(path string) bool {
|
func IsVendor(path string) bool {
|
||||||
return data.FastVendorMatcher.MatchString(path)
|
// fast path: single collated regex, if the engine supports its syntax
|
||||||
|
if data.FastVendorMatcher != nil {
|
||||||
|
return data.FastVendorMatcher.MatchString(path)
|
||||||
|
}
|
||||||
|
|
||||||
|
// slow path: skip individual rules with unsupported syntax
|
||||||
|
for _, matcher := range data.VendorMatchers {
|
||||||
|
if matcher == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if matcher.MatchString(path) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsTest returns whether or not path is a test path.
|
// IsTest returns whether or not path is a test path.
|
||||||
|
@ -7,57 +7,62 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/go-enry/go-enry/v2/regex"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
//TODO(bzz): port all from test/test_file_blob.rb test_vendored()
|
// TODO(bzz): port all from test/test_file_blob.rb test_vendored()
|
||||||
//https://github.com/github/linguist/blob/86adc140d3e8903980565a2984f5532edf4ae875/test/test_file_blob.rb#L270-L583
|
// https://github.com/github/linguist/blob/86adc140d3e8903980565a2984f5532edf4ae875/test/test_file_blob.rb#L270-L583
|
||||||
var vendorTests = []struct {
|
var vendorTests = []struct {
|
||||||
path string
|
skipOnRE2 bool // some rules are (present in code but) missing at runtime on RE2
|
||||||
expected bool
|
path string
|
||||||
|
expected bool
|
||||||
}{
|
}{
|
||||||
{"cache/", true},
|
{path: "cache/", expected: true},
|
||||||
{"something_cache/", false},
|
{false, "something_cache/", false},
|
||||||
{"random/cache/", true},
|
{false, "random/cache/", true},
|
||||||
{"cache", false},
|
{false, "cache", false},
|
||||||
{"dependencies/", true},
|
{false, "dependencies/", true},
|
||||||
{"Dependencies/", true},
|
{false, "Dependencies/", true},
|
||||||
{"dependency/", false},
|
{false, "dependency/", false},
|
||||||
{"dist/", true},
|
{false, "dist/", true},
|
||||||
{"dist", false},
|
{false, "dist", false},
|
||||||
{"random/dist/", true},
|
{false, "random/dist/", true},
|
||||||
{"random/dist", false},
|
{false, "random/dist", false},
|
||||||
{"deps/", true},
|
{false, "deps/", true},
|
||||||
{"foodeps/", false},
|
{false, "foodeps/", false},
|
||||||
{"configure", true},
|
{false, "configure", true},
|
||||||
{"a/configure", true},
|
{false, "a/configure", true},
|
||||||
{"config.guess", true},
|
{false, "config.guess", true},
|
||||||
{"config.guess/", false},
|
{false, "config.guess/", false},
|
||||||
{".vscode/", true},
|
{false, ".vscode/", true},
|
||||||
{"doc/_build/", true},
|
{false, "doc/_build/", true},
|
||||||
{"a/docs/_build/", true},
|
{false, "a/docs/_build/", true},
|
||||||
{"a/dasdocs/_build-vsdoc.js", true},
|
{false, "a/dasdocs/_build-vsdoc.js", true},
|
||||||
{"a/dasdocs/_build-vsdoc.j", false},
|
{false, "a/dasdocs/_build-vsdoc.j", false},
|
||||||
{"foo/bar", false},
|
{false, "foo/bar", false},
|
||||||
{".sublime-project", true},
|
{false, ".sublime-project", true},
|
||||||
{"foo/vendor/foo", true},
|
{false, "foo/vendor/foo", true},
|
||||||
{"leaflet.draw-src.js", true},
|
{false, "leaflet.draw-src.js", true},
|
||||||
{"foo/bar/MochiKit.js", true},
|
{false, "foo/bar/MochiKit.js", true},
|
||||||
{"foo/bar/dojo.js", true},
|
{false, "foo/bar/dojo.js", true},
|
||||||
{"foo/env/whatever", true},
|
{false, "foo/env/whatever", true},
|
||||||
{"some/python/venv/", false},
|
{false, "some/python/venv/", false},
|
||||||
{"foo/.imageset/bar", true},
|
{false, "foo/.imageset/bar", true},
|
||||||
{"Vagrantfile", true},
|
{false, "Vagrantfile", true},
|
||||||
{"src/bootstrap-custom.js", true},
|
{true, "src/bootstrap-custom.js", true},
|
||||||
// {"/css/bootstrap.rtl.css", true}, // from linguist v7.23
|
// {true, "/css/bootstrap.rtl.css", true}, // from linguist v7.23
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestIsVendor(t *testing.T) {
|
func TestIsVendor(t *testing.T) {
|
||||||
for _, tt := range vendorTests {
|
for _, test := range vendorTests {
|
||||||
t.Run(tt.path, func(t *testing.T) {
|
t.Run(test.path, func(t *testing.T) {
|
||||||
if got := IsVendor(tt.path); got != tt.expected {
|
if got := IsVendor(test.path); got != test.expected {
|
||||||
t.Errorf("IsVendor(%q) = %v, expected %v", tt.path, got, tt.expected)
|
if regex.Name == regex.RE2 && test.skipOnRE2 {
|
||||||
|
return // skip
|
||||||
|
}
|
||||||
|
t.Errorf("IsVendor(%q) = %v, expected %v (usuing %s)", test.path, got, test.expected, regex.Name)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user