mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-08-08 09:07:54 +00:00
IsVendor: move RE collation to code generation phase
test plan: * go test -run '^TestIsVendor$' github.com/go-enry/go-enry/v2
This commit is contained in:
@@ -7,3 +7,6 @@ var VendorMatchers = []regex.EnryRegexp{
|
|||||||
regex.MustCompile(`{{ $regexp }}`),
|
regex.MustCompile(`{{ $regexp }}`),
|
||||||
{{end -}}
|
{{end -}}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FastVendorMatcher is equivalent to matching any of the VendorMatchers.
|
||||||
|
var FastVendorMatcher = regex.MustCompile(`{{ optimize . }}`)
|
@@ -2,8 +2,12 @@ package generator
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"text/template"
|
||||||
|
|
||||||
"gopkg.in/yaml.v2"
|
"gopkg.in/yaml.v2"
|
||||||
)
|
)
|
||||||
@@ -16,19 +20,97 @@ func Vendor(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string)
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
var regexpList []string
|
var regexps []string
|
||||||
if err := yaml.Unmarshal(data, &regexpList); err != nil {
|
if err := yaml.Unmarshal(data, &regexps); err != nil {
|
||||||
return nil
|
return fmt.Errorf("failed to parse YAML %s, %q", fileToParse, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
buf := &bytes.Buffer{}
|
buf := &bytes.Buffer{}
|
||||||
if err := executeVendorTemplate(buf, regexpList, tmplPath, tmplName, commit); err != nil {
|
if err := executeVendorTemplate(buf, regexps, tmplPath, tmplName, commit); err != nil {
|
||||||
return nil
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
return formatedWrite(outPath, buf.Bytes())
|
return formatedWrite(outPath, buf.Bytes())
|
||||||
}
|
}
|
||||||
|
|
||||||
func executeVendorTemplate(out io.Writer, regexpList []string, tmplPath, tmplName, commit string) error {
|
func executeVendorTemplate(out io.Writer, regexps []string, tmplPath, tmplName, commit string) error {
|
||||||
return executeTemplate(out, tmplName, tmplPath, commit, nil, regexpList)
|
funcs := template.FuncMap{"optimize": collateAllMatchers}
|
||||||
|
return executeTemplate(out, tmplName, tmplPath, commit, funcs, regexps)
|
||||||
|
}
|
||||||
|
|
||||||
|
func collateAllMatchers(regexps []string) string {
|
||||||
|
// We now collate all regexps from VendorMatchers to a single large regexp
|
||||||
|
// which is at least twice as fast to test as simply iterating & matching.
|
||||||
|
//
|
||||||
|
// ---
|
||||||
|
//
|
||||||
|
// We could test each matcher from VendorMatchers in turn i.e.
|
||||||
|
//
|
||||||
|
// func IsVendor(filename string) bool {
|
||||||
|
// for _, matcher := range data.VendorMatchers {
|
||||||
|
// if matcher.MatchString(filename) {
|
||||||
|
// return true
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// return false
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// Or naïvely concatenate all these regexps using groups i.e.
|
||||||
|
//
|
||||||
|
// `(regexp1)|(regexp2)|(regexp3)|...`
|
||||||
|
//
|
||||||
|
// However, both of these are relatively slow and don't take advantage
|
||||||
|
// of the inherent structure within our regexps.
|
||||||
|
//
|
||||||
|
// Empirical observation: by looking at the regexps, we only have 3 types.
|
||||||
|
// 1. Those that start with `^`
|
||||||
|
// 2. Those that start with `(^|/)`
|
||||||
|
// 3. All the rest
|
||||||
|
//
|
||||||
|
// If we collate our regexps into these 3 groups - that will significantly
|
||||||
|
// reduce the likelihood of backtracking within the regexp trie matcher.
|
||||||
|
//
|
||||||
|
// A further improvement is to use non-capturing groups (?:) as otherwise
|
||||||
|
// the regexp parser, whilst matching, will have to allocate slices for
|
||||||
|
// matching positions. (A future improvement left out could be to
|
||||||
|
// enforce non-capturing groups within the sub-regexps.)
|
||||||
|
const (
|
||||||
|
caret = "^"
|
||||||
|
caretOrSlash = "(^|/)"
|
||||||
|
)
|
||||||
|
|
||||||
|
sort.Strings(regexps)
|
||||||
|
|
||||||
|
var caretPrefixed, caretOrSlashPrefixed, theRest []string
|
||||||
|
// Check prefix, add to the respective group slices
|
||||||
|
for _, re := range regexps {
|
||||||
|
if strings.HasPrefix(re, caret) {
|
||||||
|
caretPrefixed = append(caretPrefixed, re[len(caret):])
|
||||||
|
} else if strings.HasPrefix(re, caretOrSlash) {
|
||||||
|
caretOrSlashPrefixed = append(caretOrSlashPrefixed, re[len(caretOrSlash):])
|
||||||
|
} else {
|
||||||
|
theRest = append(theRest, re)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
var sb strings.Builder
|
||||||
|
appendGroupWithCommonPrefix(&sb, "^", caretPrefixed)
|
||||||
|
sb.WriteString("|")
|
||||||
|
|
||||||
|
appendGroupWithCommonPrefix(&sb, "(?:^|/)", caretOrSlashPrefixed)
|
||||||
|
sb.WriteString("|")
|
||||||
|
|
||||||
|
appendGroupWithCommonPrefix(&sb, "", theRest)
|
||||||
|
return sb.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
func appendGroupWithCommonPrefix(sb *strings.Builder, commonPrefix string, res []string) {
|
||||||
|
sb.WriteString("(?:")
|
||||||
|
if commonPrefix != "" {
|
||||||
|
sb.WriteString(fmt.Sprintf("%s(?:(?:", commonPrefix))
|
||||||
|
}
|
||||||
|
sb.WriteString(strings.Join(res, ")|(?:"))
|
||||||
|
if commonPrefix != "" {
|
||||||
|
sb.WriteString("))")
|
||||||
|
}
|
||||||
|
sb.WriteString(")")
|
||||||
}
|
}
|
||||||
|
84
utils.go
84
utils.go
@@ -2,9 +2,7 @@ package enry
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"fmt"
|
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"sort"
|
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/go-enry/go-enry/v2/data"
|
"github.com/go-enry/go-enry/v2/data"
|
||||||
@@ -63,89 +61,9 @@ func IsDotFile(path string) bool {
|
|||||||
return strings.HasPrefix(base, ".") && base != "."
|
return strings.HasPrefix(base, ".") && base != "."
|
||||||
}
|
}
|
||||||
|
|
||||||
var allVendorRegExp regex.EnryRegexp
|
|
||||||
|
|
||||||
// IsVendor returns whether or not path is a vendor path.
|
// IsVendor returns whether or not path is a vendor path.
|
||||||
func IsVendor(path string) bool {
|
func IsVendor(path string) bool {
|
||||||
return allVendorRegExp.MatchString(path)
|
return data.FastVendorMatcher.MatchString(path)
|
||||||
}
|
|
||||||
|
|
||||||
func init() {
|
|
||||||
// We now collate all regexps from VendorMatchers to a single large regexp
|
|
||||||
// which is at least twice as fast to test as simply iterating & matching.
|
|
||||||
//
|
|
||||||
// ---
|
|
||||||
//
|
|
||||||
// We could test each matcher from VendorMatchers in turn i.e.
|
|
||||||
//
|
|
||||||
// func IsVendor(filename string) bool {
|
|
||||||
// for _, matcher := range data.VendorMatchers {
|
|
||||||
// if matcher.MatchString(filename) {
|
|
||||||
// return true
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// return false
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// Or naïvely concatenate all these regexps using groups i.e.
|
|
||||||
//
|
|
||||||
// `(regexp1)|(regexp2)|(regexp3)|...`
|
|
||||||
//
|
|
||||||
// However, both of these are relatively slow and don't take advantage
|
|
||||||
// of the inherent structure within our regexps.
|
|
||||||
//
|
|
||||||
// Empirical observation: by looking at the regexps, we only have 3 types.
|
|
||||||
// 1. Those that start with `^`
|
|
||||||
// 2. Those that start with `(^|/)`
|
|
||||||
// 3. All the rest
|
|
||||||
//
|
|
||||||
// If we collate our regexps into these 3 groups - that will significantly
|
|
||||||
// reduce the likelihood of backtracking within the regexp trie matcher.
|
|
||||||
//
|
|
||||||
// A further improvement is to use non-capturing groups (?:) as otherwise
|
|
||||||
// the regexp parser, whilst matching, will have to allocate slices for
|
|
||||||
// matching positions. (A future improvement left out could be to
|
|
||||||
// enforce non-capturing groups within the sub-regexps.)
|
|
||||||
|
|
||||||
matchers := data.VendorMatchers
|
|
||||||
sort.SliceStable(matchers, func(i, j int) bool {
|
|
||||||
return matchers[i].String() < matchers[j].String()
|
|
||||||
})
|
|
||||||
|
|
||||||
var caretPrefixed, caretOrSlashPrefixed, theRest []string
|
|
||||||
// Check prefix, add to the respective group slices
|
|
||||||
for _, matcher := range matchers {
|
|
||||||
str := matcher.String()
|
|
||||||
if strings.HasPrefix(str, "^") {
|
|
||||||
caretPrefixed = append(caretPrefixed, str[1:])
|
|
||||||
} else if strings.HasPrefix(str, "(^|/)") {
|
|
||||||
caretOrSlashPrefixed = append(caretOrSlashPrefixed, str[5:])
|
|
||||||
} else {
|
|
||||||
theRest = append(theRest, str)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
var sb strings.Builder
|
|
||||||
// group 1 - start with `^`
|
|
||||||
appendGroupWithCommonPrefix(&sb, "^", caretPrefixed)
|
|
||||||
sb.WriteString("|")
|
|
||||||
// group 2 - start with `(^|/)`
|
|
||||||
appendGroupWithCommonPrefix(&sb, "(?:^|/)", caretOrSlashPrefixed)
|
|
||||||
sb.WriteString("|")
|
|
||||||
// group 3 - all the rest.
|
|
||||||
appendGroupWithCommonPrefix(&sb, "", theRest)
|
|
||||||
allVendorRegExp = regex.MustCompile(sb.String())
|
|
||||||
}
|
|
||||||
|
|
||||||
func appendGroupWithCommonPrefix(sb *strings.Builder, commonPrefix string, res []string) {
|
|
||||||
sb.WriteString("(?:")
|
|
||||||
if commonPrefix != "" {
|
|
||||||
sb.WriteString(fmt.Sprintf("%s(?:(?:", commonPrefix))
|
|
||||||
}
|
|
||||||
sb.WriteString(strings.Join(res, ")|(?:"))
|
|
||||||
if commonPrefix != "" {
|
|
||||||
sb.WriteString("))")
|
|
||||||
}
|
|
||||||
sb.WriteString(")")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsTest returns whether or not path is a test path.
|
// IsTest returns whether or not path is a test path.
|
||||||
|
Reference in New Issue
Block a user