Merge pull request #143 from go-enry/re-collation-at-codegen

Move venrod RE collation at codegen
This commit is contained in:
Alex
2022-12-02 10:11:39 +01:00
committed by GitHub
5 changed files with 99 additions and 90 deletions

View File

@ -173,3 +173,6 @@ var VendorMatchers = []regex.EnryRegexp{
regex.MustCompile(`(^|/)\.gitpod\.Dockerfile$`),
regex.MustCompile(`(^|/)\.github/`),
}
// FastVendorMatcher is equivalent to matching any of the VendorMatchers.
var FastVendorMatcher = regex.MustCompile(`(?:^(?:(?:[Dd]ependencies/)|(?:debian/)|(?:deps/)|(?:rebar$)))|(?:(?:^|/)(?:(?:BuddyBuildSDK\.framework/)|(?:Carthage/)|(?:Chart\.js$)|(?:Control\.FullScreen\.css)|(?:Control\.FullScreen\.js)|(?:Crashlytics\.framework/)|(?:Fabric\.framework/)|(?:Godeps/_workspace/)|(?:Jenkinsfile$)|(?:Leaflet\.Coordinates-\d+\.\d+\.\d+\.src\.js$)|(?:MathJax/)|(?:MochiKit\.js$)|(?:RealmSwift\.framework)|(?:Realm\.framework)|(?:Sparkle/)|(?:Vagrantfile$)|(?:[Bb]ourbon/.*\.(css|less|scss|styl)$)|(?:[Cc]ode[Mm]irror/(\d+\.\d+/)?(lib|mode|theme|addon|keymap|demo))|(?:[Ee]xtern(als?)?/)|(?:[Mm]icrosoft([Mm]vc)?([Aa]jax|[Vv]alidation)(\.debug)?\.js$)|(?:[Pp]ackages\/.+\.\d+\/)|(?:[Ss]pecs?/fixtures/)|(?:[Tt]ests?/fixtures/)|(?:[Vv]+endor/)|(?:\.[Dd][Ss]_[Ss]tore$)|(?:\.gitattributes$)|(?:\.github/)|(?:\.gitignore$)|(?:\.gitmodules$)|(?:\.gitpod\.Dockerfile$)|(?:\.google_apis/)|(?:\.indent\.pro)|(?:\.mvn/wrapper/)|(?:\.osx$)|(?:\.sublime-project)|(?:\.sublime-workspace)|(?:\.vscode/)|(?:\.yarn/plugins/)|(?:\.yarn/releases/)|(?:\.yarn/sdks/)|(?:\.yarn/unplugged/)|(?:\.yarn/versions/)|(?:_esy$)|(?:ace-builds/)|(?:aclocal\.m4)|(?:activator$)|(?:activator\.bat$)|(?:admin_media/)|(?:angular([^.]*)\.js$)|(?:animate\.(css|less|scss|styl)$)|(?:bootbox\.js)|(?:bootstrap([^/.]*)\.(js|css|less|scss|styl)$)|(?:bootstrap-datepicker/)|(?:bower_components/)|(?:bulma\.(css|sass|scss)$)|(?:cache/)|(?:ckeditor\.js$)|(?:config\.guess$)|(?:config\.sub$)|(?:configure$)|(?:controls\.js$)|(?:cordova([^.]*)\.js$)|(?:cordova\-\d\.\d(\.\d)?\.js$)|(?:cpplint\.py)|(?:custom\.bootstrap([^\s]*)(js|css|less|scss|styl)$)|(?:dist/)|(?:docs?/_?(build|themes?|templates?|static)/)|(?:dojo\.js$)|(?:dotnet-install\.(ps1|sh)$)|(?:dragdrop\.js$)|(?:effects\.js$)|(?:env/)|(?:erlang\.mk)|(?:extjs/.*?\.html$)|(?:extjs/.*?\.js$)|(?:extjs/.*?\.properties$)|(?:extjs/.*?\.txt$)|(?:extjs/.*?\.xml$)|(?:extjs/\.sencha/)|(?:extjs/builds/)|(?:extjs/cmd/)|(?:extjs/docs/)|(?:extjs/examples/)|(?:extjs/locale/)|(?:extjs/packages/)|(?:extjs/plugins/)|(?:extjs/resources/)|(?:extjs/src/)|(?:extjs/welcome/)|(?:fabfile\.py$)|(?:flow-typed/.*\.js$)|(?:font-?awesome/.*\.(css|less|scss|styl)$)|(?:font-?awesome\.(css|less|scss|styl)$)|(?:fontello(.*?)\.css$)|(?:foundation(\..*)?\.js$)|(?:foundation\.(css|less|scss|styl)$)|(?:fuelux\.js)|(?:gradle/wrapper/)|(?:gradlew$)|(?:gradlew\.bat$)|(?:html5shiv\.js$)|(?:inst/extdata/)|(?:jquery([^.]*)\.js$)|(?:jquery([^.]*)\.unobtrusive\-ajax\.js$)|(?:jquery([^.]*)\.validate(\.unobtrusive)?\.js$)|(?:jquery\-\d\.\d+(\.\d+)?\.js$)|(?:jquery\-ui(\-\d\.\d+(\.\d+)?)?(\.\w+)?\.(js|css)$)|(?:jquery\.(ui|effects)\.([^.]*)\.(js|css)$)|(?:jquery\.dataTables\.js)|(?:jquery\.fancybox\.(js|css))|(?:jquery\.fileupload(-\w+)?\.js$)|(?:jquery\.fn\.gantt\.js)|(?:knockout-(\d+\.){3}(debug\.)?js$)|(?:leaflet\.draw-src\.js)|(?:leaflet\.draw\.css)|(?:leaflet\.spin\.js)|(?:libtool\.m4)|(?:ltoptions\.m4)|(?:ltsugar\.m4)|(?:ltversion\.m4)|(?:lt~obsolete\.m4)|(?:materialize\.(css|less|scss|styl|js)$)|(?:modernizr\-\d\.\d+(\.\d+)?\.js$)|(?:modernizr\.custom\.\d+\.js$)|(?:mootools([^.]*)\d+\.\d+.\d+([^.]*)\.js$)|(?:mvnw$)|(?:mvnw\.cmd$)|(?:node_modules/)|(?:normalize\.(css|less|scss|styl)$)|(?:octicons\.css)|(?:pdf\.worker\.js)|(?:proguard-rules\.pro$)|(?:proguard\.pro$)|(?:prototype(.*)\.js$)|(?:puphpet/)|(?:react(-[^.]*)?\.js$)|(?:run\.n$)|(?:select2/.*\.(css|scss|js)$)|(?:shBrush([^.]*)\.js$)|(?:shCore\.js$)|(?:shLegacy\.js$)|(?:skeleton\.(css|less|scss|styl)$)|(?:slick\.\w+.js$)|(?:sprockets-octicons\.scss)|(?:testdata/)|(?:tiny_mce([^.]*)\.js$)|(?:tiny_mce/(langs|plugins|themes|utils))|(?:vendors?/)|(?:vignettes/)|(?:waf$)|(?:wicket-leaflet\.js)|(?:yahoo-([^.]*)\.js$)|(?:yui([^.]*)\.js$)))|(?:(.*?)\.d\.ts$)|(?:(3rd|[Tt]hird)[-_]?[Pp]arty/)|(?:([^\s]*)import\.(css|less|scss|styl)$)|(?:(\.|-)min\.(js|css)$)|(?:(^|\/)d3(\.v\d+)?([^.]*)\.js$)|(?:-vsdoc\.js$)|(?:\.imageset/)|(?:\.intellisense\.js$)|(?:\.xctemplate/)`)

View File

@ -2,8 +2,12 @@ package generator
import (
"bytes"
"fmt"
"io"
"io/ioutil"
"sort"
"strings"
"text/template"
"gopkg.in/yaml.v2"
)
@ -16,19 +20,97 @@ func Vendor(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string)
return err
}
var regexpList []string
if err := yaml.Unmarshal(data, &regexpList); err != nil {
return nil
var regexps []string
if err := yaml.Unmarshal(data, &regexps); err != nil {
return fmt.Errorf("failed to parse YAML %s, %q", fileToParse, err)
}
buf := &bytes.Buffer{}
if err := executeVendorTemplate(buf, regexpList, tmplPath, tmplName, commit); err != nil {
return nil
if err := executeVendorTemplate(buf, regexps, tmplPath, tmplName, commit); err != nil {
return err
}
return formatedWrite(outPath, buf.Bytes())
}
func executeVendorTemplate(out io.Writer, regexpList []string, tmplPath, tmplName, commit string) error {
return executeTemplate(out, tmplName, tmplPath, commit, nil, regexpList)
func executeVendorTemplate(out io.Writer, regexps []string, tmplPath, tmplName, commit string) error {
funcs := template.FuncMap{"optimize": collateAllMatchers}
return executeTemplate(out, tmplName, tmplPath, commit, funcs, regexps)
}
func collateAllMatchers(regexps []string) string {
// We now collate all regexps from VendorMatchers to a single large regexp
// which is at least twice as fast to test than simply iterating & matching.
//
// ---
//
// We could test each matcher from VendorMatchers in turn i.e.
//
// func IsVendor(filename string) bool {
// for _, matcher := range data.VendorMatchers {
// if matcher.MatchString(filename) {
// return true
// }
// }
// return false
// }
//
// Or naïvely concatentate all these regexps using groups i.e.
//
// `(regexp1)|(regexp2)|(regexp3)|...`
//
// However, both of these are relatively slow and don't take advantage
// of the inherent structure within our regexps.
//
// Imperical observation: by looking at the regexps, we only have 3 types.
// 1. Those that start with `^`
// 2. Those that start with `(^|/)`
// 3. All the rest
//
// If we collate our regexps into these 3 groups - that will significantly
// reduce the likelihood of backtracking within the regexp trie matcher.
//
// A further improvement is to use non-capturing groups (?:) as otherwise
// the regexp parser, whilst matching, will have to allocate slices for
// matching positions. (A future improvement left out could be to
// enforce non-capturing groups within the sub-regexps.)
const (
caret = "^"
caretOrSlash = "(^|/)"
)
sort.Strings(regexps)
var caretPrefixed, caretOrSlashPrefixed, theRest []string
// Check prefix, add to the respective group slices
for _, re := range regexps {
if strings.HasPrefix(re, caret) {
caretPrefixed = append(caretPrefixed, re[len(caret):])
} else if strings.HasPrefix(re, caretOrSlash) {
caretOrSlashPrefixed = append(caretOrSlashPrefixed, re[len(caretOrSlash):])
} else {
theRest = append(theRest, re)
}
}
var sb strings.Builder
appendGroupWithCommonPrefix(&sb, "^", caretPrefixed)
sb.WriteString("|")
appendGroupWithCommonPrefix(&sb, "(?:^|/)", caretOrSlashPrefixed)
sb.WriteString("|")
appendGroupWithCommonPrefix(&sb, "", theRest)
return sb.String()
}
func appendGroupWithCommonPrefix(sb *strings.Builder, commonPrefix string, res []string) {
sb.WriteString("(?:")
if commonPrefix != "" {
sb.WriteString(fmt.Sprintf("%s(?:(?:", commonPrefix))
}
sb.WriteString(strings.Join(res, ")|(?:"))
if commonPrefix != "" {
sb.WriteString("))")
}
sb.WriteString(")")
}