code-gen: syntax-aware generation of vendor regex

This commit is contained in:
Alex Bezzubov 2022-12-25 11:58:23 +01:00
parent 3aeb9879da
commit 319e630aaf
4 changed files with 18 additions and 3 deletions

View File

@@ -3,8 +3,12 @@ package data
import "github.com/go-enry/go-enry/v2/regex"
var VendorMatchers = []regex.EnryRegexp{
{{range $regexp := . -}}
regex.MustCompile(`{{ $regexp }}`),
{{range $re := . -}}
{{ if isRE2 $re -}}
regex.MustCompile({{ $re | stringVal }}),
{{- else -}}
regex.MustCompileRuby({{ $re | stringVal }}),
{{ end }}
{{end -}}
}

View File

@@ -45,6 +45,7 @@ func executeTemplate(w io.Writer, name, path, commit string, fmap template.FuncM
}
fmap["getCommit"] = getCommit
fmap["stringVal"] = stringVal
fmap["isRE2"] = isRE2
const headerTmpl = "header.go.tmpl"
headerPath := filepath.Join(filepath.Dir(path), headerTmpl)

View File

@@ -90,7 +90,7 @@ func loadRule(namedPatterns map[string]StringArray, rule *Rule) *LanguagePattern
}
if !isRE2(result.Pattern) {
log.Printf("RE2 incompatible rule: language:'%s', rule:'%s'\n", rule.Languages, result.Pattern)
log.Printf("RE2 incompatible syntax for heuristic language:'%s', rule:'%s'\n", rule.Languages, result.Pattern)
}
return result
}

View File

@@ -5,6 +5,7 @@ import (
"fmt"
"io"
"io/ioutil"
"log"
"sort"
"strings"
"text/template"
@@ -25,6 +26,12 @@ func Vendor(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string)
return fmt.Errorf("failed to parse YAML %s, %q", fileToParse, err)
}
for _, re := range regexps {
if !isRE2(re) {
log.Printf("RE2 incompatible syntax for vendor:'%s'\n", re)
}
}
buf := &bytes.Buffer{}
if err := executeVendorTemplate(buf, regexps, tmplPath, tmplName, commit); err != nil {
return err
@@ -84,6 +91,9 @@ func collateAllMatchers(regexps []string) string {
var caretPrefixed, caretOrSlashPrefixed, theRest []string
// Check prefix, add to the respective group slices
for _, re := range regexps {
if !isRE2(re) {
continue
}
if strings.HasPrefix(re, caret) {
caretPrefixed = append(caretPrefixed, re[len(caret):])
} else if strings.HasPrefix(re, caretOrSlash) {