code-gen: syntax-aware generation of vendor regex

This commit is contained in:
Alex Bezzubov 2022-12-25 11:58:23 +01:00
parent 3aeb9879da
commit 319e630aaf
4 changed files with 18 additions and 3 deletions

View File

@@ -3,8 +3,12 @@ package data
import "github.com/go-enry/go-enry/v2/regex" import "github.com/go-enry/go-enry/v2/regex"
var VendorMatchers = []regex.EnryRegexp{ var VendorMatchers = []regex.EnryRegexp{
{{range $regexp := . -}} {{range $re := . -}}
regex.MustCompile(`{{ $regexp }}`), {{ if isRE2 $re -}}
regex.MustCompile({{ $re | stringVal }}),
{{- else -}}
regex.MustCompileRuby({{ $re | stringVal }}),
{{ end }}
{{end -}} {{end -}}
} }

View File

@@ -45,6 +45,7 @@ func executeTemplate(w io.Writer, name, path, commit string, fmap template.FuncM
} }
fmap["getCommit"] = getCommit fmap["getCommit"] = getCommit
fmap["stringVal"] = stringVal fmap["stringVal"] = stringVal
fmap["isRE2"] = isRE2
const headerTmpl = "header.go.tmpl" const headerTmpl = "header.go.tmpl"
headerPath := filepath.Join(filepath.Dir(path), headerTmpl) headerPath := filepath.Join(filepath.Dir(path), headerTmpl)

View File

@@ -90,7 +90,7 @@ func loadRule(namedPatterns map[string]StringArray, rule *Rule) *LanguagePattern
} }
if !isRE2(result.Pattern) { if !isRE2(result.Pattern) {
log.Printf("RE2 incompatible rule: language:'%s', rule:'%s'\n", rule.Languages, result.Pattern) log.Printf("RE2 incompatible syntax for heuristic language:'%s', rule:'%s'\n", rule.Languages, result.Pattern)
} }
return result return result
} }

View File

@@ -5,6 +5,7 @@ import (
"fmt" "fmt"
"io" "io"
"io/ioutil" "io/ioutil"
"log"
"sort" "sort"
"strings" "strings"
"text/template" "text/template"
@@ -25,6 +26,12 @@ func Vendor(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string)
return fmt.Errorf("failed to parse YAML %s, %q", fileToParse, err) return fmt.Errorf("failed to parse YAML %s, %q", fileToParse, err)
} }
for _, re := range regexps {
if !isRE2(re) {
log.Printf("RE2 incompatible syntax for vendor:'%s'\n", re)
}
}
buf := &bytes.Buffer{} buf := &bytes.Buffer{}
if err := executeVendorTemplate(buf, regexps, tmplPath, tmplName, commit); err != nil { if err := executeVendorTemplate(buf, regexps, tmplPath, tmplName, commit); err != nil {
return err return err
@@ -84,6 +91,9 @@ func collateAllMatchers(regexps []string) string {
var caretPrefixed, caretOrSlashPrefixed, theRest []string var caretPrefixed, caretOrSlashPrefixed, theRest []string
// Check prefix, add to the respective group slices // Check prefix, add to the respective group slices
for _, re := range regexps { for _, re := range regexps {
if !isRE2(re) {
continue
}
if strings.HasPrefix(re, caret) { if strings.HasPrefix(re, caret) {
caretPrefixed = append(caretPrefixed, re[len(caret):]) caretPrefixed = append(caretPrefixed, re[len(caret):])
} else if strings.HasPrefix(re, caretOrSlash) { } else if strings.HasPrefix(re, caretOrSlash) {