code-gen: make content heuristics regexp engine configurable & generation syntax-aware

This commit is contained in:
Alex Bezzubov
2022-12-25 11:56:58 +01:00
parent 0b92f97b9c
commit 5e590f3554
4 changed files with 54 additions and 24 deletions

View File

@ -1,9 +1,8 @@
package data
import (
"regexp"
"github.com/go-enry/go-enry/v2/data/rule"
"github.com/go-enry/go-enry/v2/regex"
)
var ContentHeuristics = map[string]*Heuristics{
@ -27,12 +26,12 @@ var ContentHeuristics = map[string]*Heuristics{
{{- else if eq .Op "Or" -}}
rule.Or(
{{ template "Languages" .Langs -}}
regexp.MustCompile({{ .Pattern | stringVal }}),
{{ template "mustCompile" . }}
),
{{- else if eq .Op "Not" -}}
rule.Not(
{{ template "Languages" .Langs -}}
regexp.MustCompile({{ .Pattern | stringVal }}),
{{ template "mustCompile" . }}
),
{{- else if eq .Op "Always" -}}
rule.Always(
@ -49,3 +48,11 @@ var ContentHeuristics = map[string]*Heuristics{
rule.MatchingLanguages(""),
{{end -}}
{{end}}
{{define "mustCompile" -}}
{{- /*
mustCompile emits the Go expression that compiles one heuristic rule's regexp,
followed by a trailing comma so it can be used directly as an argument line
inside a rule.Or(...) / rule.Not(...) call.

The template is invoked with the rule as its dot and reads two fields:
  .IsRE2   - when true, the call emitted is regex.MustCompileMultiline(...);
             otherwise regex.MustCompileRuby(...) is emitted.
  .Pattern - the regexp source, piped through the stringVal template function
             (presumably renders it as a Go string literal; confirm against
             the generator's FuncMap).

The "-" trim markers on the surrounding actions control the whitespace of the
generated code; keep them as they are when editing this block.
*/ -}}
{{ if .IsRE2 -}}
regex.MustCompileMultiline({{ .Pattern | stringVal }}),
{{- else -}}
regex.MustCompileRuby({{ .Pattern | stringVal }}),
{{ end -}}
{{end}}