mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-06-27 14:47:50 -03:00
Add check for non-backtracking subexpressions
This commit is contained in:
@ -39,7 +39,7 @@ func GenHeuristics(fileToParse, _, outPath, tmplPath, tmplName, commit string) e
|
||||
|
||||
// loadHeuristics transforms parsed YAML to map[".ext"]->IR for code generation.
|
||||
func loadHeuristics(yaml *Heuristics) (map[string][]*LanguagePattern, error) {
|
||||
var patterns = make(map[string][]*LanguagePattern)
|
||||
patterns := make(map[string][]*LanguagePattern)
|
||||
for _, disambiguation := range yaml.Disambiguations {
|
||||
var rules []*LanguagePattern
|
||||
for _, rule := range disambiguation.Rules {
|
||||
@ -161,13 +161,14 @@ func parseYaml(file string) (*Heuristics, error) {
|
||||
// isUnsupportedRegexpSyntax filters regexp syntax that is not supported by RE2.
|
||||
// In particular, we stumbled upon usage of the following cases:
|
||||
// - lookbehind & lookahead
|
||||
// - non-backtracking subexpressions
|
||||
// - named & numbered capturing group/after text matching
|
||||
// - backreference
|
||||
// - possessive quantifier
|
||||
// For reference on supported syntax see https://github.com/google/re2/wiki/Syntax
|
||||
func isUnsupportedRegexpSyntax(reg string) bool {
|
||||
return strings.Contains(reg, `(?<`) || strings.Contains(reg, `(?=`) || strings.Contains(reg, `(?!`) ||
|
||||
strings.Contains(reg, `\1`) || strings.Contains(reg, `*+`) ||
|
||||
strings.Contains(reg, `(?>`) || strings.Contains(reg, `\1`) || strings.Contains(reg, `*+`) ||
|
||||
// See https://github.com/github/linguist/pull/4243#discussion_r246105067
|
||||
(strings.HasPrefix(reg, multilinePrefix+`/`) && strings.HasSuffix(reg, `/`))
|
||||
}
|
||||
|
@ -697,12 +697,7 @@ var ContentHeuristics = map[string]*Heuristics{
|
||||
regexp.MustCompile(`(?m)^(\s*namespace\s*[\w\.]+\s*{|\s*\/\/)`),
|
||||
),
|
||||
},
|
||||
".csc": &Heuristics{
|
||||
rule.Or(
|
||||
rule.MatchingLanguages("GSC"),
|
||||
regexp.MustCompile(`(?m)^\s*#\s*(?:using|insert|include|define|namespace)[ \t]+\w|^\s*(?>(?:autoexec|private)\s+){0,2}function\s+(?>(?:autoexec|private)\s+){0,2}\w+\s*\(|\b(?:level|self)[ \t]+thread[ \t]+(?:\[\[[ \t]*(?>\w+\.)*\w+[ \t]*\]\]|\w+)[ \t]*\([^\r\n\)]*\)[ \t]*;|^[ \t]*#[ \t]*(?:precache|using_animtree)[ \t]*\(`),
|
||||
),
|
||||
},
|
||||
".csc": &Heuristics{},
|
||||
".csl": &Heuristics{
|
||||
rule.Or(
|
||||
rule.MatchingLanguages("XML"),
|
||||
@ -879,18 +874,8 @@ var ContentHeuristics = map[string]*Heuristics{
|
||||
regexp.MustCompile(`(?m)^\[indent=[0-9]+\]`),
|
||||
),
|
||||
},
|
||||
".gsc": &Heuristics{
|
||||
rule.Or(
|
||||
rule.MatchingLanguages("GSC"),
|
||||
regexp.MustCompile(`(?m)^\s*#\s*(?:using|insert|include|define|namespace)[ \t]+\w|^\s*(?>(?:autoexec|private)\s+){0,2}function\s+(?>(?:autoexec|private)\s+){0,2}\w+\s*\(|\b(?:level|self)[ \t]+thread[ \t]+(?:\[\[[ \t]*(?>\w+\.)*\w+[ \t]*\]\]|\w+)[ \t]*\([^\r\n\)]*\)[ \t]*;|^[ \t]*#[ \t]*(?:precache|using_animtree)[ \t]*\(`),
|
||||
),
|
||||
},
|
||||
".gsh": &Heuristics{
|
||||
rule.Or(
|
||||
rule.MatchingLanguages("GSC"),
|
||||
regexp.MustCompile(`(?m)^\s*#\s*(?:using|insert|include|define|namespace)[ \t]+\w|^\s*(?>(?:autoexec|private)\s+){0,2}function\s+(?>(?:autoexec|private)\s+){0,2}\w+\s*\(|\b(?:level|self)[ \t]+thread[ \t]+(?:\[\[[ \t]*(?>\w+\.)*\w+[ \t]*\]\]|\w+)[ \t]*\([^\r\n\)]*\)[ \t]*;|^[ \t]*#[ \t]*(?:precache|using_animtree)[ \t]*\(`),
|
||||
),
|
||||
},
|
||||
".gsc": &Heuristics{},
|
||||
".gsh": &Heuristics{},
|
||||
".h": &Heuristics{
|
||||
rule.Or(
|
||||
rule.MatchingLanguages("Objective-C"),
|
||||
|
Reference in New Issue
Block a user