diff --git a/README.md b/README.md index b38277d..8011bfc 100644 --- a/README.md +++ b/README.md @@ -184,6 +184,8 @@ Parsing [linguist/samples](https://github.com/github/linguist/tree/master/sample In all the cases above that have an issue number - we plan to update enry to match Linguist behavior. +> All the issues related to heuristics' regexp syntax incompatibilities with the RE2 engine can be avoided by using `oniguruma` instead (see [instructions](#misc)) + ## Benchmarks Enry's language detection has been compared with Linguist's on [_linguist/samples_](https://github.com/github/linguist/tree/master/samples). diff --git a/data/content.go b/data/content.go index d191799..ca53b77 100644 --- a/data/content.go +++ b/data/content.go @@ -4,9 +4,8 @@ package data import ( - "regexp" - "github.com/go-enry/go-enry/v2/data/rule" + "github.com/go-enry/go-enry/v2/regex" ) var ContentHeuristics = map[string]*Heuristics{ @@ -15,31 +14,31 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + 
regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Or( rule.MatchingLanguages("Roff"), - regexp.MustCompile(`(?m)^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), + regex.MustCompileMultiline(`^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), ), }, ".1in": &Heuristics{ @@ -47,26 +46,26 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Always( @@ -78,26 +77,26 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + 
regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Always( @@ -109,26 +108,26 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Always( @@ -140,31 +139,31 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - 
regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Or( rule.MatchingLanguages("Roff"), - regexp.MustCompile(`(?m)^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), + regex.MustCompileMultiline(`^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), ), }, ".3": &Heuristics{ @@ -172,31 +171,31 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ 
\t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Or( rule.MatchingLanguages("Roff"), - regexp.MustCompile(`(?m)^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), + regex.MustCompileMultiline(`^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), ), }, ".3in": &Heuristics{ @@ -204,26 +203,26 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Always( @@ -235,26 +234,26 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ 
\t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Always( @@ -266,26 +265,26 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Always( @@ -297,26 +296,26 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( 
rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Always( @@ -328,26 +327,26 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Always( @@ -359,26 +358,26 @@ var 
ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Always( @@ -390,31 +389,31 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH 
+(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Or( rule.MatchingLanguages("Roff"), - regexp.MustCompile(`(?m)^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), + regex.MustCompileMultiline(`^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), ), }, ".5": &Heuristics{ @@ -422,31 +421,31 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Or( rule.MatchingLanguages("Roff"), - regexp.MustCompile(`(?m)^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), + regex.MustCompileMultiline(`^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), ), }, ".6": &Heuristics{ @@ -454,31 +453,31 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd 
+(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Or( rule.MatchingLanguages("Roff"), - regexp.MustCompile(`(?m)^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), + regex.MustCompileMultiline(`^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), ), }, ".7": &Heuristics{ @@ -486,31 +485,31 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), 
rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Or( rule.MatchingLanguages("Roff"), - regexp.MustCompile(`(?m)^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), + regex.MustCompileMultiline(`^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), ), }, ".8": &Heuristics{ @@ -518,31 +517,31 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Or( rule.MatchingLanguages("Roff"), - regexp.MustCompile(`(?m)^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), + regex.MustCompileMultiline(`^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), ), }, ".9": &Heuristics{ @@ -550,31 +549,31 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - 
regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Or( rule.MatchingLanguages("Roff"), - regexp.MustCompile(`(?m)^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), + regex.MustCompileMultiline(`^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), ), }, ".al": &Heuristics{ @@ -582,38 +581,43 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("AL"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)\b(?i:(CODEUNIT|PAGE|PAGEEXTENSION|PAGECUSTOMIZATION|DOTNET|ENUM|ENUMEXTENSION|VALUE|QUERY|REPORT|TABLE|TABLEEXTENSION|XMLPORT|PROFILE|CONTROLADDIN))\b`), + regex.MustCompileMultiline(`\b(?i:(CODEUNIT|PAGE|PAGEEXTENSION|PAGECUSTOMIZATION|DOTNET|ENUM|ENUMEXTENSION|VALUE|QUERY|REPORT|TABLE|TABLEEXTENSION|XMLPORT|PROFILE|CONTROLADDIN))\b`), ), ), rule.Always( rule.MatchingLanguages("Perl"), ), }, - ".as": &Heuristics{}, + ".as": &Heuristics{ + rule.Or( + rule.MatchingLanguages("ActionScript"), + 
regex.MustCompileRuby(`^\s*(?:package(?:\s+[\w.]+)?\s+(?:{|$)|import\s+[\w.*]+\s*;|(?=.*?(?:intrinsic|extends))(intrinsic\s+)?class\s+[\w<>.]+(?:\s+extends\s+[\w<>.]+)?|(?:(?:public|protected|private|static)\s+)*(?:(?:var|const|local)\s+\w+\s*:\s*[\w<>.]+(?:\s*=.*)?\s*;|function\s+\w+\s*\((?:\s*\w+\s*:\s*[\w<>.]+\s*(,\s*\w+\s*:\s*[\w<>.]+\s*)*)?\)))`), + ), + }, ".asc": &Heuristics{ rule.Or( rule.MatchingLanguages("Public Key"), - regexp.MustCompile(`(?m)^(----[- ]BEGIN|ssh-(rsa|dss)) `), + regex.MustCompileMultiline(`^(----[- ]BEGIN|ssh-(rsa|dss)) `), ), rule.Or( rule.MatchingLanguages("AsciiDoc"), - regexp.MustCompile(`(?m)^[=-]+(\s|\n)|{{[A-Za-z]`), + regex.MustCompileMultiline(`^[=-]+(\s|\n)|{{[A-Za-z]`), ), rule.Or( rule.MatchingLanguages("AGS Script"), - regexp.MustCompile(`(?m)^(\/\/.+|((import|export)\s+)?(function|int|float|char)\s+((room|repeatedly|on|game)_)?([A-Za-z]+[A-Za-z_0-9]+)\s*[;\(])`), + regex.MustCompileMultiline(`^(\/\/.+|((import|export)\s+)?(function|int|float|char)\s+((room|repeatedly|on|game)_)?([A-Za-z]+[A-Za-z_0-9]+)\s*[;\(])`), ), }, ".asm": &Heuristics{ rule.Or( rule.MatchingLanguages("Motorola 68K Assembly"), - regexp.MustCompile(`(?m)(?im)\bmoveq(?:\.l)?\s+#(?:\$-?[0-9a-f]{1,3}|%[0-1]{1,8}|-?[0-9]{1,3}),\s*d[0-7]\b|(?im)^\s*move(?:\.[bwl])?\s+(?:sr|usp),\s*[^\s]+|(?im)^\s*move\.[bwl]\s+.*\b[ad]\d|(?im)^\s*movem\.[bwl]\b|(?im)^\s*move[mp](?:\.[wl])?\b|(?im)^\s*btst\b|(?im)^\s*dbra\b`), + regex.MustCompileMultiline(`(?im)\bmoveq(?:\.l)?\s+#(?:\$-?[0-9a-f]{1,3}|%[0-1]{1,8}|-?[0-9]{1,3}),\s*d[0-7]\b|(?im)^\s*move(?:\.[bwl])?\s+(?:sr|usp),\s*[^\s]+|(?im)^\s*move\.[bwl]\s+.*\b[ad]\d|(?im)^\s*movem\.[bwl]\b|(?im)^\s*move[mp](?:\.[wl])?\b|(?im)^\s*btst\b|(?im)^\s*dbra\b`), ), }, ".asy": &Heuristics{ rule.Or( rule.MatchingLanguages("LTspice Symbol"), - regexp.MustCompile(`(?m)^SymbolType[ \t]`), + regex.MustCompileMultiline(`^SymbolType[ \t]`), ), rule.Always( rule.MatchingLanguages("Asymptote"), @@ -622,193 +626,211 @@ var ContentHeuristics 
= map[string]*Heuristics{ ".bas": &Heuristics{ rule.Or( rule.MatchingLanguages("FreeBasic"), - regexp.MustCompile(`(?m)^[ \t]*#(?:define|endif|endmacro|ifn?def|if|include|lang|macro)\s`), + regex.MustCompileMultiline(`^[ \t]*#(?:define|endif|endmacro|ifn?def|if|include|lang|macro)\s`), ), rule.Or( rule.MatchingLanguages("BASIC"), - regexp.MustCompile(`(?m)^\s*\d+`), + regex.MustCompileMultiline(`^\s*\d+`), ), }, ".bb": &Heuristics{ rule.Or( rule.MatchingLanguages("BlitzBasic"), - regexp.MustCompile(`(?m)(<^\s*; |End Function)`), + regex.MustCompileMultiline(`(<^\s*; |End Function)`), ), rule.Or( rule.MatchingLanguages("BitBake"), - regexp.MustCompile(`(?m)^\s*(# |include|require)\b`), + regex.MustCompileMultiline(`^\s*(# |include|require)\b`), ), rule.Or( rule.MatchingLanguages("Clojure"), - regexp.MustCompile(`(?m)\((def|defn|defmacro|let)\s`), + regex.MustCompileMultiline(`\((def|defn|defmacro|let)\s`), ), }, ".bi": &Heuristics{ rule.Or( rule.MatchingLanguages("FreeBasic"), - regexp.MustCompile(`(?m)^[ \t]*#(?:define|endif|endmacro|ifn?def|if|include|lang|macro)\s`), + regex.MustCompileMultiline(`^[ \t]*#(?:define|endif|endmacro|ifn?def|if|include|lang|macro)\s`), + ), + }, + ".bs": &Heuristics{ + rule.Or( + rule.MatchingLanguages("Bikeshed"), + regex.MustCompileRuby(`^(?i:\r\n]*>`), + ), + rule.Or( + rule.MatchingLanguages("BrighterScript"), + regex.MustCompileRuby(`(?i:^\s*(?=^sub\s)(?:sub\s*\w+\(.*?\))|(?::\s*sub\(.*?\))$)|(?i:^\s*(end\ssub)$)|(?i:^\s*(?=^function\s)(?:function\s*\w+\(.*?\)\s*as\s*\w*)|(?::\s*function\(.*?\)\s*as\s*\w*)$)|(?i:^\s*(end\sfunction)$)`), ), }, - ".bs": &Heuristics{}, ".builds": &Heuristics{ rule.Or( rule.MatchingLanguages("XML"), - regexp.MustCompile(`(?m)^(\s*)(?i:(?:autoexec|private)\s+){0,2}function\s+(?>(?:autoexec|private)\s+){0,2}\w+\s*\(|\b(?:level|self)[ \t]+thread[ \t]+(?:\[\[[ \t]*(?>\w+\.)*\w+[ \t]*\]\]|\w+)[ \t]*\([^\r\n\)]*\)[ \t]*;|^[ \t]*#[ \t]*(?:precache|using_animtree)[ \t]*\(`), ), }, - ".csc": &Heuristics{}, 
".csl": &Heuristics{ rule.Or( rule.MatchingLanguages("XML"), - regexp.MustCompile(`(?m)(?i:^\s*(\<\?xml|xmlns))`), + regex.MustCompileMultiline(`(?i:^\s*(\<\?xml|xmlns))`), ), rule.Or( rule.MatchingLanguages("Kusto"), - regexp.MustCompile(`(?m)(^\|\s*(where|extend|project|limit|summarize))|(^\.\w+)`), + regex.MustCompileMultiline(`(^\|\s*(where|extend|project|limit|summarize))|(^\.\w+)`), ), }, ".d": &Heuristics{ rule.Or( rule.MatchingLanguages("D"), - regexp.MustCompile(`(?m)^module\s+[\w.]*\s*;|import\s+[\w\s,.:]*;|\w+\s+\w+\s*\(.*\)(?:\(.*\))?\s*{[^}]*}|unittest\s*(?:\(.*\))?\s*{[^}]*}`), + regex.MustCompileMultiline(`^module\s+[\w.]*\s*;|import\s+[\w\s,.:]*;|\w+\s+\w+\s*\(.*\)(?:\(.*\))?\s*{[^}]*}|unittest\s*(?:\(.*\))?\s*{[^}]*}`), ), rule.Or( rule.MatchingLanguages("DTrace"), - regexp.MustCompile(`(?m)^(\w+:\w*:\w*:\w*|BEGIN|END|provider\s+|(tick|profile)-\w+\s+{[^}]*}|#pragma\s+D\s+(option|attributes|depends_on)\s|#pragma\s+ident\s)`), + regex.MustCompileMultiline(`^(\w+:\w*:\w*:\w*|BEGIN|END|provider\s+|(tick|profile)-\w+\s+{[^}]*}|#pragma\s+D\s+(option|attributes|depends_on)\s|#pragma\s+ident\s)`), ), rule.Or( rule.MatchingLanguages("Makefile"), - regexp.MustCompile(`(?m)([\/\\].*:\s+.*\s\\$|: \\$|^[ %]:|^[\w\s\/\\.]+\w+\.\w+\s*:\s+[\w\s\/\\.]+\w+\.\w+)`), + regex.MustCompileMultiline(`([\/\\].*:\s+.*\s\\$|: \\$|^[ %]:|^[\w\s\/\\.]+\w+\.\w+\s*:\s+[\w\s\/\\.]+\w+\.\w+)`), ), }, ".dsp": &Heuristics{ rule.Or( rule.MatchingLanguages("Microsoft Developer Studio Project"), - regexp.MustCompile(`(?m)# Microsoft Developer Studio Generated Build File`), + regex.MustCompileMultiline(`# Microsoft Developer Studio Generated Build File`), ), rule.Or( rule.MatchingLanguages("Faust"), - regexp.MustCompile(`(?m)\bprocess\s*[(=]|\b(library|import)\s*\(\s*"|\bdeclare\s+(name|version|author|copyright|license)\s+"`), + regex.MustCompileMultiline(`\bprocess\s*[(=]|\b(library|import)\s*\(\s*"|\bdeclare\s+(name|version|author|copyright|license)\s+"`), ), }, ".e": &Heuristics{ 
rule.Or( rule.MatchingLanguages("E"), - regexp.MustCompile(`(?m)^\s*(def|var)\s+(.+):=|^\s*(def|to)\s+(\w+)(\(.+\))?\s+{|^\s*(when)\s+(\(.+\))\s+->\s+{`), + regex.MustCompileMultiline(`^\s*(def|var)\s+(.+):=|^\s*(def|to)\s+(\w+)(\(.+\))?\s+{|^\s*(when)\s+(\(.+\))\s+->\s+{`), ), rule.Or( rule.MatchingLanguages("Eiffel"), - regexp.MustCompile(`(?m)^\s*\w+\s*(?:,\s*\w+)*[:]\s*\w+\s|^\s*\w+\s*(?:\(\s*\w+[:][^)]+\))?(?:[:]\s*\w+)?(?:--.+\s+)*\s+(?:do|local)\s|^\s*(?:across|deferred|elseif|ensure|feature|from|inherit|inspect|invariant|note|once|require|undefine|variant|when)\s*$`), + regex.MustCompileMultiline(`^\s*\w+\s*(?:,\s*\w+)*[:]\s*\w+\s|^\s*\w+\s*(?:\(\s*\w+[:][^)]+\))?(?:[:]\s*\w+)?(?:--.+\s+)*\s+(?:do|local)\s|^\s*(?:across|deferred|elseif|ensure|feature|from|inherit|inspect|invariant|note|once|require|undefine|variant|when)\s*$`), ), rule.Or( rule.MatchingLanguages("Euphoria"), - regexp.MustCompile(`(?m)^\s*namespace\s|^\s*(?:public\s+)?include\s|^\s*(?:(?:public|export|global)\s+)?(?:atom|constant|enum|function|integer|object|procedure|sequence|type)\s`), + regex.MustCompileMultiline(`^\s*namespace\s|^\s*(?:public\s+)?include\s|^\s*(?:(?:public|export|global)\s+)?(?:atom|constant|enum|function|integer|object|procedure|sequence|type)\s`), ), }, ".ecl": &Heuristics{ rule.Or( rule.MatchingLanguages("ECLiPSe"), - regexp.MustCompile(`(?m)^[^#]+:-`), + regex.MustCompileMultiline(`^[^#]+:-`), ), rule.Or( rule.MatchingLanguages("ECL"), - regexp.MustCompile(`(?m):=`), + regex.MustCompileMultiline(`:=`), ), }, ".es": &Heuristics{ rule.Or( rule.MatchingLanguages("Erlang"), - regexp.MustCompile(`(?m)^\s*(?:%%|main\s*\(.*?\)\s*->)`), + regex.MustCompileMultiline(`^\s*(?:%%|main\s*\(.*?\)\s*->)`), + ), + rule.Or( + rule.MatchingLanguages("JavaScript"), + regex.MustCompileRuby(`(?m:\/\/|("|')use strict\1|export\s+default\s|\/\*.*?\*\/)`), ), }, ".ex": &Heuristics{ rule.Or( rule.MatchingLanguages("Elixir"), - 
regexp.MustCompile(`(?m)^\s*@moduledoc\s|^\s*(?:cond|import|quote|unless)\s|^\s*def(?:exception|impl|macro|module|protocol)[(\s]`), + regex.MustCompileMultiline(`^\s*@moduledoc\s|^\s*(?:cond|import|quote|unless)\s|^\s*def(?:exception|impl|macro|module|protocol)[(\s]`), ), rule.Or( rule.MatchingLanguages("Euphoria"), - regexp.MustCompile(`(?m)^\s*namespace\s|^\s*(?:public\s+)?include\s|^\s*(?:(?:public|export|global)\s+)?(?:atom|constant|enum|function|integer|object|procedure|sequence|type)\s`), + regex.MustCompileMultiline(`^\s*namespace\s|^\s*(?:public\s+)?include\s|^\s*(?:(?:public|export|global)\s+)?(?:atom|constant|enum|function|integer|object|procedure|sequence|type)\s`), ), }, ".f": &Heuristics{ rule.Or( rule.MatchingLanguages("Forth"), - regexp.MustCompile(`(?m)^: `), + regex.MustCompileMultiline(`^: `), ), rule.Or( rule.MatchingLanguages("Filebench WML"), - regexp.MustCompile(`(?m)flowop`), + regex.MustCompileMultiline(`flowop`), ), rule.Or( rule.MatchingLanguages("Fortran"), - regexp.MustCompile(`(?m)^(?i:[c*][^abd-z]| (subroutine|program|end|data)\s|\s*!)`), + regex.MustCompileMultiline(`^(?i:[c*][^abd-z]| (subroutine|program|end|data)\s|\s*!)`), ), }, ".for": &Heuristics{ rule.Or( rule.MatchingLanguages("Forth"), - regexp.MustCompile(`(?m)^: `), + regex.MustCompileMultiline(`^: `), ), rule.Or( rule.MatchingLanguages("Fortran"), - regexp.MustCompile(`(?m)^(?i:[c*][^abd-z]| (subroutine|program|end|data)\s|\s*!)`), + regex.MustCompileMultiline(`^(?i:[c*][^abd-z]| (subroutine|program|end|data)\s|\s*!)`), ), }, ".fr": &Heuristics{ rule.Or( rule.MatchingLanguages("Forth"), - regexp.MustCompile(`(?m)^(: |also |new-device|previous )`), + regex.MustCompileMultiline(`^(: |also |new-device|previous )`), ), rule.Or( rule.MatchingLanguages("Frege"), - regexp.MustCompile(`(?m)^\s*(import|module|package|data|type) `), + regex.MustCompileMultiline(`^\s*(import|module|package|data|type) `), ), rule.Always( rule.MatchingLanguages("Text"), @@ -817,49 +839,53 @@ var 
ContentHeuristics = map[string]*Heuristics{ ".fs": &Heuristics{ rule.Or( rule.MatchingLanguages("Forth"), - regexp.MustCompile(`(?m)^(: |new-device)`), + regex.MustCompileMultiline(`^(: |new-device)`), ), rule.Or( rule.MatchingLanguages("F#"), - regexp.MustCompile(`(?m)^\s*(#light|import|let|module|namespace|open|type)`), + regex.MustCompileMultiline(`^\s*(#light|import|let|module|namespace|open|type)`), ), rule.Or( rule.MatchingLanguages("GLSL"), - regexp.MustCompile(`(?m)^\s*(#version|precision|uniform|varying|vec[234])`), + regex.MustCompileMultiline(`^\s*(#version|precision|uniform|varying|vec[234])`), ), rule.Or( rule.MatchingLanguages("Filterscript"), - regexp.MustCompile(`(?m)#include|#pragma\s+(rs|version)|__attribute__`), + regex.MustCompileMultiline(`#include|#pragma\s+(rs|version)|__attribute__`), ), }, ".ftl": &Heuristics{ + rule.Or( + rule.MatchingLanguages("FreeMarker"), + regex.MustCompileRuby(`^(?:<|[a-zA-Z-][a-zA-Z0-9_-]+[ \t]+\w)|\${\w+[^\n]*?}|^[ \t]*(?:<#--.*?-->|<#([a-z]+)(?=\s|>)[^>]*>.*?|\[#--.*?--\]|\[#([a-z]+)(?=\s|\])[^\]]*\].*?\[#\2\])`), + ), rule.Or( rule.MatchingLanguages("Fluent"), - regexp.MustCompile(`(?m)^-?[a-zA-Z][a-zA-Z0-9_-]* *=|\{\$-?[a-zA-Z][-\w]*(?:\.[a-zA-Z][-\w]*)?\}`), + regex.MustCompileMultiline(`^-?[a-zA-Z][a-zA-Z0-9_-]* *=|\{\$-?[a-zA-Z][-\w]*(?:\.[a-zA-Z][-\w]*)?\}`), ), }, ".gd": &Heuristics{ rule.Or( rule.MatchingLanguages("GAP"), - regexp.MustCompile(`(?m)\s*(Declare|BindGlobal|KeyDependentOperation)`), + regex.MustCompileMultiline(`\s*(Declare|BindGlobal|KeyDependentOperation)`), ), rule.Or( rule.MatchingLanguages("GDScript"), - regexp.MustCompile(`(?m)\s*(extends|var|const|enum|func|class|signal|tool|yield|assert|onready)`), + regex.MustCompileMultiline(`\s*(extends|var|const|enum|func|class|signal|tool|yield|assert|onready)`), ), }, ".gml": &Heuristics{ rule.Or( rule.MatchingLanguages("XML"), - regexp.MustCompile(`(?m)(?i:^\s*(\<\?xml|xmlns))`), + regex.MustCompileMultiline(`(?i:^\s*(\<\?xml|xmlns))`), ), 
rule.Or( rule.MatchingLanguages("Graph Modeling Language"), - regexp.MustCompile(`(?m)(?i:^\s*(graph|node)\s+\[$)`), + regex.MustCompileMultiline(`(?i:^\s*(graph|node)\s+\[$)`), ), rule.Or( rule.MatchingLanguages("Gerber Image"), - regexp.MustCompile(`(?m)^[DGMT][0-9]{2}\*$`), + regex.MustCompileMultiline(`^[DGMT][0-9]{2}\*$`), ), rule.Always( rule.MatchingLanguages("Game Maker Language"), @@ -868,27 +894,37 @@ var ContentHeuristics = map[string]*Heuristics{ ".gs": &Heuristics{ rule.Or( rule.MatchingLanguages("GLSL"), - regexp.MustCompile(`(?m)^#version\s+[0-9]+\b`), + regex.MustCompileMultiline(`^#version\s+[0-9]+\b`), ), rule.Or( rule.MatchingLanguages("Gosu"), - regexp.MustCompile(`(?m)^uses (java|gw)\.`), + regex.MustCompileMultiline(`^uses (java|gw)\.`), ), rule.Or( rule.MatchingLanguages("Genie"), - regexp.MustCompile(`(?m)^\[indent=[0-9]+\]`), + regex.MustCompileMultiline(`^\[indent=[0-9]+\]`), + ), + }, + ".gsc": &Heuristics{ + rule.Or( + rule.MatchingLanguages("GSC"), + regex.MustCompileRuby(`^\s*#\s*(?:using|insert|include|define|namespace)[ \t]+\w|^\s*(?>(?:autoexec|private)\s+){0,2}function\s+(?>(?:autoexec|private)\s+){0,2}\w+\s*\(|\b(?:level|self)[ \t]+thread[ \t]+(?:\[\[[ \t]*(?>\w+\.)*\w+[ \t]*\]\]|\w+)[ \t]*\([^\r\n\)]*\)[ \t]*;|^[ \t]*#[ \t]*(?:precache|using_animtree)[ \t]*\(`), + ), + }, + ".gsh": &Heuristics{ + rule.Or( + rule.MatchingLanguages("GSC"), + regex.MustCompileRuby(`^\s*#\s*(?:using|insert|include|define|namespace)[ \t]+\w|^\s*(?>(?:autoexec|private)\s+){0,2}function\s+(?>(?:autoexec|private)\s+){0,2}\w+\s*\(|\b(?:level|self)[ \t]+thread[ \t]+(?:\[\[[ \t]*(?>\w+\.)*\w+[ \t]*\]\]|\w+)[ \t]*\([^\r\n\)]*\)[ \t]*;|^[ \t]*#[ \t]*(?:precache|using_animtree)[ \t]*\(`), ), }, - ".gsc": &Heuristics{}, - ".gsh": &Heuristics{}, ".h": &Heuristics{ rule.Or( rule.MatchingLanguages("Objective-C"), - regexp.MustCompile(`(?m)^\s*(@(interface|class|protocol|property|end|synchronised|selector|implementation)\b|#import\s+.+\.h[">])`), + 
regex.MustCompileMultiline(`^\s*(@(interface|class|protocol|property|end|synchronised|selector|implementation)\b|#import\s+.+\.h[">])`), ), rule.Or( rule.MatchingLanguages("C++"), - regexp.MustCompile(`(?m)^\s*#\s*include <(cstdint|string|vector|map|list|array|bitset|queue|stack|forward_list|unordered_map|unordered_set|(i|o|io)stream)>|^\s*template\s*<|^[ \t]*(try|constexpr)|^[ \t]*catch\s*\(|^[ \t]*(class|(using[ \t]+)?namespace)\s+\w+|^[ \t]*(private|public|protected):$|std::\w+`), + regex.MustCompileMultiline(`^\s*#\s*include <(cstdint|string|vector|map|list|array|bitset|queue|stack|forward_list|unordered_map|unordered_set|(i|o|io)stream)>|^\s*template\s*<|^[ \t]*(try|constexpr)|^[ \t]*catch\s*\(|^[ \t]*(class|(using[ \t]+)?namespace)\s+\w+|^[ \t]*(private|public|protected):$|std::\w+`), ), rule.Always( rule.MatchingLanguages("C"), @@ -897,23 +933,23 @@ var ContentHeuristics = map[string]*Heuristics{ ".hh": &Heuristics{ rule.Or( rule.MatchingLanguages("Hack"), - regexp.MustCompile(`(?m)<\?hh`), + regex.MustCompileMultiline(`<\?hh`), ), }, ".i": &Heuristics{ rule.Or( rule.MatchingLanguages("Motorola 68K Assembly"), - regexp.MustCompile(`(?m)(?im)\bmoveq(?:\.l)?\s+#(?:\$-?[0-9a-f]{1,3}|%[0-1]{1,8}|-?[0-9]{1,3}),\s*d[0-7]\b|(?im)^\s*move(?:\.[bwl])?\s+(?:sr|usp),\s*[^\s]+|(?im)^\s*move\.[bwl]\s+.*\b[ad]\d|(?im)^\s*movem\.[bwl]\b|(?im)^\s*move[mp](?:\.[wl])?\b|(?im)^\s*btst\b|(?im)^\s*dbra\b`), + regex.MustCompileMultiline(`(?im)\bmoveq(?:\.l)?\s+#(?:\$-?[0-9a-f]{1,3}|%[0-1]{1,8}|-?[0-9]{1,3}),\s*d[0-7]\b|(?im)^\s*move(?:\.[bwl])?\s+(?:sr|usp),\s*[^\s]+|(?im)^\s*move\.[bwl]\s+.*\b[ad]\d|(?im)^\s*movem\.[bwl]\b|(?im)^\s*move[mp](?:\.[wl])?\b|(?im)^\s*btst\b|(?im)^\s*dbra\b`), ), rule.Or( rule.MatchingLanguages("SWIG"), - regexp.MustCompile(`(?m)^[ \t]*%[a-z_]+\b|^%[{}]$`), + regex.MustCompileMultiline(`^[ \t]*%[a-z_]+\b|^%[{}]$`), ), }, ".ice": &Heuristics{ rule.Or( rule.MatchingLanguages("JSON"), - regexp.MustCompile(`(?m)\A\s*[{\[]`), + 
regex.MustCompileMultiline(`\A\s*[{\[]`), ), rule.Always( rule.MatchingLanguages("Slice"), @@ -922,57 +958,61 @@ var ContentHeuristics = map[string]*Heuristics{ ".inc": &Heuristics{ rule.Or( rule.MatchingLanguages("Motorola 68K Assembly"), - regexp.MustCompile(`(?m)(?im)\bmoveq(?:\.l)?\s+#(?:\$-?[0-9a-f]{1,3}|%[0-1]{1,8}|-?[0-9]{1,3}),\s*d[0-7]\b|(?im)^\s*move(?:\.[bwl])?\s+(?:sr|usp),\s*[^\s]+|(?im)^\s*move\.[bwl]\s+.*\b[ad]\d|(?im)^\s*movem\.[bwl]\b|(?im)^\s*move[mp](?:\.[wl])?\b|(?im)^\s*btst\b|(?im)^\s*dbra\b`), + regex.MustCompileMultiline(`(?im)\bmoveq(?:\.l)?\s+#(?:\$-?[0-9a-f]{1,3}|%[0-1]{1,8}|-?[0-9]{1,3}),\s*d[0-7]\b|(?im)^\s*move(?:\.[bwl])?\s+(?:sr|usp),\s*[^\s]+|(?im)^\s*move\.[bwl]\s+.*\b[ad]\d|(?im)^\s*movem\.[bwl]\b|(?im)^\s*move[mp](?:\.[wl])?\b|(?im)^\s*btst\b|(?im)^\s*dbra\b`), ), rule.Or( rule.MatchingLanguages("PHP"), - regexp.MustCompile(`(?m)^<\?(?:php)?`), + regex.MustCompileMultiline(`^<\?(?:php)?`), ), rule.Or( rule.MatchingLanguages("SourcePawn"), - regexp.MustCompile(`(?m)^public\s+(?:SharedPlugin(?:\s+|:)__pl_\w+\s*=(?:\s*{)?|(?:void\s+)?__pl_\w+_SetNTVOptional\(\)(?:\s*{)?)|^methodmap\s+\w+\s+<\s+\w+|^\s*MarkNativeAsOptional\s*\(`), + regex.MustCompileMultiline(`^public\s+(?:SharedPlugin(?:\s+|:)__pl_\w+\s*=(?:\s*{)?|(?:void\s+)?__pl_\w+_SetNTVOptional\(\)(?:\s*{)?)|^methodmap\s+\w+\s+<\s+\w+|^\s*MarkNativeAsOptional\s*\(`), + ), + rule.Or( + rule.MatchingLanguages("NASL"), + regex.MustCompileRuby(`^\s*include\s*\(\s*(?:"|')[\\/\w\-\.:\s]+\.(?:nasl|inc)\s*(?:"|')\s*\)\s*;|^\s*(?:global|local)_var\s+(?:\w+(?:\s*=\s*[\w\-"']+)?\s*)(?:,\s*\w+(?:\s*=\s*[\w\-"']+)?\s*)*+\s*;|^\s*namespace\s+\w+\s*{|^\s*object\s+\w+\s*(?:extends\s+\w+(?:::\w+)?)?\s*{|^\s*(?:public\s+|private\s+|\s*)function\s+\w+\s*\([\w\s,]*\)\s*{`), ), rule.Or( rule.MatchingLanguages("POV-Ray SDL"), - regexp.MustCompile(`(?m)^\s*#(declare|local|macro|while)\s`), + regex.MustCompileMultiline(`^\s*#(declare|local|macro|while)\s`), ), rule.Or( 
rule.MatchingLanguages("Pascal"), - regexp.MustCompile(`(?m)(?i:^\s*{\$(?:mode|ifdef|undef|define)[ ]+[a-z0-9_]+})|^\s*end[.;]\s*$`), + regex.MustCompileMultiline(`(?i:^\s*{\$(?:mode|ifdef|undef|define)[ ]+[a-z0-9_]+})|^\s*end[.;]\s*$`), ), }, ".l": &Heuristics{ rule.Or( rule.MatchingLanguages("Common Lisp"), - regexp.MustCompile(`(?m)\(def(un|macro)\s`), + regex.MustCompileMultiline(`\(def(un|macro)\s`), ), rule.Or( rule.MatchingLanguages("Lex"), - regexp.MustCompile(`(?m)^(%[%{}]xs|<.*>)`), + regex.MustCompileMultiline(`^(%[%{}]xs|<.*>)`), ), rule.Or( rule.MatchingLanguages("Roff"), - regexp.MustCompile(`(?m)^\.[A-Za-z]{2}(\s|$)`), + regex.MustCompileMultiline(`^\.[A-Za-z]{2}(\s|$)`), ), rule.Or( rule.MatchingLanguages("PicoLisp"), - regexp.MustCompile(`(?m)^\((de|class|rel|code|data|must)\s`), + regex.MustCompileMultiline(`^\((de|class|rel|code|data|must)\s`), ), }, ".lisp": &Heuristics{ rule.Or( rule.MatchingLanguages("Common Lisp"), - regexp.MustCompile(`(?m)^\s*\((?i:defun|in-package|defpackage) `), + regex.MustCompileMultiline(`^\s*\((?i:defun|in-package|defpackage) `), ), rule.Or( rule.MatchingLanguages("NewLisp"), - regexp.MustCompile(`(?m)^\s*\(define `), + regex.MustCompileMultiline(`^\s*\(define `), ), }, ".ls": &Heuristics{ rule.Or( rule.MatchingLanguages("LoomScript"), - regexp.MustCompile(`(?m)^\s*package\s*[\w\.\/\*\s]*\s*{`), + regex.MustCompileMultiline(`^\s*package\s*[\w\.\/\*\s]*\s*{`), ), rule.Always( rule.MatchingLanguages("LiveScript"), @@ -981,54 +1021,54 @@ var ContentHeuristics = map[string]*Heuristics{ ".lsp": &Heuristics{ rule.Or( rule.MatchingLanguages("Common Lisp"), - regexp.MustCompile(`(?m)^\s*\((?i:defun|in-package|defpackage) `), + regex.MustCompileMultiline(`^\s*\((?i:defun|in-package|defpackage) `), ), rule.Or( rule.MatchingLanguages("NewLisp"), - regexp.MustCompile(`(?m)^\s*\(define `), + regex.MustCompileMultiline(`^\s*\(define `), ), }, ".m": &Heuristics{ rule.Or( rule.MatchingLanguages("Objective-C"), - 
regexp.MustCompile(`(?m)^\s*(@(interface|class|protocol|property|end|synchronised|selector|implementation)\b|#import\s+.+\.h[">])`), + regex.MustCompileMultiline(`^\s*(@(interface|class|protocol|property|end|synchronised|selector|implementation)\b|#import\s+.+\.h[">])`), ), rule.Or( rule.MatchingLanguages("Mercury"), - regexp.MustCompile(`(?m):- module`), + regex.MustCompileMultiline(`:- module`), ), rule.Or( rule.MatchingLanguages("MUF"), - regexp.MustCompile(`(?m)^: `), + regex.MustCompileMultiline(`^: `), ), rule.Or( rule.MatchingLanguages("M"), - regexp.MustCompile(`(?m)^\s*;`), + regex.MustCompileMultiline(`^\s*;`), ), rule.And( rule.MatchingLanguages("Mathematica"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)\(\*`), + regex.MustCompileMultiline(`\(\*`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)\*\)$`), + regex.MustCompileMultiline(`\*\)$`), ), ), rule.Or( rule.MatchingLanguages("MATLAB"), - regexp.MustCompile(`(?m)^\s*%`), + regex.MustCompileMultiline(`^\s*%`), ), rule.Or( rule.MatchingLanguages("Limbo"), - regexp.MustCompile(`(?m)^\w+\s*:\s*module\s*{`), + regex.MustCompileMultiline(`^\w+\s*:\s*module\s*{`), ), }, ".m4": &Heuristics{ rule.Or( rule.MatchingLanguages("M4Sugar"), - regexp.MustCompile(`(?m)AC_DEFUN|AC_PREREQ|AC_INIT|^_?m4_`), + regex.MustCompileMultiline(`AC_DEFUN|AC_PREREQ|AC_INIT|^_?m4_`), ), rule.Always( rule.MatchingLanguages("M4"), @@ -1039,26 +1079,26 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ 
\t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Always( @@ -1068,27 +1108,31 @@ var ContentHeuristics = map[string]*Heuristics{ ".mask": &Heuristics{ rule.Or( rule.MatchingLanguages("Unity3D Asset"), - regexp.MustCompile(`(?m)tag:unity3d.com`), + regex.MustCompileMultiline(`tag:unity3d.com`), ), }, ".mc": &Heuristics{ + rule.Or( + rule.MatchingLanguages("Win32 Message File"), + regex.MustCompileRuby(`(?i)^[ \t]*(?>\/\*\s*)?MessageId=|^\.$`), + ), rule.Or( rule.MatchingLanguages("M4"), - regexp.MustCompile(`(?m)^dnl|^divert\((?:-?\d+)?\)|^\w+\(`+"`"+`[^\n]*?'[),]`), + regex.MustCompileMultiline(`^dnl|^divert\((?:-?\d+)?\)|^\w+\(`+"`"+`[^\n]*?'[),]`), ), rule.Or( rule.MatchingLanguages("Monkey C"), - regexp.MustCompile(`(?m)\b(?:using|module|function|class|var)\s+\w`), + regex.MustCompileMultiline(`\b(?:using|module|function|class|var)\s+\w`), ), }, ".md": &Heuristics{ rule.Or( rule.MatchingLanguages("Markdown"), - regexp.MustCompile(`(?m)(^[-A-Za-z0-9=#!\*\[|>])|<\/|\A\z`), + regex.MustCompileMultiline(`(^[-A-Za-z0-9=#!\*\[|>])|<\/|\A\z`), ), rule.Or( rule.MatchingLanguages("GCC Machine Description"), - regexp.MustCompile(`(?m)^(;;|\(define_)`), + regex.MustCompileMultiline(`^(;;|\(define_)`), ), rule.Always( rule.MatchingLanguages("Markdown"), @@ -1099,26 +1143,26 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + 
regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Always( @@ -1128,21 +1172,21 @@ var ContentHeuristics = map[string]*Heuristics{ ".ml": &Heuristics{ rule.Or( rule.MatchingLanguages("OCaml"), - regexp.MustCompile(`(?m)(^\s*module)|let rec |match\s+(\S+\s)+with`), + regex.MustCompileMultiline(`(^\s*module)|let rec |match\s+(\S+\s)+with`), ), rule.Or( rule.MatchingLanguages("Standard ML"), - regexp.MustCompile(`(?m)=> |case\s+(\S+\s)+of`), + regex.MustCompileMultiline(`=> |case\s+(\S+\s)+of`), ), }, ".mod": &Heuristics{ rule.Or( rule.MatchingLanguages("XML"), - regexp.MustCompile(`(?m)\s`), + regex.MustCompileMultiline(`^\s+\w+\s+=>\s`), ), }, ".pro": &Heuristics{ rule.Or( rule.MatchingLanguages("Proguard"), - regexp.MustCompile(`(?m)^-(include\b.*\.pro$|keep\b|keepclassmembers\b|keepattributes\b)`), + regex.MustCompileMultiline(`^-(include\b.*\.pro$|keep\b|keepclassmembers\b|keepattributes\b)`), ), rule.Or( rule.MatchingLanguages("Prolog"), - regexp.MustCompile(`(?m)^[^\[#]+:-`), + regex.MustCompileMultiline(`^[^\[#]+:-`), ), rule.Or( rule.MatchingLanguages("INI"), - regexp.MustCompile(`(?m)last_client=`), + 
regex.MustCompileMultiline(`last_client=`), ), rule.And( rule.MatchingLanguages("QMake"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)HEADERS`), + regex.MustCompileMultiline(`HEADERS`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)SOURCES`), + regex.MustCompileMultiline(`SOURCES`), ), ), rule.Or( rule.MatchingLanguages("IDL"), - regexp.MustCompile(`(?m)^\s*function[ \w,]+$`), + regex.MustCompileMultiline(`^\s*function[ \w,]+$`), ), }, ".properties": &Heuristics{ @@ -1320,89 +1364,93 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("INI"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[^#!;][^=]*=`), + regex.MustCompileMultiline(`^[^#!;][^=]*=`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[;\[]`), + regex.MustCompileMultiline(`^[;\[]`), ), ), rule.And( rule.MatchingLanguages("Java Properties"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[^#!;][^=]*=`), + regex.MustCompileMultiline(`^[^#!;][^=]*=`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[#!]`), + regex.MustCompileMultiline(`^[#!]`), ), ), rule.Or( rule.MatchingLanguages("INI"), - regexp.MustCompile(`(?m)^[^#!;][^=]*=`), + regex.MustCompileMultiline(`^[^#!;][^=]*=`), ), rule.Or( rule.MatchingLanguages("Java Properties"), - regexp.MustCompile(`(?m)^[^#!][^:]*:`), + regex.MustCompileMultiline(`^[^#!][^:]*:`), ), }, ".q": &Heuristics{ rule.Or( rule.MatchingLanguages("q"), - regexp.MustCompile(`(?m)((?i:[A-Z.][\w.]*:{)|(^|\n)\\(cd?|d|l|p|ts?) )`), + regex.MustCompileMultiline(`((?i:[A-Z.][\w.]*:{)|(^|\n)\\(cd?|d|l|p|ts?) 
)`), ), rule.Or( rule.MatchingLanguages("HiveQL"), - regexp.MustCompile(`(?m)(?i:SELECT\s+[\w*,]+\s+FROM|(CREATE|ALTER|DROP)\s(DATABASE|SCHEMA|TABLE))`), + regex.MustCompileMultiline(`(?i:SELECT\s+[\w*,]+\s+FROM|(CREATE|ALTER|DROP)\s(DATABASE|SCHEMA|TABLE))`), ), }, ".qs": &Heuristics{ rule.Or( rule.MatchingLanguages("Q#"), - regexp.MustCompile(`(?m)^((\/{2,3})?\s*(namespace|operation)\b)`), + regex.MustCompileMultiline(`^((\/{2,3})?\s*(namespace|operation)\b)`), ), rule.Or( rule.MatchingLanguages("Qt Script"), - regexp.MustCompile(`(?m)(\w+\.prototype\.\w+|===|\bvar\b)`), + regex.MustCompileMultiline(`(\w+\.prototype\.\w+|===|\bvar\b)`), ), }, ".r": &Heuristics{ rule.Or( rule.MatchingLanguages("Rebol"), - regexp.MustCompile(`(?m)(?i:\bRebol\b)`), + regex.MustCompileMultiline(`(?i:\bRebol\b)`), ), rule.Or( rule.MatchingLanguages("R"), - regexp.MustCompile(`(?m)<-|^\s*#`), + regex.MustCompileMultiline(`<-|^\s*#`), ), }, ".re": &Heuristics{ rule.Or( rule.MatchingLanguages("Reason"), - regexp.MustCompile(`(?m)^\s*module\s+type\s|^\s*(?:include|open)\s+\w+\s*;\s*$|^\s*let\s+(?:module\s\w+\s*=\s*{|\w+:\s+.*=.*;\s*$)`), + regex.MustCompileMultiline(`^\s*module\s+type\s|^\s*(?:include|open)\s+\w+\s*;\s*$|^\s*let\s+(?:module\s\w+\s*=\s*{|\w+:\s+.*=.*;\s*$)`), ), rule.Or( rule.MatchingLanguages("C++"), - regexp.MustCompile(`(?m)^\s*#(?:(?:if|ifdef|define|pragma)\s+\w|\s*include\s+<[^>]+>)|^\s*template\s*<`), + regex.MustCompileMultiline(`^\s*#(?:(?:if|ifdef|define|pragma)\s+\w|\s*include\s+<[^>]+>)|^\s*template\s*<`), ), }, ".res": &Heuristics{ rule.Or( rule.MatchingLanguages("ReScript"), - regexp.MustCompile(`(?m)^\s*(let|module|type)\s+\w*\s+=\s+|^\s*(?:include|open)\s+\w+\s*$`), + regex.MustCompileMultiline(`^\s*(let|module|type)\s+\w*\s+=\s+|^\s*(?:include|open)\s+\w+\s*$`), ), }, ".rno": &Heuristics{ + rule.Or( + rule.MatchingLanguages("RUNOFF"), + regex.MustCompileRuby(`(?i:^\.!|^\f|\f$|^\.end lit(?:eral)?\b|^\.[a-zA-Z].*?;\.[a-zA-Z](?:[; 
\t])|\^\*[^\s*][^*]*\\\*(?=$|\s)|^\.c;[ \t]*\w+)`), + ), rule.Or( rule.MatchingLanguages("Roff"), - regexp.MustCompile(`(?m)^\.\\" `), + regex.MustCompileMultiline(`^\.\\" `), ), }, ".rpy": &Heuristics{ rule.Or( rule.MatchingLanguages("Python"), - regexp.MustCompile(`(?m)(?m:^(import|from|class|def)\s)`), + regex.MustCompileMultiline(`(?m:^(import|from|class|def)\s)`), ), rule.Always( rule.MatchingLanguages("Ren'Py"), @@ -1411,55 +1459,59 @@ var ContentHeuristics = map[string]*Heuristics{ ".rs": &Heuristics{ rule.Or( rule.MatchingLanguages("Rust"), - regexp.MustCompile(`(?m)^(use |fn |mod |pub |macro_rules|impl|#!?\[)`), + regex.MustCompileMultiline(`^(use |fn |mod |pub |macro_rules|impl|#!?\[)`), ), rule.Or( rule.MatchingLanguages("RenderScript"), - regexp.MustCompile(`(?m)#include|#pragma\s+(rs|version)|__attribute__`), + regex.MustCompileMultiline(`#include|#pragma\s+(rs|version)|__attribute__`), ), rule.Or( rule.MatchingLanguages("XML"), - regexp.MustCompile(`(?m)^\s*<\?xml`), + regex.MustCompileMultiline(`^\s*<\?xml`), ), }, ".s": &Heuristics{ rule.Or( rule.MatchingLanguages("Motorola 68K Assembly"), - regexp.MustCompile(`(?m)(?im)\bmoveq(?:\.l)?\s+#(?:\$-?[0-9a-f]{1,3}|%[0-1]{1,8}|-?[0-9]{1,3}),\s*d[0-7]\b|(?im)^\s*move(?:\.[bwl])?\s+(?:sr|usp),\s*[^\s]+|(?im)^\s*move\.[bwl]\s+.*\b[ad]\d|(?im)^\s*movem\.[bwl]\b|(?im)^\s*move[mp](?:\.[wl])?\b|(?im)^\s*btst\b|(?im)^\s*dbra\b`), + regex.MustCompileMultiline(`(?im)\bmoveq(?:\.l)?\s+#(?:\$-?[0-9a-f]{1,3}|%[0-1]{1,8}|-?[0-9]{1,3}),\s*d[0-7]\b|(?im)^\s*move(?:\.[bwl])?\s+(?:sr|usp),\s*[^\s]+|(?im)^\s*move\.[bwl]\s+.*\b[ad]\d|(?im)^\s*movem\.[bwl]\b|(?im)^\s*move[mp](?:\.[wl])?\b|(?im)^\s*btst\b|(?im)^\s*dbra\b`), ), }, ".sc": &Heuristics{ rule.Or( rule.MatchingLanguages("SuperCollider"), - regexp.MustCompile(`(?m)(?i:\^(this|super)\.|^\s*~\w+\s*=\.)`), + regex.MustCompileMultiline(`(?i:\^(this|super)\.|^\s*~\w+\s*=\.)`), ), rule.Or( rule.MatchingLanguages("Scala"), - regexp.MustCompile(`(?m)(^\s*import 
(scala|java)\.|^\s*class\b)`), + regex.MustCompileMultiline(`(^\s*import (scala|java)\.|^\s*class\b)`), ), }, ".sol": &Heuristics{ + rule.Or( + rule.MatchingLanguages("Solidity"), + regex.MustCompileRuby(`\bpragma\s+solidity\b|\b(?:abstract\s+)?contract\s+(?!\d)[a-zA-Z0-9$_]+(?:\s+is\s+(?:[a-zA-Z0-9$_][^\{]*?)?)?\s*\{`), + ), rule.Or( rule.MatchingLanguages("Gerber Image"), - regexp.MustCompile(`(?m)^[DGMT][0-9]{2}\*\r?\n`), + regex.MustCompileMultiline(`^[DGMT][0-9]{2}\*\r?\n`), ), }, ".sql": &Heuristics{ rule.Or( rule.MatchingLanguages("PLpgSQL"), - regexp.MustCompile(`(?m)(?i:^\\i\b|AS\s+\$\$|LANGUAGE\s+'?plpgsql'?|BEGIN(\s+WORK)?\s*;)`), + regex.MustCompileMultiline(`(?i:^\\i\b|AS\s+\$\$|LANGUAGE\s+'?plpgsql'?|BEGIN(\s+WORK)?\s*;)`), ), rule.Or( rule.MatchingLanguages("SQLPL"), - regexp.MustCompile(`(?m)(?i:ALTER\s+MODULE|MODE\s+DB2SQL|\bSYS(CAT|PROC)\.|ASSOCIATE\s+RESULT\s+SET|\bEND!\s*$)`), + regex.MustCompileMultiline(`(?i:ALTER\s+MODULE|MODE\s+DB2SQL|\bSYS(CAT|PROC)\.|ASSOCIATE\s+RESULT\s+SET|\bEND!\s*$)`), ), rule.Or( rule.MatchingLanguages("PLSQL"), - regexp.MustCompile(`(?m)(?i:\$\$PLSQL_|XMLTYPE|systimestamp|\.nextval|CONNECT\s+BY|AUTHID\s+(DEFINER|CURRENT_USER)|constructor\W+function)`), + regex.MustCompileMultiline(`(?i:\$\$PLSQL_|XMLTYPE|systimestamp|\.nextval|CONNECT\s+BY|AUTHID\s+(DEFINER|CURRENT_USER)|constructor\W+function)`), ), rule.Or( rule.MatchingLanguages("TSQL"), - regexp.MustCompile(`(?m)(?i:^\s*GO\b|BEGIN(\s+TRY|\s+CATCH)|OUTPUT\s+INSERTED|DECLARE\s+@|\[dbo\])`), + regex.MustCompileMultiline(`(?i:^\s*GO\b|BEGIN(\s+TRY|\s+CATCH)|OUTPUT\s+INSERTED|DECLARE\s+@|\[dbo\])`), ), rule.Always( rule.MatchingLanguages("SQL"), @@ -1468,53 +1520,62 @@ var ContentHeuristics = map[string]*Heuristics{ ".srt": &Heuristics{ rule.Or( rule.MatchingLanguages("SubRip Text"), - regexp.MustCompile(`(?m)^(\d{2}:\d{2}:\d{2},\d{3})\s*(-->)\s*(\d{2}:\d{2}:\d{2},\d{3})$`), + 
regex.MustCompileMultiline(`^(\d{2}:\d{2}:\d{2},\d{3})\s*(-->)\s*(\d{2}:\d{2}:\d{2},\d{3})$`), ), }, ".st": &Heuristics{ + rule.Or( + rule.MatchingLanguages("StringTemplate"), + regex.MustCompileRuby(`\$\w+[($]|(.)!\s*.+?\s*!\1||\[!\s*.+?\s*!\]|\{!\s*.+?\s*!\}`), + ), rule.Or( rule.MatchingLanguages("Smalltalk"), - regexp.MustCompile(`(?m)\A\s*[\[{(^"'\w#]|[a-zA-Z_]\w*\s*:=\s*[a-zA-Z_]\w*|class\s*>>\s*[a-zA-Z_]\w*|^[a-zA-Z_]\w*\s+[a-zA-Z_]\w*:|^Class\s*{|if(?:True|False):\s*\[`), + regex.MustCompileMultiline(`\A\s*[\[{(^"'\w#]|[a-zA-Z_]\w*\s*:=\s*[a-zA-Z_]\w*|class\s*>>\s*[a-zA-Z_]\w*|^[a-zA-Z_]\w*\s+[a-zA-Z_]\w*:|^Class\s*{|if(?:True|False):\s*\[`), ), }, ".star": &Heuristics{ rule.Or( rule.MatchingLanguages("STAR"), - regexp.MustCompile(`(?m)^loop_\s*$`), + regex.MustCompileMultiline(`^loop_\s*$`), ), rule.Always( rule.MatchingLanguages("Starlark"), ), }, - ".stl": &Heuristics{}, + ".stl": &Heuristics{ + rule.Or( + rule.MatchingLanguages("STL"), + regex.MustCompileRuby(`\A\s*solid(?=$|\s)(?m:.*?)\Rendsolid(?:$|\s)`), + ), + }, ".t": &Heuristics{ rule.Or( rule.MatchingLanguages("Perl"), - regexp.MustCompile(`(?m)\buse\s+(?:strict\b|v?5\.)`), + regex.MustCompileMultiline(`\buse\s+(?:strict\b|v?5\.)`), ), rule.Or( rule.MatchingLanguages("Raku"), - regexp.MustCompile(`(?m)^\s*(?:use\s+v6\b|\bmodule\b|\bmy\s+class\b)`), + regex.MustCompileMultiline(`^\s*(?:use\s+v6\b|\bmodule\b|\bmy\s+class\b)`), ), rule.Or( rule.MatchingLanguages("Turing"), - regexp.MustCompile(`(?m)^\s*%[ \t]+|^\s*var\s+\w+(\s*:\s*\w+)?\s*:=\s*\w+`), + regex.MustCompileMultiline(`^\s*%[ \t]+|^\s*var\s+\w+(\s*:\s*\w+)?\s*:=\s*\w+`), ), }, ".toc": &Heuristics{ rule.Or( rule.MatchingLanguages("World of Warcraft Addon Data"), - regexp.MustCompile(`(?m)^## |@no-lib-strip@`), + regex.MustCompileMultiline(`^## |@no-lib-strip@`), ), rule.Or( rule.MatchingLanguages("TeX"), - regexp.MustCompile(`(?m)^\\(contentsline|defcounter|beamer|boolfalse)`), + 
regex.MustCompileMultiline(`^\\(contentsline|defcounter|beamer|boolfalse)`), ), }, ".ts": &Heuristics{ rule.Or( rule.MatchingLanguages("XML"), - regexp.MustCompile(`(?m) `), + regex.MustCompileMultiline(`gap> `), ), rule.Always( rule.MatchingLanguages("Scilab"), @@ -1532,37 +1593,46 @@ var ContentHeuristics = map[string]*Heuristics{ ".tsx": &Heuristics{ rule.Or( rule.MatchingLanguages("TSX"), - regexp.MustCompile(`(?m)^\s*(import.+(from\s+|require\()['"]react|\/\/\/\s*]?[0-9]+|m)?|[ \t]ex)(?=:(?=[ \t]*set?[ \t][^\r\n:]+:)|:(?![ \t]*set?[ \t]))(?:(?:[ \t]*:[ \t]*|[ \t])\w*(?:[ \t]*=(?:[^\\\s]|\\.)*)?)*[ \t:](?:filetype|ft|syntax)[ \t]*=(help)(?=$|\s|:)`), + ), rule.Always( rule.MatchingLanguages("Text"), ), }, - ".url": &Heuristics{}, + ".url": &Heuristics{ + rule.Or( + rule.MatchingLanguages("INI"), + regex.MustCompileRuby(`^\[InternetShortcut\]\R(?>[^\s\[][^\n]*\R)*URL=`), + ), + }, ".v": &Heuristics{ rule.Or( rule.MatchingLanguages("Coq"), - regexp.MustCompile(`(?m)(?:^|\s)(?:Proof|Qed)\.(?:$|\s)|(?:^|\s)Require[ \t]+(Import|Export)\s`), + regex.MustCompileMultiline(`(?:^|\s)(?:Proof|Qed)\.(?:$|\s)|(?:^|\s)Require[ \t]+(Import|Export)\s`), ), rule.Or( rule.MatchingLanguages("Verilog"), - regexp.MustCompile(`(?m)^[ \t]*module\s+[^\s()]+\s+\#?\(|^[ \t]*`+"`"+`(?:define|ifdef|ifndef|include|timescale)|^[ \t]*always[ \t]+@|^[ \t]*initial[ \t]+(begin|@)`), + regex.MustCompileMultiline(`^[ \t]*module\s+[^\s()]+\s+\#?\(|^[ \t]*`+"`"+`(?:define|ifdef|ifndef|include|timescale)|^[ \t]*always[ \t]+@|^[ \t]*initial[ \t]+(begin|@)`), ), rule.Or( rule.MatchingLanguages("V"), - regexp.MustCompile(`(?m)\$(?:if|else)[ \t]|^[ \t]*fn\s+[^\s()]+\(.*?\).*?\{|^[ \t]*for\s*\{`), + regex.MustCompileMultiline(`\$(?:if|else)[ \t]|^[ \t]*fn\s+[^\s()]+\(.*?\).*?\{|^[ \t]*for\s*\{`), ), }, ".vba": &Heuristics{ rule.Or( rule.MatchingLanguages("Vim Script"), - regexp.MustCompile(`(?m)^UseVimball`), + regex.MustCompileMultiline(`^UseVimball`), ), rule.Always( rule.MatchingLanguages("VBA"), @@ 
-1571,35 +1641,35 @@ var ContentHeuristics = map[string]*Heuristics{ ".w": &Heuristics{ rule.Or( rule.MatchingLanguages("OpenEdge ABL"), - regexp.MustCompile(`(?m)&ANALYZE-SUSPEND _UIB-CODE-BLOCK _CUSTOM _DEFINITIONS`), + regex.MustCompileMultiline(`&ANALYZE-SUSPEND _UIB-CODE-BLOCK _CUSTOM _DEFINITIONS`), ), rule.Or( rule.MatchingLanguages("CWeb"), - regexp.MustCompile(`(?m)^@(<|\w+\.)`), + regex.MustCompileMultiline(`^@(<|\w+\.)`), ), }, ".x": &Heuristics{ rule.Or( rule.MatchingLanguages("DirectX 3D File"), - regexp.MustCompile(`(?m)^xof 030(2|3)(?:txt|bin|tzip|bzip)\b`), + regex.MustCompileMultiline(`^xof 030(2|3)(?:txt|bin|tzip|bzip)\b`), ), rule.Or( rule.MatchingLanguages("RPC"), - regexp.MustCompile(`(?m)\b(program|version)\s+\w+\s*{|\bunion\s+\w+\s+switch\s*\(`), + regex.MustCompileMultiline(`\b(program|version)\s+\w+\s*{|\bunion\s+\w+\s+switch\s*\(`), ), rule.Or( rule.MatchingLanguages("Logos"), - regexp.MustCompile(`(?m)^%(end|ctor|hook|group)\b`), + regex.MustCompileMultiline(`^%(end|ctor|hook|group)\b`), ), rule.Or( rule.MatchingLanguages("Linker Script"), - regexp.MustCompile(`(?m)OUTPUT_ARCH\(|OUTPUT_FORMAT\(|SECTIONS`), + regex.MustCompileMultiline(`OUTPUT_ARCH\(|OUTPUT_FORMAT\(|SECTIONS`), ), }, ".yaml": &Heuristics{ rule.Or( rule.MatchingLanguages("MiniYAML"), - regexp.MustCompile(`(?m)^\t+.*?[^\s:].*?:`), + regex.MustCompileMultiline(`^\t+.*?[^\s:].*?:`), ), rule.Always( rule.MatchingLanguages("YAML"), @@ -1608,7 +1678,7 @@ var ContentHeuristics = map[string]*Heuristics{ ".yy": &Heuristics{ rule.Or( rule.MatchingLanguages("JSON"), - regexp.MustCompile(`(?m)\"modelName\"\:\s*\"GM`), + regex.MustCompileMultiline(`\"modelName\"\:\s*\"GM`), ), rule.Always( rule.MatchingLanguages("Yacc"), diff --git a/data/rule/rule.go b/data/rule/rule.go index 7dc13bc..8ad4397 100644 --- a/data/rule/rule.go +++ b/data/rule/rule.go @@ -3,6 +3,15 @@ // with colliding extensions, based on regexps from Linguist data. 
package rule +import "github.com/go-enry/go-enry/v2/regex" + +// Matcher checks if the data matches (number of) pattern(s). +// Every heuristic rule below implements this interface. +// A regexp.Regexp satisfies this interface and can be used instead. +type Matcher interface { + Match(data []byte) bool +} + // Heuristic consist of (a number of) rules where each, if matches, // identifies content as belonging to a programming language(s). type Heuristic interface { @@ -10,15 +19,7 @@ type Heuristic interface { Languages() []string } -// Matcher checks if the data matches (number of) pattern. -// Every heuristic rule below implements this interface. -// A regexp.Regexp satisfies this interface and can be used instead. -type Matcher interface { - Match(data []byte) bool -} - -// languages struct incapsulate data common to every Matcher: all languages -// that it identifies. +// languages base struct with all the languages that a Matcher identifies. type languages struct { langs []string } @@ -33,6 +34,10 @@ func MatchingLanguages(langs ...string) languages { return languages{langs} } +func noLanguages() languages { + return MatchingLanguages([]string{}...) +} + // Implements a Heuristic. type or struct { languages @@ -40,14 +45,19 @@ type or struct { } // Or rule matches, if a single matching pattern exists. -// It receives only one pattern as it relies on compile-time optimization that -// represtes union with | inside a single regexp. -func Or(l languages, r Matcher) Heuristic { - return or{l, r} +// It receives only one pattern as it relies on optimization that +// represents union with | inside a single regexp during code generation. +func Or(l languages, p Matcher) Heuristic { + //FIXME(bzz): this will not be the case as only some of the patterns may + // be non-RE2 => we shouldn't collate them not to lose the (accuracy of the) whole rule + return or{l, p} } // Match implements rule.Matcher.
func (r or) Match(data []byte) bool { + if runOnRE2AndRegexNotAccepted(r.pattern) { + return false + } return r.pattern.Match(data) } @@ -65,6 +75,9 @@ func And(l languages, m ...Matcher) Heuristic { // Match implements data.Matcher. func (r and) Match(data []byte) bool { for _, p := range r.patterns { + if runOnRE2AndRegexNotAccepted(p) { + continue + } if !p.Match(data) { return false } @@ -86,6 +99,9 @@ func Not(l languages, r ...Matcher) Heuristic { // Match implements data.Matcher. func (r not) Match(data []byte) bool { for _, p := range r.Patterns { + if runOnRE2AndRegexNotAccepted(p) { + continue + } if p.Match(data) { return false } @@ -107,3 +123,11 @@ func Always(l languages) Heuristic { func (r always) Match(data []byte) bool { return true } + +// Checks if a regex syntax isn't accepted by RE2 engine. +// It's nil by construction from regex.MustCompileRuby but +// is used here as a Matcher interface which itself is non-nil. +func runOnRE2AndRegexNotAccepted(re Matcher) bool { + v, ok := re.(regex.EnryRegexp) + return ok && v == nil +} diff --git a/data/rule/rule_test.go b/data/rule/rule_test.go index 208fc07..ce53b67 100644 --- a/data/rule/rule_test.go +++ b/data/rule/rule_test.go @@ -1,39 +1,71 @@ package rule import ( - "regexp" "testing" + "github.com/go-enry/go-enry/v2/regex" "github.com/stretchr/testify/assert" ) const lang = "ActionScript" -var fixtures = []struct { +type fixture struct { name string rule Heuristic numLangs int - matching string + match string noMatch string -}{ - {"Always", Always(MatchingLanguages(lang)), 1, "a", ""}, - {"Not", Not(MatchingLanguages(lang), regexp.MustCompile(`a`)), 1, "b", "a"}, - {"And", And(MatchingLanguages(lang), regexp.MustCompile(`a`), regexp.MustCompile(`b`)), 1, "ab", "a"}, - {"Or", Or(MatchingLanguages(lang), regexp.MustCompile(`a|b`)), 1, "ab", "c"}, } -func TestRules(t *testing.T) { - for _, f := range fixtures { - t.Run(f.name, func(t *testing.T) { - assert.NotNil(t, f.rule) - assert.NotNil(t, 
f.rule.Languages()) - assert.Equal(t, f.numLangs, len(f.rule.Languages())) - assert.Truef(t, f.rule.Match([]byte(f.matching)), - "'%s' is expected to .Match() by rule %s%v", f.matching, f.name, f.rule) - if f.noMatch != "" { - assert.Falsef(t, f.rule.Match([]byte(f.noMatch)), - "'%s' is expected NOT to .Match() by rule %s%v", f.noMatch, f.name, f.rule) - } +var specificFixtures = map[string][]fixture{ + "": { // cases that don't vary between the engines + {"Always", Always(MatchingLanguages(lang)), 1, "a", ""}, + {"Not", Not(MatchingLanguages(lang), regex.MustCompile(`a`)), 1, "b", "a"}, + {"And", And(MatchingLanguages(lang), regex.MustCompile(`a`), regex.MustCompile(`b`)), 1, "ab", "a"}, + {"Or", Or(MatchingLanguages(lang), regex.MustCompile(`a|b`)), 1, "ab", "c"}, + // the results of these depend on the regex engine + // {"NilOr", Or(noLanguages(), regex.MustCompileRuby(``)), 0, "", "a"}, + // {"NilNot", Not(noLanguages(), regex.MustCompileRuby(`a`)), 0, "", "a"}, + }, + regex.RE2: { + {"NilAnd", And(noLanguages(), regex.MustCompileRuby(`a`), regex.MustCompile(`b`)), 0, "b", "a"}, + {"NilNot", Not(noLanguages(), regex.MustCompileRuby(`a`), regex.MustCompile(`b`)), 0, "c", "b"}, + }, + regex.Oniguruma: { + {"NilAnd", And(noLanguages(), regex.MustCompileRuby(`a`), regex.MustCompile(`b`)), 0, "ab", "c"}, + {"NilNot", Not(noLanguages(), regex.MustCompileRuby(`a`), regex.MustCompile(`b`)), 0, "c", "a"}, + {"NilOr", Or(noLanguages(), regex.MustCompileRuby(`a`) /*, regexp.MustCompile(`b`)*/), 0, "a", "b"}, + }, +} + +func testRulesForEngine(t *testing.T, engine string) { + if engine != "" && regex.Name != engine { + return + } + for _, f := range specificFixtures[engine] { + t.Run(engine+f.name, func(t *testing.T) { + check(t, f) }) } } + +func TestRules(t *testing.T) { + //TODO(bzz): can all be run in parallel + testRulesForEngine(t, "") + testRulesForEngine(t, regex.RE2) + testRulesForEngine(t, regex.Oniguruma) +} + +func check(t *testing.T, f fixture) { + 
assert.NotNil(t, f.rule) + assert.NotNil(t, f.rule.Languages()) + assert.Equal(t, f.numLangs, len(f.rule.Languages())) + if f.match != "" { + assert.Truef(t, f.rule.Match([]byte(f.match)), + "'%s' is expected to .Match() by rule %s%v", f.match, f.name, f.rule) + } + if f.noMatch != "" { + assert.Falsef(t, f.rule.Match([]byte(f.noMatch)), + "'%s' is expected NOT to .Match() by rule %s%v", f.noMatch, f.name, f.rule) + } +} diff --git a/enry.go b/enry.go index 769ca4f..0cf3c31 100644 --- a/enry.go +++ b/enry.go @@ -1,15 +1,15 @@ /* - Package enry implements multiple strategies for programming language identification. +Package enry identifies programming languages. - Identification is made based on file name and file content using a service - of strategies to narrow down possible option. - Each strategy is available as a separate API call, as well as a main enty point +Identification is based on file name and content using a series +of strategies to narrow down possible options. +Each strategy is available as a separate API call, as well as through the main entry point: - GetLanguage(filename string, content []byte) (language string) + GetLanguage(filename string, content []byte) (language string) - It is a port of the https://github.com/github/linguist from Ruby. - Upstream Linguist YAML files are used to generate datastructures for data - package. +It is a port of the https://github.com/github/linguist from Ruby. +Upstream Linguist YAML files are used to generate datastructures for data +package. 
*/ package enry // import "github.com/go-enry/go-enry/v2" diff --git a/go.mod b/go.mod index db58fd5..c4fb1aa 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,6 @@ go 1.14 require ( github.com/go-enry/go-oniguruma v1.2.1 - github.com/stretchr/testify v1.3.0 + github.com/stretchr/testify v1.8.1 gopkg.in/yaml.v2 v2.2.8 ) diff --git a/go.sum b/go.sum index 451dfd6..f2da5aa 100644 --- a/go.sum +++ b/go.sum @@ -1,16 +1,21 @@ -github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/go-enry/go-oniguruma v1.2.0 h1:oBO9XC1IDT9+AoWW5oFsa/7gFeOPacEqDbyXZKWXuDs= -github.com/go-enry/go-oniguruma v1.2.0/go.mod h1:bWDhYP+S6xZQgiRL7wlTScFYBe023B6ilRZbCAD5Hf4= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/go-enry/go-oniguruma v1.2.1 h1:k8aAMuJfMrqm/56SG2lV9Cfti6tC4x8673aHCcBk+eo= github.com/go-enry/go-oniguruma v1.2.1/go.mod h1:bWDhYP+S6xZQgiRL7wlTScFYBe023B6ilRZbCAD5Hf4= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod 
h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/code-generator/assets/content.go.tmpl b/internal/code-generator/assets/content.go.tmpl index f03669e..dd83166 100644 --- a/internal/code-generator/assets/content.go.tmpl +++ b/internal/code-generator/assets/content.go.tmpl @@ -1,9 +1,8 @@ package data import ( - "regexp" - "github.com/go-enry/go-enry/v2/data/rule" + "github.com/go-enry/go-enry/v2/regex" ) var ContentHeuristics = map[string]*Heuristics{ @@ -27,12 +26,12 @@ var ContentHeuristics = map[string]*Heuristics{ {{- else if eq .Op "Or" -}} rule.Or( {{ template "Languages" .Langs -}} - regexp.MustCompile({{ .Pattern | stringVal }}), + {{ template "mustCompile" . }} ), {{- else if eq .Op "Not" -}} rule.Not( {{ template "Languages" .Langs -}} - regexp.MustCompile({{ .Pattern | stringVal }}), + {{ template "mustCompile" . 
}} ), {{- else if eq .Op "Always" -}} rule.Always( @@ -49,3 +48,11 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages(""), {{end -}} {{end}} + +{{define "mustCompile" -}} + {{ if .IsRE2 -}} + regex.MustCompileMultiline({{ .Pattern | stringVal }}), + {{- else -}} + regex.MustCompileRuby({{ .Pattern | stringVal }}), + {{ end -}} +{{end}} diff --git a/internal/code-generator/assets/vendor.go.tmpl b/internal/code-generator/assets/vendor.go.tmpl index 4b5274e..19eb2b3 100644 --- a/internal/code-generator/assets/vendor.go.tmpl +++ b/internal/code-generator/assets/vendor.go.tmpl @@ -2,11 +2,21 @@ package data import "github.com/go-enry/go-enry/v2/regex" +{{define "mustCompile" -}} + {{ if isRE2 . -}} + regex.MustCompile({{ . | stringVal }}) + {{- else -}} + regex.MustCompileRuby({{ . | stringVal }}) + {{- end -}} +{{end}} + var VendorMatchers = []regex.EnryRegexp{ - {{range $regexp := . -}} - regex.MustCompile(`{{ $regexp }}`), + {{range $re := . -}} + {{ template "mustCompile" $re }}, {{end -}} } // FastVendorMatcher is equivalent to matching any of the VendorMatchers. -var FastVendorMatcher = regex.MustCompile(`{{ optimize . }}`) \ No newline at end of file +{{with $singleRE := collateAllRegexps . 
-}} +var FastVendorMatcher = {{template "mustCompile" $singleRE}} +{{end}} \ No newline at end of file diff --git a/internal/code-generator/generator/generator.go b/internal/code-generator/generator/generator.go index 1ebb8ca..b081d47 100644 --- a/internal/code-generator/generator/generator.go +++ b/internal/code-generator/generator/generator.go @@ -3,7 +3,6 @@ package generator import ( - "bytes" "fmt" "go/format" "io" @@ -22,12 +21,15 @@ type File func(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit stri func formatedWrite(outPath string, source []byte) error { formatedSource, err := format.Source(source) if err != nil { - return err + err = fmt.Errorf("'go fmt' fails on %v", err) + // write unformatted source to simplify debugging + formatedSource = source } + if err := ioutil.WriteFile(outPath, formatedSource, 0666); err != nil { return err } - return nil + return err } func executeTemplate(w io.Writer, name, path, commit string, fmap template.FuncMap, data interface{}) error { @@ -40,35 +42,21 @@ func executeTemplate(w io.Writer, name, path, commit string, fmap template.FuncM val = strings.ReplaceAll(val, "`", "`+\"`\"+`") return fmt.Sprintf("`%s`", val) } - - const headerTmpl = "header.go.tmpl" - headerPath := filepath.Join(filepath.Dir(path), headerTmpl) - - h := template.Must(template.New(headerTmpl).Funcs(template.FuncMap{ - "getCommit": getCommit, - "stringVal": stringVal, - }).ParseFiles(headerPath)) - - buf := bytes.NewBuffer(nil) - if err := h.Execute(buf, data); err != nil { - return err - } - if fmap == nil { fmap = make(template.FuncMap) } fmap["getCommit"] = getCommit fmap["stringVal"] = stringVal + fmap["isRE2"] = isRE2 + + const headerTmpl = "header.go.tmpl" + headerPath := filepath.Join(filepath.Dir(path), headerTmpl) + + h := template.Must(template.New(headerTmpl).Funcs(fmap).ParseFiles(headerPath)) + if err := h.Execute(w, data); err != nil { + return err + } t := template.Must(template.New(name).Funcs(fmap).ParseFiles(path)) - 
if err := t.Execute(buf, data); err != nil { - return err - } - - src, err := format.Source(buf.Bytes()) - if err != nil { - return err - } - _, err = w.Write(src) - return err + return t.Execute(w, data) } diff --git a/internal/code-generator/generator/heuristics.go b/internal/code-generator/generator/heuristics.go index b226b8b..65df2a1 100644 --- a/internal/code-generator/generator/heuristics.go +++ b/internal/code-generator/generator/heuristics.go @@ -70,25 +70,27 @@ func loadRule(namedPatterns map[string]StringArray, rule *Rule) *LanguagePattern subp := loadRule(namedPatterns, r) subPatterns = append(subPatterns, subp) } - result = &LanguagePattern{"And", rule.Languages, "", subPatterns} + result = &LanguagePattern{"And", rule.Languages, "", subPatterns, true} } else if len(rule.Pattern) != 0 { // OrPattern - conjunction := strings.Join(rule.Pattern, orPipe) - pattern := convertToValidRegexp(conjunction) - result = &LanguagePattern{"Or", rule.Languages, pattern, nil} + // FIXME(bzz): this optimization should only be applied if each pattern isRE2! + pattern := strings.Join(rule.Pattern, orPipe) + + // TODO(bzz): handle the common case Or(len(Languages)==0) better + // e.g. by emitting `rule.Rule(...)` instead of + // an (ugly) `rule.Or( rule.MatchingLanguages(""), ... 
)` + result = &LanguagePattern{"Or", rule.Languages, pattern, nil, isRE2(pattern)} } else if rule.NegativePattern != "" { // NotPattern - pattern := convertToValidRegexp(rule.NegativePattern) - result = &LanguagePattern{"Not", rule.Languages, pattern, nil} + pattern := rule.NegativePattern + result = &LanguagePattern{"Not", rule.Languages, pattern, nil, isRE2(pattern)} } else if rule.NamedPattern != "" { // Named OrPattern - conjunction := strings.Join(namedPatterns[rule.NamedPattern], orPipe) - pattern := convertToValidRegexp(conjunction) - result = &LanguagePattern{"Or", rule.Languages, pattern, nil} + pattern := strings.Join(namedPatterns[rule.NamedPattern], orPipe) + result = &LanguagePattern{"Or", rule.Languages, pattern, nil, isRE2(pattern)} } else { // AlwaysPattern - result = &LanguagePattern{"Always", rule.Languages, "", nil} + result = &LanguagePattern{"Always", rule.Languages, "", nil, true} } - if isUnsupportedRegexpSyntax(result.Pattern) { - log.Printf("skipping rule: language:'%q', rule:'%q'\n", rule.Languages, result.Pattern) - return nil + if !isRE2(result.Pattern) { + log.Printf("RE2 incompatible syntax for heuristic language:'%s', rule:'%s'\n", rule.Languages, result.Pattern) } return result } @@ -100,6 +102,7 @@ type LanguagePattern struct { Langs []string Pattern string Rules []*LanguagePattern + IsRE2 bool } type Heuristics struct { @@ -125,7 +128,7 @@ type Patterns struct { } // StringArray is workaround for parsing named_pattern, -// wich is sometimes arry and sometimes not. +// which is sometimes an array and sometimes is not. // See https://github.com/go-yaml/yaml/issues/100 type StringArray []string @@ -173,8 +176,6 @@ func isUnsupportedRegexpSyntax(reg string) bool { (strings.HasPrefix(reg, multilinePrefix+`/`) && strings.HasSuffix(reg, `/`)) } -// convertToValidRegexp converts Ruby regexp syntax to RE2 equivalent. -// Does not work with Ruby regexp literals. 
-func convertToValidRegexp(rubyRegexp string) string { - return multilinePrefix + rubyRegexp +func isRE2(s string) bool { + return !isUnsupportedRegexpSyntax(s) } diff --git a/internal/code-generator/generator/test_files/content.gold b/internal/code-generator/generator/test_files/content.gold index d191799..ca53b77 100644 --- a/internal/code-generator/generator/test_files/content.gold +++ b/internal/code-generator/generator/test_files/content.gold @@ -4,9 +4,8 @@ package data import ( - "regexp" - "github.com/go-enry/go-enry/v2/data/rule" + "github.com/go-enry/go-enry/v2/regex" ) var ContentHeuristics = map[string]*Heuristics{ @@ -15,31 +14,31 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Or( rule.MatchingLanguages("Roff"), - regexp.MustCompile(`(?m)^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), + regex.MustCompileMultiline(`^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), ), }, ".1in": &Heuristics{ @@ -47,26 +46,26 @@ var ContentHeuristics = 
map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Always( @@ -78,26 +77,26 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") 
+"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Always( @@ -109,26 +108,26 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Always( @@ -140,31 +139,31 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh 
+(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Or( rule.MatchingLanguages("Roff"), - regexp.MustCompile(`(?m)^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), + regex.MustCompileMultiline(`^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), ), }, ".3": &Heuristics{ @@ -172,31 +171,31 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Or( rule.MatchingLanguages("Roff"), - regexp.MustCompile(`(?m)^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), + regex.MustCompileMultiline(`^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), ), }, ".3in": &Heuristics{ @@ -204,26 +203,26 @@ var 
ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Always( @@ -235,26 +234,26 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH 
+(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Always( @@ -266,26 +265,26 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Always( @@ -297,26 +296,26 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + 
regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Always( @@ -328,26 +327,26 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Always( @@ -359,26 +358,26 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - 
regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Always( @@ -390,31 +389,31 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Or( rule.MatchingLanguages("Roff"), - 
regexp.MustCompile(`(?m)^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), + regex.MustCompileMultiline(`^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), ), }, ".5": &Heuristics{ @@ -422,31 +421,31 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Or( rule.MatchingLanguages("Roff"), - regexp.MustCompile(`(?m)^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), + regex.MustCompileMultiline(`^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), ), }, ".6": &Heuristics{ @@ -454,31 +453,31 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - 
regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Or( rule.MatchingLanguages("Roff"), - regexp.MustCompile(`(?m)^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), + regex.MustCompileMultiline(`^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), ), }, ".7": &Heuristics{ @@ -486,31 +485,31 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Or( rule.MatchingLanguages("Roff"), - regexp.MustCompile(`(?m)^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), + 
regex.MustCompileMultiline(`^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), ), }, ".8": &Heuristics{ @@ -518,31 +517,31 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Or( rule.MatchingLanguages("Roff"), - regexp.MustCompile(`(?m)^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), + regex.MustCompileMultiline(`^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), ), }, ".9": &Heuristics{ @@ -550,31 +549,31 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + 
regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Or( rule.MatchingLanguages("Roff"), - regexp.MustCompile(`(?m)^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), + regex.MustCompileMultiline(`^\.(?:[A-Za-z]{2}(?:\s|$)|\\")`), ), }, ".al": &Heuristics{ @@ -582,38 +581,43 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("AL"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)\b(?i:(CODEUNIT|PAGE|PAGEEXTENSION|PAGECUSTOMIZATION|DOTNET|ENUM|ENUMEXTENSION|VALUE|QUERY|REPORT|TABLE|TABLEEXTENSION|XMLPORT|PROFILE|CONTROLADDIN))\b`), + regex.MustCompileMultiline(`\b(?i:(CODEUNIT|PAGE|PAGEEXTENSION|PAGECUSTOMIZATION|DOTNET|ENUM|ENUMEXTENSION|VALUE|QUERY|REPORT|TABLE|TABLEEXTENSION|XMLPORT|PROFILE|CONTROLADDIN))\b`), ), ), rule.Always( rule.MatchingLanguages("Perl"), ), }, - ".as": &Heuristics{}, + ".as": &Heuristics{ + rule.Or( + rule.MatchingLanguages("ActionScript"), + regex.MustCompileRuby(`^\s*(?:package(?:\s+[\w.]+)?\s+(?:{|$)|import\s+[\w.*]+\s*;|(?=.*?(?:intrinsic|extends))(intrinsic\s+)?class\s+[\w<>.]+(?:\s+extends\s+[\w<>.]+)?|(?:(?:public|protected|private|static)\s+)*(?:(?:var|const|local)\s+\w+\s*:\s*[\w<>.]+(?:\s*=.*)?\s*;|function\s+\w+\s*\((?:\s*\w+\s*:\s*[\w<>.]+\s*(,\s*\w+\s*:\s*[\w<>.]+\s*)*)?\)))`), + ), + }, ".asc": &Heuristics{ rule.Or( rule.MatchingLanguages("Public Key"), - regexp.MustCompile(`(?m)^(----[- ]BEGIN|ssh-(rsa|dss)) `), + regex.MustCompileMultiline(`^(----[- ]BEGIN|ssh-(rsa|dss)) `), ), rule.Or( rule.MatchingLanguages("AsciiDoc"), - 
regexp.MustCompile(`(?m)^[=-]+(\s|\n)|{{[A-Za-z]`), + regex.MustCompileMultiline(`^[=-]+(\s|\n)|{{[A-Za-z]`), ), rule.Or( rule.MatchingLanguages("AGS Script"), - regexp.MustCompile(`(?m)^(\/\/.+|((import|export)\s+)?(function|int|float|char)\s+((room|repeatedly|on|game)_)?([A-Za-z]+[A-Za-z_0-9]+)\s*[;\(])`), + regex.MustCompileMultiline(`^(\/\/.+|((import|export)\s+)?(function|int|float|char)\s+((room|repeatedly|on|game)_)?([A-Za-z]+[A-Za-z_0-9]+)\s*[;\(])`), ), }, ".asm": &Heuristics{ rule.Or( rule.MatchingLanguages("Motorola 68K Assembly"), - regexp.MustCompile(`(?m)(?im)\bmoveq(?:\.l)?\s+#(?:\$-?[0-9a-f]{1,3}|%[0-1]{1,8}|-?[0-9]{1,3}),\s*d[0-7]\b|(?im)^\s*move(?:\.[bwl])?\s+(?:sr|usp),\s*[^\s]+|(?im)^\s*move\.[bwl]\s+.*\b[ad]\d|(?im)^\s*movem\.[bwl]\b|(?im)^\s*move[mp](?:\.[wl])?\b|(?im)^\s*btst\b|(?im)^\s*dbra\b`), + regex.MustCompileMultiline(`(?im)\bmoveq(?:\.l)?\s+#(?:\$-?[0-9a-f]{1,3}|%[0-1]{1,8}|-?[0-9]{1,3}),\s*d[0-7]\b|(?im)^\s*move(?:\.[bwl])?\s+(?:sr|usp),\s*[^\s]+|(?im)^\s*move\.[bwl]\s+.*\b[ad]\d|(?im)^\s*movem\.[bwl]\b|(?im)^\s*move[mp](?:\.[wl])?\b|(?im)^\s*btst\b|(?im)^\s*dbra\b`), ), }, ".asy": &Heuristics{ rule.Or( rule.MatchingLanguages("LTspice Symbol"), - regexp.MustCompile(`(?m)^SymbolType[ \t]`), + regex.MustCompileMultiline(`^SymbolType[ \t]`), ), rule.Always( rule.MatchingLanguages("Asymptote"), @@ -622,193 +626,211 @@ var ContentHeuristics = map[string]*Heuristics{ ".bas": &Heuristics{ rule.Or( rule.MatchingLanguages("FreeBasic"), - regexp.MustCompile(`(?m)^[ \t]*#(?:define|endif|endmacro|ifn?def|if|include|lang|macro)\s`), + regex.MustCompileMultiline(`^[ \t]*#(?:define|endif|endmacro|ifn?def|if|include|lang|macro)\s`), ), rule.Or( rule.MatchingLanguages("BASIC"), - regexp.MustCompile(`(?m)^\s*\d+`), + regex.MustCompileMultiline(`^\s*\d+`), ), }, ".bb": &Heuristics{ rule.Or( rule.MatchingLanguages("BlitzBasic"), - regexp.MustCompile(`(?m)(<^\s*; |End Function)`), + regex.MustCompileMultiline(`(<^\s*; |End Function)`), ), rule.Or( 
rule.MatchingLanguages("BitBake"), - regexp.MustCompile(`(?m)^\s*(# |include|require)\b`), + regex.MustCompileMultiline(`^\s*(# |include|require)\b`), ), rule.Or( rule.MatchingLanguages("Clojure"), - regexp.MustCompile(`(?m)\((def|defn|defmacro|let)\s`), + regex.MustCompileMultiline(`\((def|defn|defmacro|let)\s`), ), }, ".bi": &Heuristics{ rule.Or( rule.MatchingLanguages("FreeBasic"), - regexp.MustCompile(`(?m)^[ \t]*#(?:define|endif|endmacro|ifn?def|if|include|lang|macro)\s`), + regex.MustCompileMultiline(`^[ \t]*#(?:define|endif|endmacro|ifn?def|if|include|lang|macro)\s`), + ), + }, + ".bs": &Heuristics{ + rule.Or( + rule.MatchingLanguages("Bikeshed"), + regex.MustCompileRuby(`^(?i:\r\n]*>`), + ), + rule.Or( + rule.MatchingLanguages("BrighterScript"), + regex.MustCompileRuby(`(?i:^\s*(?=^sub\s)(?:sub\s*\w+\(.*?\))|(?::\s*sub\(.*?\))$)|(?i:^\s*(end\ssub)$)|(?i:^\s*(?=^function\s)(?:function\s*\w+\(.*?\)\s*as\s*\w*)|(?::\s*function\(.*?\)\s*as\s*\w*)$)|(?i:^\s*(end\sfunction)$)`), ), }, - ".bs": &Heuristics{}, ".builds": &Heuristics{ rule.Or( rule.MatchingLanguages("XML"), - regexp.MustCompile(`(?m)^(\s*)(?i:(?:autoexec|private)\s+){0,2}function\s+(?>(?:autoexec|private)\s+){0,2}\w+\s*\(|\b(?:level|self)[ \t]+thread[ \t]+(?:\[\[[ \t]*(?>\w+\.)*\w+[ \t]*\]\]|\w+)[ \t]*\([^\r\n\)]*\)[ \t]*;|^[ \t]*#[ \t]*(?:precache|using_animtree)[ \t]*\(`), ), }, - ".csc": &Heuristics{}, ".csl": &Heuristics{ rule.Or( rule.MatchingLanguages("XML"), - regexp.MustCompile(`(?m)(?i:^\s*(\<\?xml|xmlns))`), + regex.MustCompileMultiline(`(?i:^\s*(\<\?xml|xmlns))`), ), rule.Or( rule.MatchingLanguages("Kusto"), - regexp.MustCompile(`(?m)(^\|\s*(where|extend|project|limit|summarize))|(^\.\w+)`), + regex.MustCompileMultiline(`(^\|\s*(where|extend|project|limit|summarize))|(^\.\w+)`), ), }, ".d": &Heuristics{ rule.Or( rule.MatchingLanguages("D"), - regexp.MustCompile(`(?m)^module\s+[\w.]*\s*;|import\s+[\w\s,.:]*;|\w+\s+\w+\s*\(.*\)(?:\(.*\))?\s*{[^}]*}|unittest\s*(?:\(.*\))?\s*{[^}]*}`), + 
regex.MustCompileMultiline(`^module\s+[\w.]*\s*;|import\s+[\w\s,.:]*;|\w+\s+\w+\s*\(.*\)(?:\(.*\))?\s*{[^}]*}|unittest\s*(?:\(.*\))?\s*{[^}]*}`), ), rule.Or( rule.MatchingLanguages("DTrace"), - regexp.MustCompile(`(?m)^(\w+:\w*:\w*:\w*|BEGIN|END|provider\s+|(tick|profile)-\w+\s+{[^}]*}|#pragma\s+D\s+(option|attributes|depends_on)\s|#pragma\s+ident\s)`), + regex.MustCompileMultiline(`^(\w+:\w*:\w*:\w*|BEGIN|END|provider\s+|(tick|profile)-\w+\s+{[^}]*}|#pragma\s+D\s+(option|attributes|depends_on)\s|#pragma\s+ident\s)`), ), rule.Or( rule.MatchingLanguages("Makefile"), - regexp.MustCompile(`(?m)([\/\\].*:\s+.*\s\\$|: \\$|^[ %]:|^[\w\s\/\\.]+\w+\.\w+\s*:\s+[\w\s\/\\.]+\w+\.\w+)`), + regex.MustCompileMultiline(`([\/\\].*:\s+.*\s\\$|: \\$|^[ %]:|^[\w\s\/\\.]+\w+\.\w+\s*:\s+[\w\s\/\\.]+\w+\.\w+)`), ), }, ".dsp": &Heuristics{ rule.Or( rule.MatchingLanguages("Microsoft Developer Studio Project"), - regexp.MustCompile(`(?m)# Microsoft Developer Studio Generated Build File`), + regex.MustCompileMultiline(`# Microsoft Developer Studio Generated Build File`), ), rule.Or( rule.MatchingLanguages("Faust"), - regexp.MustCompile(`(?m)\bprocess\s*[(=]|\b(library|import)\s*\(\s*"|\bdeclare\s+(name|version|author|copyright|license)\s+"`), + regex.MustCompileMultiline(`\bprocess\s*[(=]|\b(library|import)\s*\(\s*"|\bdeclare\s+(name|version|author|copyright|license)\s+"`), ), }, ".e": &Heuristics{ rule.Or( rule.MatchingLanguages("E"), - regexp.MustCompile(`(?m)^\s*(def|var)\s+(.+):=|^\s*(def|to)\s+(\w+)(\(.+\))?\s+{|^\s*(when)\s+(\(.+\))\s+->\s+{`), + regex.MustCompileMultiline(`^\s*(def|var)\s+(.+):=|^\s*(def|to)\s+(\w+)(\(.+\))?\s+{|^\s*(when)\s+(\(.+\))\s+->\s+{`), ), rule.Or( rule.MatchingLanguages("Eiffel"), - regexp.MustCompile(`(?m)^\s*\w+\s*(?:,\s*\w+)*[:]\s*\w+\s|^\s*\w+\s*(?:\(\s*\w+[:][^)]+\))?(?:[:]\s*\w+)?(?:--.+\s+)*\s+(?:do|local)\s|^\s*(?:across|deferred|elseif|ensure|feature|from|inherit|inspect|invariant|note|once|require|undefine|variant|when)\s*$`), + 
regex.MustCompileMultiline(`^\s*\w+\s*(?:,\s*\w+)*[:]\s*\w+\s|^\s*\w+\s*(?:\(\s*\w+[:][^)]+\))?(?:[:]\s*\w+)?(?:--.+\s+)*\s+(?:do|local)\s|^\s*(?:across|deferred|elseif|ensure|feature|from|inherit|inspect|invariant|note|once|require|undefine|variant|when)\s*$`), ), rule.Or( rule.MatchingLanguages("Euphoria"), - regexp.MustCompile(`(?m)^\s*namespace\s|^\s*(?:public\s+)?include\s|^\s*(?:(?:public|export|global)\s+)?(?:atom|constant|enum|function|integer|object|procedure|sequence|type)\s`), + regex.MustCompileMultiline(`^\s*namespace\s|^\s*(?:public\s+)?include\s|^\s*(?:(?:public|export|global)\s+)?(?:atom|constant|enum|function|integer|object|procedure|sequence|type)\s`), ), }, ".ecl": &Heuristics{ rule.Or( rule.MatchingLanguages("ECLiPSe"), - regexp.MustCompile(`(?m)^[^#]+:-`), + regex.MustCompileMultiline(`^[^#]+:-`), ), rule.Or( rule.MatchingLanguages("ECL"), - regexp.MustCompile(`(?m):=`), + regex.MustCompileMultiline(`:=`), ), }, ".es": &Heuristics{ rule.Or( rule.MatchingLanguages("Erlang"), - regexp.MustCompile(`(?m)^\s*(?:%%|main\s*\(.*?\)\s*->)`), + regex.MustCompileMultiline(`^\s*(?:%%|main\s*\(.*?\)\s*->)`), + ), + rule.Or( + rule.MatchingLanguages("JavaScript"), + regex.MustCompileRuby(`(?m:\/\/|("|')use strict\1|export\s+default\s|\/\*.*?\*\/)`), ), }, ".ex": &Heuristics{ rule.Or( rule.MatchingLanguages("Elixir"), - regexp.MustCompile(`(?m)^\s*@moduledoc\s|^\s*(?:cond|import|quote|unless)\s|^\s*def(?:exception|impl|macro|module|protocol)[(\s]`), + regex.MustCompileMultiline(`^\s*@moduledoc\s|^\s*(?:cond|import|quote|unless)\s|^\s*def(?:exception|impl|macro|module|protocol)[(\s]`), ), rule.Or( rule.MatchingLanguages("Euphoria"), - regexp.MustCompile(`(?m)^\s*namespace\s|^\s*(?:public\s+)?include\s|^\s*(?:(?:public|export|global)\s+)?(?:atom|constant|enum|function|integer|object|procedure|sequence|type)\s`), + 
regex.MustCompileMultiline(`^\s*namespace\s|^\s*(?:public\s+)?include\s|^\s*(?:(?:public|export|global)\s+)?(?:atom|constant|enum|function|integer|object|procedure|sequence|type)\s`), ), }, ".f": &Heuristics{ rule.Or( rule.MatchingLanguages("Forth"), - regexp.MustCompile(`(?m)^: `), + regex.MustCompileMultiline(`^: `), ), rule.Or( rule.MatchingLanguages("Filebench WML"), - regexp.MustCompile(`(?m)flowop`), + regex.MustCompileMultiline(`flowop`), ), rule.Or( rule.MatchingLanguages("Fortran"), - regexp.MustCompile(`(?m)^(?i:[c*][^abd-z]| (subroutine|program|end|data)\s|\s*!)`), + regex.MustCompileMultiline(`^(?i:[c*][^abd-z]| (subroutine|program|end|data)\s|\s*!)`), ), }, ".for": &Heuristics{ rule.Or( rule.MatchingLanguages("Forth"), - regexp.MustCompile(`(?m)^: `), + regex.MustCompileMultiline(`^: `), ), rule.Or( rule.MatchingLanguages("Fortran"), - regexp.MustCompile(`(?m)^(?i:[c*][^abd-z]| (subroutine|program|end|data)\s|\s*!)`), + regex.MustCompileMultiline(`^(?i:[c*][^abd-z]| (subroutine|program|end|data)\s|\s*!)`), ), }, ".fr": &Heuristics{ rule.Or( rule.MatchingLanguages("Forth"), - regexp.MustCompile(`(?m)^(: |also |new-device|previous )`), + regex.MustCompileMultiline(`^(: |also |new-device|previous )`), ), rule.Or( rule.MatchingLanguages("Frege"), - regexp.MustCompile(`(?m)^\s*(import|module|package|data|type) `), + regex.MustCompileMultiline(`^\s*(import|module|package|data|type) `), ), rule.Always( rule.MatchingLanguages("Text"), @@ -817,49 +839,53 @@ var ContentHeuristics = map[string]*Heuristics{ ".fs": &Heuristics{ rule.Or( rule.MatchingLanguages("Forth"), - regexp.MustCompile(`(?m)^(: |new-device)`), + regex.MustCompileMultiline(`^(: |new-device)`), ), rule.Or( rule.MatchingLanguages("F#"), - regexp.MustCompile(`(?m)^\s*(#light|import|let|module|namespace|open|type)`), + regex.MustCompileMultiline(`^\s*(#light|import|let|module|namespace|open|type)`), ), rule.Or( rule.MatchingLanguages("GLSL"), - 
regexp.MustCompile(`(?m)^\s*(#version|precision|uniform|varying|vec[234])`), + regex.MustCompileMultiline(`^\s*(#version|precision|uniform|varying|vec[234])`), ), rule.Or( rule.MatchingLanguages("Filterscript"), - regexp.MustCompile(`(?m)#include|#pragma\s+(rs|version)|__attribute__`), + regex.MustCompileMultiline(`#include|#pragma\s+(rs|version)|__attribute__`), ), }, ".ftl": &Heuristics{ + rule.Or( + rule.MatchingLanguages("FreeMarker"), + regex.MustCompileRuby(`^(?:<|[a-zA-Z-][a-zA-Z0-9_-]+[ \t]+\w)|\${\w+[^\n]*?}|^[ \t]*(?:<#--.*?-->|<#([a-z]+)(?=\s|>)[^>]*>.*?|\[#--.*?--\]|\[#([a-z]+)(?=\s|\])[^\]]*\].*?\[#\2\])`), + ), rule.Or( rule.MatchingLanguages("Fluent"), - regexp.MustCompile(`(?m)^-?[a-zA-Z][a-zA-Z0-9_-]* *=|\{\$-?[a-zA-Z][-\w]*(?:\.[a-zA-Z][-\w]*)?\}`), + regex.MustCompileMultiline(`^-?[a-zA-Z][a-zA-Z0-9_-]* *=|\{\$-?[a-zA-Z][-\w]*(?:\.[a-zA-Z][-\w]*)?\}`), ), }, ".gd": &Heuristics{ rule.Or( rule.MatchingLanguages("GAP"), - regexp.MustCompile(`(?m)\s*(Declare|BindGlobal|KeyDependentOperation)`), + regex.MustCompileMultiline(`\s*(Declare|BindGlobal|KeyDependentOperation)`), ), rule.Or( rule.MatchingLanguages("GDScript"), - regexp.MustCompile(`(?m)\s*(extends|var|const|enum|func|class|signal|tool|yield|assert|onready)`), + regex.MustCompileMultiline(`\s*(extends|var|const|enum|func|class|signal|tool|yield|assert|onready)`), ), }, ".gml": &Heuristics{ rule.Or( rule.MatchingLanguages("XML"), - regexp.MustCompile(`(?m)(?i:^\s*(\<\?xml|xmlns))`), + regex.MustCompileMultiline(`(?i:^\s*(\<\?xml|xmlns))`), ), rule.Or( rule.MatchingLanguages("Graph Modeling Language"), - regexp.MustCompile(`(?m)(?i:^\s*(graph|node)\s+\[$)`), + regex.MustCompileMultiline(`(?i:^\s*(graph|node)\s+\[$)`), ), rule.Or( rule.MatchingLanguages("Gerber Image"), - regexp.MustCompile(`(?m)^[DGMT][0-9]{2}\*$`), + regex.MustCompileMultiline(`^[DGMT][0-9]{2}\*$`), ), rule.Always( rule.MatchingLanguages("Game Maker Language"), @@ -868,27 +894,37 @@ var ContentHeuristics = 
map[string]*Heuristics{ ".gs": &Heuristics{ rule.Or( rule.MatchingLanguages("GLSL"), - regexp.MustCompile(`(?m)^#version\s+[0-9]+\b`), + regex.MustCompileMultiline(`^#version\s+[0-9]+\b`), ), rule.Or( rule.MatchingLanguages("Gosu"), - regexp.MustCompile(`(?m)^uses (java|gw)\.`), + regex.MustCompileMultiline(`^uses (java|gw)\.`), ), rule.Or( rule.MatchingLanguages("Genie"), - regexp.MustCompile(`(?m)^\[indent=[0-9]+\]`), + regex.MustCompileMultiline(`^\[indent=[0-9]+\]`), + ), + }, + ".gsc": &Heuristics{ + rule.Or( + rule.MatchingLanguages("GSC"), + regex.MustCompileRuby(`^\s*#\s*(?:using|insert|include|define|namespace)[ \t]+\w|^\s*(?>(?:autoexec|private)\s+){0,2}function\s+(?>(?:autoexec|private)\s+){0,2}\w+\s*\(|\b(?:level|self)[ \t]+thread[ \t]+(?:\[\[[ \t]*(?>\w+\.)*\w+[ \t]*\]\]|\w+)[ \t]*\([^\r\n\)]*\)[ \t]*;|^[ \t]*#[ \t]*(?:precache|using_animtree)[ \t]*\(`), + ), + }, + ".gsh": &Heuristics{ + rule.Or( + rule.MatchingLanguages("GSC"), + regex.MustCompileRuby(`^\s*#\s*(?:using|insert|include|define|namespace)[ \t]+\w|^\s*(?>(?:autoexec|private)\s+){0,2}function\s+(?>(?:autoexec|private)\s+){0,2}\w+\s*\(|\b(?:level|self)[ \t]+thread[ \t]+(?:\[\[[ \t]*(?>\w+\.)*\w+[ \t]*\]\]|\w+)[ \t]*\([^\r\n\)]*\)[ \t]*;|^[ \t]*#[ \t]*(?:precache|using_animtree)[ \t]*\(`), ), }, - ".gsc": &Heuristics{}, - ".gsh": &Heuristics{}, ".h": &Heuristics{ rule.Or( rule.MatchingLanguages("Objective-C"), - regexp.MustCompile(`(?m)^\s*(@(interface|class|protocol|property|end|synchronised|selector|implementation)\b|#import\s+.+\.h[">])`), + regex.MustCompileMultiline(`^\s*(@(interface|class|protocol|property|end|synchronised|selector|implementation)\b|#import\s+.+\.h[">])`), ), rule.Or( rule.MatchingLanguages("C++"), - regexp.MustCompile(`(?m)^\s*#\s*include <(cstdint|string|vector|map|list|array|bitset|queue|stack|forward_list|unordered_map|unordered_set|(i|o|io)stream)>|^\s*template\s*<|^[ \t]*(try|constexpr)|^[ \t]*catch\s*\(|^[ \t]*(class|(using[ \t]+)?namespace)\s+\w+|^[ 
\t]*(private|public|protected):$|std::\w+`), + regex.MustCompileMultiline(`^\s*#\s*include <(cstdint|string|vector|map|list|array|bitset|queue|stack|forward_list|unordered_map|unordered_set|(i|o|io)stream)>|^\s*template\s*<|^[ \t]*(try|constexpr)|^[ \t]*catch\s*\(|^[ \t]*(class|(using[ \t]+)?namespace)\s+\w+|^[ \t]*(private|public|protected):$|std::\w+`), ), rule.Always( rule.MatchingLanguages("C"), @@ -897,23 +933,23 @@ var ContentHeuristics = map[string]*Heuristics{ ".hh": &Heuristics{ rule.Or( rule.MatchingLanguages("Hack"), - regexp.MustCompile(`(?m)<\?hh`), + regex.MustCompileMultiline(`<\?hh`), ), }, ".i": &Heuristics{ rule.Or( rule.MatchingLanguages("Motorola 68K Assembly"), - regexp.MustCompile(`(?m)(?im)\bmoveq(?:\.l)?\s+#(?:\$-?[0-9a-f]{1,3}|%[0-1]{1,8}|-?[0-9]{1,3}),\s*d[0-7]\b|(?im)^\s*move(?:\.[bwl])?\s+(?:sr|usp),\s*[^\s]+|(?im)^\s*move\.[bwl]\s+.*\b[ad]\d|(?im)^\s*movem\.[bwl]\b|(?im)^\s*move[mp](?:\.[wl])?\b|(?im)^\s*btst\b|(?im)^\s*dbra\b`), + regex.MustCompileMultiline(`(?im)\bmoveq(?:\.l)?\s+#(?:\$-?[0-9a-f]{1,3}|%[0-1]{1,8}|-?[0-9]{1,3}),\s*d[0-7]\b|(?im)^\s*move(?:\.[bwl])?\s+(?:sr|usp),\s*[^\s]+|(?im)^\s*move\.[bwl]\s+.*\b[ad]\d|(?im)^\s*movem\.[bwl]\b|(?im)^\s*move[mp](?:\.[wl])?\b|(?im)^\s*btst\b|(?im)^\s*dbra\b`), ), rule.Or( rule.MatchingLanguages("SWIG"), - regexp.MustCompile(`(?m)^[ \t]*%[a-z_]+\b|^%[{}]$`), + regex.MustCompileMultiline(`^[ \t]*%[a-z_]+\b|^%[{}]$`), ), }, ".ice": &Heuristics{ rule.Or( rule.MatchingLanguages("JSON"), - regexp.MustCompile(`(?m)\A\s*[{\[]`), + regex.MustCompileMultiline(`\A\s*[{\[]`), ), rule.Always( rule.MatchingLanguages("Slice"), @@ -922,57 +958,61 @@ var ContentHeuristics = map[string]*Heuristics{ ".inc": &Heuristics{ rule.Or( rule.MatchingLanguages("Motorola 68K Assembly"), - 
regexp.MustCompile(`(?m)(?im)\bmoveq(?:\.l)?\s+#(?:\$-?[0-9a-f]{1,3}|%[0-1]{1,8}|-?[0-9]{1,3}),\s*d[0-7]\b|(?im)^\s*move(?:\.[bwl])?\s+(?:sr|usp),\s*[^\s]+|(?im)^\s*move\.[bwl]\s+.*\b[ad]\d|(?im)^\s*movem\.[bwl]\b|(?im)^\s*move[mp](?:\.[wl])?\b|(?im)^\s*btst\b|(?im)^\s*dbra\b`), + regex.MustCompileMultiline(`(?im)\bmoveq(?:\.l)?\s+#(?:\$-?[0-9a-f]{1,3}|%[0-1]{1,8}|-?[0-9]{1,3}),\s*d[0-7]\b|(?im)^\s*move(?:\.[bwl])?\s+(?:sr|usp),\s*[^\s]+|(?im)^\s*move\.[bwl]\s+.*\b[ad]\d|(?im)^\s*movem\.[bwl]\b|(?im)^\s*move[mp](?:\.[wl])?\b|(?im)^\s*btst\b|(?im)^\s*dbra\b`), ), rule.Or( rule.MatchingLanguages("PHP"), - regexp.MustCompile(`(?m)^<\?(?:php)?`), + regex.MustCompileMultiline(`^<\?(?:php)?`), ), rule.Or( rule.MatchingLanguages("SourcePawn"), - regexp.MustCompile(`(?m)^public\s+(?:SharedPlugin(?:\s+|:)__pl_\w+\s*=(?:\s*{)?|(?:void\s+)?__pl_\w+_SetNTVOptional\(\)(?:\s*{)?)|^methodmap\s+\w+\s+<\s+\w+|^\s*MarkNativeAsOptional\s*\(`), + regex.MustCompileMultiline(`^public\s+(?:SharedPlugin(?:\s+|:)__pl_\w+\s*=(?:\s*{)?|(?:void\s+)?__pl_\w+_SetNTVOptional\(\)(?:\s*{)?)|^methodmap\s+\w+\s+<\s+\w+|^\s*MarkNativeAsOptional\s*\(`), + ), + rule.Or( + rule.MatchingLanguages("NASL"), + regex.MustCompileRuby(`^\s*include\s*\(\s*(?:"|')[\\/\w\-\.:\s]+\.(?:nasl|inc)\s*(?:"|')\s*\)\s*;|^\s*(?:global|local)_var\s+(?:\w+(?:\s*=\s*[\w\-"']+)?\s*)(?:,\s*\w+(?:\s*=\s*[\w\-"']+)?\s*)*+\s*;|^\s*namespace\s+\w+\s*{|^\s*object\s+\w+\s*(?:extends\s+\w+(?:::\w+)?)?\s*{|^\s*(?:public\s+|private\s+|\s*)function\s+\w+\s*\([\w\s,]*\)\s*{`), ), rule.Or( rule.MatchingLanguages("POV-Ray SDL"), - regexp.MustCompile(`(?m)^\s*#(declare|local|macro|while)\s`), + regex.MustCompileMultiline(`^\s*#(declare|local|macro|while)\s`), ), rule.Or( rule.MatchingLanguages("Pascal"), - regexp.MustCompile(`(?m)(?i:^\s*{\$(?:mode|ifdef|undef|define)[ ]+[a-z0-9_]+})|^\s*end[.;]\s*$`), + regex.MustCompileMultiline(`(?i:^\s*{\$(?:mode|ifdef|undef|define)[ ]+[a-z0-9_]+})|^\s*end[.;]\s*$`), ), }, ".l": &Heuristics{ rule.Or( 
rule.MatchingLanguages("Common Lisp"), - regexp.MustCompile(`(?m)\(def(un|macro)\s`), + regex.MustCompileMultiline(`\(def(un|macro)\s`), ), rule.Or( rule.MatchingLanguages("Lex"), - regexp.MustCompile(`(?m)^(%[%{}]xs|<.*>)`), + regex.MustCompileMultiline(`^(%[%{}]xs|<.*>)`), ), rule.Or( rule.MatchingLanguages("Roff"), - regexp.MustCompile(`(?m)^\.[A-Za-z]{2}(\s|$)`), + regex.MustCompileMultiline(`^\.[A-Za-z]{2}(\s|$)`), ), rule.Or( rule.MatchingLanguages("PicoLisp"), - regexp.MustCompile(`(?m)^\((de|class|rel|code|data|must)\s`), + regex.MustCompileMultiline(`^\((de|class|rel|code|data|must)\s`), ), }, ".lisp": &Heuristics{ rule.Or( rule.MatchingLanguages("Common Lisp"), - regexp.MustCompile(`(?m)^\s*\((?i:defun|in-package|defpackage) `), + regex.MustCompileMultiline(`^\s*\((?i:defun|in-package|defpackage) `), ), rule.Or( rule.MatchingLanguages("NewLisp"), - regexp.MustCompile(`(?m)^\s*\(define `), + regex.MustCompileMultiline(`^\s*\(define `), ), }, ".ls": &Heuristics{ rule.Or( rule.MatchingLanguages("LoomScript"), - regexp.MustCompile(`(?m)^\s*package\s*[\w\.\/\*\s]*\s*{`), + regex.MustCompileMultiline(`^\s*package\s*[\w\.\/\*\s]*\s*{`), ), rule.Always( rule.MatchingLanguages("LiveScript"), @@ -981,54 +1021,54 @@ var ContentHeuristics = map[string]*Heuristics{ ".lsp": &Heuristics{ rule.Or( rule.MatchingLanguages("Common Lisp"), - regexp.MustCompile(`(?m)^\s*\((?i:defun|in-package|defpackage) `), + regex.MustCompileMultiline(`^\s*\((?i:defun|in-package|defpackage) `), ), rule.Or( rule.MatchingLanguages("NewLisp"), - regexp.MustCompile(`(?m)^\s*\(define `), + regex.MustCompileMultiline(`^\s*\(define `), ), }, ".m": &Heuristics{ rule.Or( rule.MatchingLanguages("Objective-C"), - regexp.MustCompile(`(?m)^\s*(@(interface|class|protocol|property|end|synchronised|selector|implementation)\b|#import\s+.+\.h[">])`), + regex.MustCompileMultiline(`^\s*(@(interface|class|protocol|property|end|synchronised|selector|implementation)\b|#import\s+.+\.h[">])`), ), rule.Or( 
rule.MatchingLanguages("Mercury"), - regexp.MustCompile(`(?m):- module`), + regex.MustCompileMultiline(`:- module`), ), rule.Or( rule.MatchingLanguages("MUF"), - regexp.MustCompile(`(?m)^: `), + regex.MustCompileMultiline(`^: `), ), rule.Or( rule.MatchingLanguages("M"), - regexp.MustCompile(`(?m)^\s*;`), + regex.MustCompileMultiline(`^\s*;`), ), rule.And( rule.MatchingLanguages("Mathematica"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)\(\*`), + regex.MustCompileMultiline(`\(\*`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)\*\)$`), + regex.MustCompileMultiline(`\*\)$`), ), ), rule.Or( rule.MatchingLanguages("MATLAB"), - regexp.MustCompile(`(?m)^\s*%`), + regex.MustCompileMultiline(`^\s*%`), ), rule.Or( rule.MatchingLanguages("Limbo"), - regexp.MustCompile(`(?m)^\w+\s*:\s*module\s*{`), + regex.MustCompileMultiline(`^\w+\s*:\s*module\s*{`), ), }, ".m4": &Heuristics{ rule.Or( rule.MatchingLanguages("M4Sugar"), - regexp.MustCompile(`(?m)AC_DEFUN|AC_PREREQ|AC_INIT|^_?m4_`), + regex.MustCompileMultiline(`AC_DEFUN|AC_PREREQ|AC_INIT|^_?m4_`), ), rule.Always( rule.MatchingLanguages("M4"), @@ -1039,26 +1079,26 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + 
regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Always( @@ -1068,27 +1108,31 @@ var ContentHeuristics = map[string]*Heuristics{ ".mask": &Heuristics{ rule.Or( rule.MatchingLanguages("Unity3D Asset"), - regexp.MustCompile(`(?m)tag:unity3d.com`), + regex.MustCompileMultiline(`tag:unity3d.com`), ), }, ".mc": &Heuristics{ + rule.Or( + rule.MatchingLanguages("Win32 Message File"), + regex.MustCompileRuby(`(?i)^[ \t]*(?>\/\*\s*)?MessageId=|^\.$`), + ), rule.Or( rule.MatchingLanguages("M4"), - regexp.MustCompile(`(?m)^dnl|^divert\((?:-?\d+)?\)|^\w+\(`+"`"+`[^\n]*?'[),]`), + regex.MustCompileMultiline(`^dnl|^divert\((?:-?\d+)?\)|^\w+\(`+"`"+`[^\n]*?'[),]`), ), rule.Or( rule.MatchingLanguages("Monkey C"), - regexp.MustCompile(`(?m)\b(?:using|module|function|class|var)\s+\w`), + regex.MustCompileMultiline(`\b(?:using|module|function|class|var)\s+\w`), ), }, ".md": &Heuristics{ rule.Or( rule.MatchingLanguages("Markdown"), - regexp.MustCompile(`(?m)(^[-A-Za-z0-9=#!\*\[|>])|<\/|\A\z`), + regex.MustCompileMultiline(`(^[-A-Za-z0-9=#!\*\[|>])|<\/|\A\z`), ), rule.Or( rule.MatchingLanguages("GCC Machine Description"), - regexp.MustCompile(`(?m)^(;;|\(define_)`), + regex.MustCompileMultiline(`^(;;|\(define_)`), ), rule.Always( rule.MatchingLanguages("Markdown"), @@ -1099,26 +1143,26 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Dd +(?:[^"\s]+|"[^"]+")`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), 
rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), + regex.MustCompileMultiline(`^[.'][ \t]*Sh +(?:[^"\s]|"[^"]+")`), ), ), rule.And( rule.MatchingLanguages("Roff Manpage"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), + regex.MustCompileMultiline(`^[.'][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), + regex.MustCompileMultiline(`^[.'][ \t]*SH +(?:[^"\s]+|"[^"\s]+)`), ), ), rule.Always( @@ -1128,21 +1172,21 @@ var ContentHeuristics = map[string]*Heuristics{ ".ml": &Heuristics{ rule.Or( rule.MatchingLanguages("OCaml"), - regexp.MustCompile(`(?m)(^\s*module)|let rec |match\s+(\S+\s)+with`), + regex.MustCompileMultiline(`(^\s*module)|let rec |match\s+(\S+\s)+with`), ), rule.Or( rule.MatchingLanguages("Standard ML"), - regexp.MustCompile(`(?m)=> |case\s+(\S+\s)+of`), + regex.MustCompileMultiline(`=> |case\s+(\S+\s)+of`), ), }, ".mod": &Heuristics{ rule.Or( rule.MatchingLanguages("XML"), - regexp.MustCompile(`(?m)\s`), + regex.MustCompileMultiline(`^\s+\w+\s+=>\s`), ), }, ".pro": &Heuristics{ rule.Or( rule.MatchingLanguages("Proguard"), - regexp.MustCompile(`(?m)^-(include\b.*\.pro$|keep\b|keepclassmembers\b|keepattributes\b)`), + regex.MustCompileMultiline(`^-(include\b.*\.pro$|keep\b|keepclassmembers\b|keepattributes\b)`), ), rule.Or( rule.MatchingLanguages("Prolog"), - regexp.MustCompile(`(?m)^[^\[#]+:-`), + regex.MustCompileMultiline(`^[^\[#]+:-`), ), rule.Or( rule.MatchingLanguages("INI"), - regexp.MustCompile(`(?m)last_client=`), + regex.MustCompileMultiline(`last_client=`), ), rule.And( rule.MatchingLanguages("QMake"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)HEADERS`), + regex.MustCompileMultiline(`HEADERS`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)SOURCES`), + 
regex.MustCompileMultiline(`SOURCES`), ), ), rule.Or( rule.MatchingLanguages("IDL"), - regexp.MustCompile(`(?m)^\s*function[ \w,]+$`), + regex.MustCompileMultiline(`^\s*function[ \w,]+$`), ), }, ".properties": &Heuristics{ @@ -1320,89 +1364,93 @@ var ContentHeuristics = map[string]*Heuristics{ rule.MatchingLanguages("INI"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[^#!;][^=]*=`), + regex.MustCompileMultiline(`^[^#!;][^=]*=`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[;\[]`), + regex.MustCompileMultiline(`^[;\[]`), ), ), rule.And( rule.MatchingLanguages("Java Properties"), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[^#!;][^=]*=`), + regex.MustCompileMultiline(`^[^#!;][^=]*=`), ), rule.Or( rule.MatchingLanguages(""), - regexp.MustCompile(`(?m)^[#!]`), + regex.MustCompileMultiline(`^[#!]`), ), ), rule.Or( rule.MatchingLanguages("INI"), - regexp.MustCompile(`(?m)^[^#!;][^=]*=`), + regex.MustCompileMultiline(`^[^#!;][^=]*=`), ), rule.Or( rule.MatchingLanguages("Java Properties"), - regexp.MustCompile(`(?m)^[^#!][^:]*:`), + regex.MustCompileMultiline(`^[^#!][^:]*:`), ), }, ".q": &Heuristics{ rule.Or( rule.MatchingLanguages("q"), - regexp.MustCompile(`(?m)((?i:[A-Z.][\w.]*:{)|(^|\n)\\(cd?|d|l|p|ts?) )`), + regex.MustCompileMultiline(`((?i:[A-Z.][\w.]*:{)|(^|\n)\\(cd?|d|l|p|ts?) 
)`), ), rule.Or( rule.MatchingLanguages("HiveQL"), - regexp.MustCompile(`(?m)(?i:SELECT\s+[\w*,]+\s+FROM|(CREATE|ALTER|DROP)\s(DATABASE|SCHEMA|TABLE))`), + regex.MustCompileMultiline(`(?i:SELECT\s+[\w*,]+\s+FROM|(CREATE|ALTER|DROP)\s(DATABASE|SCHEMA|TABLE))`), ), }, ".qs": &Heuristics{ rule.Or( rule.MatchingLanguages("Q#"), - regexp.MustCompile(`(?m)^((\/{2,3})?\s*(namespace|operation)\b)`), + regex.MustCompileMultiline(`^((\/{2,3})?\s*(namespace|operation)\b)`), ), rule.Or( rule.MatchingLanguages("Qt Script"), - regexp.MustCompile(`(?m)(\w+\.prototype\.\w+|===|\bvar\b)`), + regex.MustCompileMultiline(`(\w+\.prototype\.\w+|===|\bvar\b)`), ), }, ".r": &Heuristics{ rule.Or( rule.MatchingLanguages("Rebol"), - regexp.MustCompile(`(?m)(?i:\bRebol\b)`), + regex.MustCompileMultiline(`(?i:\bRebol\b)`), ), rule.Or( rule.MatchingLanguages("R"), - regexp.MustCompile(`(?m)<-|^\s*#`), + regex.MustCompileMultiline(`<-|^\s*#`), ), }, ".re": &Heuristics{ rule.Or( rule.MatchingLanguages("Reason"), - regexp.MustCompile(`(?m)^\s*module\s+type\s|^\s*(?:include|open)\s+\w+\s*;\s*$|^\s*let\s+(?:module\s\w+\s*=\s*{|\w+:\s+.*=.*;\s*$)`), + regex.MustCompileMultiline(`^\s*module\s+type\s|^\s*(?:include|open)\s+\w+\s*;\s*$|^\s*let\s+(?:module\s\w+\s*=\s*{|\w+:\s+.*=.*;\s*$)`), ), rule.Or( rule.MatchingLanguages("C++"), - regexp.MustCompile(`(?m)^\s*#(?:(?:if|ifdef|define|pragma)\s+\w|\s*include\s+<[^>]+>)|^\s*template\s*<`), + regex.MustCompileMultiline(`^\s*#(?:(?:if|ifdef|define|pragma)\s+\w|\s*include\s+<[^>]+>)|^\s*template\s*<`), ), }, ".res": &Heuristics{ rule.Or( rule.MatchingLanguages("ReScript"), - regexp.MustCompile(`(?m)^\s*(let|module|type)\s+\w*\s+=\s+|^\s*(?:include|open)\s+\w+\s*$`), + regex.MustCompileMultiline(`^\s*(let|module|type)\s+\w*\s+=\s+|^\s*(?:include|open)\s+\w+\s*$`), ), }, ".rno": &Heuristics{ + rule.Or( + rule.MatchingLanguages("RUNOFF"), + regex.MustCompileRuby(`(?i:^\.!|^\f|\f$|^\.end lit(?:eral)?\b|^\.[a-zA-Z].*?;\.[a-zA-Z](?:[; 
\t])|\^\*[^\s*][^*]*\\\*(?=$|\s)|^\.c;[ \t]*\w+)`), + ), rule.Or( rule.MatchingLanguages("Roff"), - regexp.MustCompile(`(?m)^\.\\" `), + regex.MustCompileMultiline(`^\.\\" `), ), }, ".rpy": &Heuristics{ rule.Or( rule.MatchingLanguages("Python"), - regexp.MustCompile(`(?m)(?m:^(import|from|class|def)\s)`), + regex.MustCompileMultiline(`(?m:^(import|from|class|def)\s)`), ), rule.Always( rule.MatchingLanguages("Ren'Py"), @@ -1411,55 +1459,59 @@ var ContentHeuristics = map[string]*Heuristics{ ".rs": &Heuristics{ rule.Or( rule.MatchingLanguages("Rust"), - regexp.MustCompile(`(?m)^(use |fn |mod |pub |macro_rules|impl|#!?\[)`), + regex.MustCompileMultiline(`^(use |fn |mod |pub |macro_rules|impl|#!?\[)`), ), rule.Or( rule.MatchingLanguages("RenderScript"), - regexp.MustCompile(`(?m)#include|#pragma\s+(rs|version)|__attribute__`), + regex.MustCompileMultiline(`#include|#pragma\s+(rs|version)|__attribute__`), ), rule.Or( rule.MatchingLanguages("XML"), - regexp.MustCompile(`(?m)^\s*<\?xml`), + regex.MustCompileMultiline(`^\s*<\?xml`), ), }, ".s": &Heuristics{ rule.Or( rule.MatchingLanguages("Motorola 68K Assembly"), - regexp.MustCompile(`(?m)(?im)\bmoveq(?:\.l)?\s+#(?:\$-?[0-9a-f]{1,3}|%[0-1]{1,8}|-?[0-9]{1,3}),\s*d[0-7]\b|(?im)^\s*move(?:\.[bwl])?\s+(?:sr|usp),\s*[^\s]+|(?im)^\s*move\.[bwl]\s+.*\b[ad]\d|(?im)^\s*movem\.[bwl]\b|(?im)^\s*move[mp](?:\.[wl])?\b|(?im)^\s*btst\b|(?im)^\s*dbra\b`), + regex.MustCompileMultiline(`(?im)\bmoveq(?:\.l)?\s+#(?:\$-?[0-9a-f]{1,3}|%[0-1]{1,8}|-?[0-9]{1,3}),\s*d[0-7]\b|(?im)^\s*move(?:\.[bwl])?\s+(?:sr|usp),\s*[^\s]+|(?im)^\s*move\.[bwl]\s+.*\b[ad]\d|(?im)^\s*movem\.[bwl]\b|(?im)^\s*move[mp](?:\.[wl])?\b|(?im)^\s*btst\b|(?im)^\s*dbra\b`), ), }, ".sc": &Heuristics{ rule.Or( rule.MatchingLanguages("SuperCollider"), - regexp.MustCompile(`(?m)(?i:\^(this|super)\.|^\s*~\w+\s*=\.)`), + regex.MustCompileMultiline(`(?i:\^(this|super)\.|^\s*~\w+\s*=\.)`), ), rule.Or( rule.MatchingLanguages("Scala"), - regexp.MustCompile(`(?m)(^\s*import 
(scala|java)\.|^\s*class\b)`), + regex.MustCompileMultiline(`(^\s*import (scala|java)\.|^\s*class\b)`), ), }, ".sol": &Heuristics{ + rule.Or( + rule.MatchingLanguages("Solidity"), + regex.MustCompileRuby(`\bpragma\s+solidity\b|\b(?:abstract\s+)?contract\s+(?!\d)[a-zA-Z0-9$_]+(?:\s+is\s+(?:[a-zA-Z0-9$_][^\{]*?)?)?\s*\{`), + ), rule.Or( rule.MatchingLanguages("Gerber Image"), - regexp.MustCompile(`(?m)^[DGMT][0-9]{2}\*\r?\n`), + regex.MustCompileMultiline(`^[DGMT][0-9]{2}\*\r?\n`), ), }, ".sql": &Heuristics{ rule.Or( rule.MatchingLanguages("PLpgSQL"), - regexp.MustCompile(`(?m)(?i:^\\i\b|AS\s+\$\$|LANGUAGE\s+'?plpgsql'?|BEGIN(\s+WORK)?\s*;)`), + regex.MustCompileMultiline(`(?i:^\\i\b|AS\s+\$\$|LANGUAGE\s+'?plpgsql'?|BEGIN(\s+WORK)?\s*;)`), ), rule.Or( rule.MatchingLanguages("SQLPL"), - regexp.MustCompile(`(?m)(?i:ALTER\s+MODULE|MODE\s+DB2SQL|\bSYS(CAT|PROC)\.|ASSOCIATE\s+RESULT\s+SET|\bEND!\s*$)`), + regex.MustCompileMultiline(`(?i:ALTER\s+MODULE|MODE\s+DB2SQL|\bSYS(CAT|PROC)\.|ASSOCIATE\s+RESULT\s+SET|\bEND!\s*$)`), ), rule.Or( rule.MatchingLanguages("PLSQL"), - regexp.MustCompile(`(?m)(?i:\$\$PLSQL_|XMLTYPE|systimestamp|\.nextval|CONNECT\s+BY|AUTHID\s+(DEFINER|CURRENT_USER)|constructor\W+function)`), + regex.MustCompileMultiline(`(?i:\$\$PLSQL_|XMLTYPE|systimestamp|\.nextval|CONNECT\s+BY|AUTHID\s+(DEFINER|CURRENT_USER)|constructor\W+function)`), ), rule.Or( rule.MatchingLanguages("TSQL"), - regexp.MustCompile(`(?m)(?i:^\s*GO\b|BEGIN(\s+TRY|\s+CATCH)|OUTPUT\s+INSERTED|DECLARE\s+@|\[dbo\])`), + regex.MustCompileMultiline(`(?i:^\s*GO\b|BEGIN(\s+TRY|\s+CATCH)|OUTPUT\s+INSERTED|DECLARE\s+@|\[dbo\])`), ), rule.Always( rule.MatchingLanguages("SQL"), @@ -1468,53 +1520,62 @@ var ContentHeuristics = map[string]*Heuristics{ ".srt": &Heuristics{ rule.Or( rule.MatchingLanguages("SubRip Text"), - regexp.MustCompile(`(?m)^(\d{2}:\d{2}:\d{2},\d{3})\s*(-->)\s*(\d{2}:\d{2}:\d{2},\d{3})$`), + 
regex.MustCompileMultiline(`^(\d{2}:\d{2}:\d{2},\d{3})\s*(-->)\s*(\d{2}:\d{2}:\d{2},\d{3})$`), ), }, ".st": &Heuristics{ + rule.Or( + rule.MatchingLanguages("StringTemplate"), + regex.MustCompileRuby(`\$\w+[($]|(.)!\s*.+?\s*!\1||\[!\s*.+?\s*!\]|\{!\s*.+?\s*!\}`), + ), rule.Or( rule.MatchingLanguages("Smalltalk"), - regexp.MustCompile(`(?m)\A\s*[\[{(^"'\w#]|[a-zA-Z_]\w*\s*:=\s*[a-zA-Z_]\w*|class\s*>>\s*[a-zA-Z_]\w*|^[a-zA-Z_]\w*\s+[a-zA-Z_]\w*:|^Class\s*{|if(?:True|False):\s*\[`), + regex.MustCompileMultiline(`\A\s*[\[{(^"'\w#]|[a-zA-Z_]\w*\s*:=\s*[a-zA-Z_]\w*|class\s*>>\s*[a-zA-Z_]\w*|^[a-zA-Z_]\w*\s+[a-zA-Z_]\w*:|^Class\s*{|if(?:True|False):\s*\[`), ), }, ".star": &Heuristics{ rule.Or( rule.MatchingLanguages("STAR"), - regexp.MustCompile(`(?m)^loop_\s*$`), + regex.MustCompileMultiline(`^loop_\s*$`), ), rule.Always( rule.MatchingLanguages("Starlark"), ), }, - ".stl": &Heuristics{}, + ".stl": &Heuristics{ + rule.Or( + rule.MatchingLanguages("STL"), + regex.MustCompileRuby(`\A\s*solid(?=$|\s)(?m:.*?)\Rendsolid(?:$|\s)`), + ), + }, ".t": &Heuristics{ rule.Or( rule.MatchingLanguages("Perl"), - regexp.MustCompile(`(?m)\buse\s+(?:strict\b|v?5\.)`), + regex.MustCompileMultiline(`\buse\s+(?:strict\b|v?5\.)`), ), rule.Or( rule.MatchingLanguages("Raku"), - regexp.MustCompile(`(?m)^\s*(?:use\s+v6\b|\bmodule\b|\bmy\s+class\b)`), + regex.MustCompileMultiline(`^\s*(?:use\s+v6\b|\bmodule\b|\bmy\s+class\b)`), ), rule.Or( rule.MatchingLanguages("Turing"), - regexp.MustCompile(`(?m)^\s*%[ \t]+|^\s*var\s+\w+(\s*:\s*\w+)?\s*:=\s*\w+`), + regex.MustCompileMultiline(`^\s*%[ \t]+|^\s*var\s+\w+(\s*:\s*\w+)?\s*:=\s*\w+`), ), }, ".toc": &Heuristics{ rule.Or( rule.MatchingLanguages("World of Warcraft Addon Data"), - regexp.MustCompile(`(?m)^## |@no-lib-strip@`), + regex.MustCompileMultiline(`^## |@no-lib-strip@`), ), rule.Or( rule.MatchingLanguages("TeX"), - regexp.MustCompile(`(?m)^\\(contentsline|defcounter|beamer|boolfalse)`), + 
regex.MustCompileMultiline(`^\\(contentsline|defcounter|beamer|boolfalse)`), ), }, ".ts": &Heuristics{ rule.Or( rule.MatchingLanguages("XML"), - regexp.MustCompile(`(?m) `), + regex.MustCompileMultiline(`gap> `), ), rule.Always( rule.MatchingLanguages("Scilab"), @@ -1532,37 +1593,46 @@ var ContentHeuristics = map[string]*Heuristics{ ".tsx": &Heuristics{ rule.Or( rule.MatchingLanguages("TSX"), - regexp.MustCompile(`(?m)^\s*(import.+(from\s+|require\()['"]react|\/\/\/\s*]?[0-9]+|m)?|[ \t]ex)(?=:(?=[ \t]*set?[ \t][^\r\n:]+:)|:(?![ \t]*set?[ \t]))(?:(?:[ \t]*:[ \t]*|[ \t])\w*(?:[ \t]*=(?:[^\\\s]|\\.)*)?)*[ \t:](?:filetype|ft|syntax)[ \t]*=(help)(?=$|\s|:)`), + ), rule.Always( rule.MatchingLanguages("Text"), ), }, - ".url": &Heuristics{}, + ".url": &Heuristics{ + rule.Or( + rule.MatchingLanguages("INI"), + regex.MustCompileRuby(`^\[InternetShortcut\]\R(?>[^\s\[][^\n]*\R)*URL=`), + ), + }, ".v": &Heuristics{ rule.Or( rule.MatchingLanguages("Coq"), - regexp.MustCompile(`(?m)(?:^|\s)(?:Proof|Qed)\.(?:$|\s)|(?:^|\s)Require[ \t]+(Import|Export)\s`), + regex.MustCompileMultiline(`(?:^|\s)(?:Proof|Qed)\.(?:$|\s)|(?:^|\s)Require[ \t]+(Import|Export)\s`), ), rule.Or( rule.MatchingLanguages("Verilog"), - regexp.MustCompile(`(?m)^[ \t]*module\s+[^\s()]+\s+\#?\(|^[ \t]*`+"`"+`(?:define|ifdef|ifndef|include|timescale)|^[ \t]*always[ \t]+@|^[ \t]*initial[ \t]+(begin|@)`), + regex.MustCompileMultiline(`^[ \t]*module\s+[^\s()]+\s+\#?\(|^[ \t]*`+"`"+`(?:define|ifdef|ifndef|include|timescale)|^[ \t]*always[ \t]+@|^[ \t]*initial[ \t]+(begin|@)`), ), rule.Or( rule.MatchingLanguages("V"), - regexp.MustCompile(`(?m)\$(?:if|else)[ \t]|^[ \t]*fn\s+[^\s()]+\(.*?\).*?\{|^[ \t]*for\s*\{`), + regex.MustCompileMultiline(`\$(?:if|else)[ \t]|^[ \t]*fn\s+[^\s()]+\(.*?\).*?\{|^[ \t]*for\s*\{`), ), }, ".vba": &Heuristics{ rule.Or( rule.MatchingLanguages("Vim Script"), - regexp.MustCompile(`(?m)^UseVimball`), + regex.MustCompileMultiline(`^UseVimball`), ), rule.Always( rule.MatchingLanguages("VBA"), @@ 
-1571,35 +1641,35 @@ var ContentHeuristics = map[string]*Heuristics{ ".w": &Heuristics{ rule.Or( rule.MatchingLanguages("OpenEdge ABL"), - regexp.MustCompile(`(?m)&ANALYZE-SUSPEND _UIB-CODE-BLOCK _CUSTOM _DEFINITIONS`), + regex.MustCompileMultiline(`&ANALYZE-SUSPEND _UIB-CODE-BLOCK _CUSTOM _DEFINITIONS`), ), rule.Or( rule.MatchingLanguages("CWeb"), - regexp.MustCompile(`(?m)^@(<|\w+\.)`), + regex.MustCompileMultiline(`^@(<|\w+\.)`), ), }, ".x": &Heuristics{ rule.Or( rule.MatchingLanguages("DirectX 3D File"), - regexp.MustCompile(`(?m)^xof 030(2|3)(?:txt|bin|tzip|bzip)\b`), + regex.MustCompileMultiline(`^xof 030(2|3)(?:txt|bin|tzip|bzip)\b`), ), rule.Or( rule.MatchingLanguages("RPC"), - regexp.MustCompile(`(?m)\b(program|version)\s+\w+\s*{|\bunion\s+\w+\s+switch\s*\(`), + regex.MustCompileMultiline(`\b(program|version)\s+\w+\s*{|\bunion\s+\w+\s+switch\s*\(`), ), rule.Or( rule.MatchingLanguages("Logos"), - regexp.MustCompile(`(?m)^%(end|ctor|hook|group)\b`), + regex.MustCompileMultiline(`^%(end|ctor|hook|group)\b`), ), rule.Or( rule.MatchingLanguages("Linker Script"), - regexp.MustCompile(`(?m)OUTPUT_ARCH\(|OUTPUT_FORMAT\(|SECTIONS`), + regex.MustCompileMultiline(`OUTPUT_ARCH\(|OUTPUT_FORMAT\(|SECTIONS`), ), }, ".yaml": &Heuristics{ rule.Or( rule.MatchingLanguages("MiniYAML"), - regexp.MustCompile(`(?m)^\t+.*?[^\s:].*?:`), + regex.MustCompileMultiline(`^\t+.*?[^\s:].*?:`), ), rule.Always( rule.MatchingLanguages("YAML"), @@ -1608,7 +1678,7 @@ var ContentHeuristics = map[string]*Heuristics{ ".yy": &Heuristics{ rule.Or( rule.MatchingLanguages("JSON"), - regexp.MustCompile(`(?m)\"modelName\"\:\s*\"GM`), + regex.MustCompileMultiline(`\"modelName\"\:\s*\"GM`), ), rule.Always( rule.MatchingLanguages("Yacc"), diff --git a/internal/code-generator/generator/vendor.go b/internal/code-generator/generator/vendor.go index e239d10..fff86e2 100644 --- a/internal/code-generator/generator/vendor.go +++ b/internal/code-generator/generator/vendor.go @@ -5,6 +5,7 @@ import ( "fmt" "io" 
"io/ioutil" + "log" "sort" "strings" "text/template" @@ -25,6 +26,12 @@ func Vendor(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) return fmt.Errorf("failed to parse YAML %s, %q", fileToParse, err) } + for _, re := range regexps { + if !isRE2(re) { + log.Printf("RE2 incompatible syntax for vendor:'%s'\n", re) + } + } + buf := &bytes.Buffer{} if err := executeVendorTemplate(buf, regexps, tmplPath, tmplName, commit); err != nil { return err @@ -34,34 +41,14 @@ func Vendor(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) } func executeVendorTemplate(out io.Writer, regexps []string, tmplPath, tmplName, commit string) error { - funcs := template.FuncMap{"optimize": collateAllMatchers} + funcs := template.FuncMap{"collateAllRegexps": collateAllRegexps} return executeTemplate(out, tmplName, tmplPath, commit, funcs, regexps) } -func collateAllMatchers(regexps []string) string { - // We now collate all regexps from VendorMatchers to a single large regexp +// collateAllRegexps all regexps to a single large regexp. +func collateAllRegexps(regexps []string) string { // which is at least twice as fast to test than simply iterating & matching. // - // --- - // - // We could test each matcher from VendorMatchers in turn i.e. - // - // func IsVendor(filename string) bool { - // for _, matcher := range data.VendorMatchers { - // if matcher.MatchString(filename) { - // return true - // } - // } - // return false - // } - // - // Or naïvely concatentate all these regexps using groups i.e. - // - // `(regexp1)|(regexp2)|(regexp3)|...` - // - // However, both of these are relatively slow and don't take advantage - // of the inherent structure within our regexps. - // // Imperical observation: by looking at the regexps, we only have 3 types. // 1. Those that start with `^` // 2. 
Those that start with `(^|/)` @@ -81,8 +68,8 @@ func collateAllMatchers(regexps []string) string { sort.Strings(regexps) + // Check prefix, group expressions var caretPrefixed, caretOrSlashPrefixed, theRest []string - // Check prefix, add to the respective group slices for _, re := range regexps { if strings.HasPrefix(re, caret) { caretPrefixed = append(caretPrefixed, re[len(caret):]) @@ -92,6 +79,7 @@ func collateAllMatchers(regexps []string) string { theRest = append(theRest, re) } } + var sb strings.Builder appendGroupWithCommonPrefix(&sb, "^", caretPrefixed) sb.WriteString("|") diff --git a/internal/code-generator/main.go b/internal/code-generator/main.go index 5eda1da..4fa40f4 100644 --- a/internal/code-generator/main.go +++ b/internal/code-generator/main.go @@ -134,7 +134,7 @@ func main() { for _, file := range fileList { if err := file.generate(file.fileToParse, file.samplesDir, file.outPath, file.tmplPath, file.tmplName, file.commit); err != nil { - log.Fatalf("error generating template %q to %q: %+v", file.tmplPath, file.outPath, err) + log.Fatalf("failed to generate %q from %q - %+v", file.outPath, file.tmplPath, err) } } } diff --git a/regex/oniguruma.go b/regex/oniguruma.go index 0c9660e..c152739 100644 --- a/regex/oniguruma.go +++ b/regex/oniguruma.go @@ -1,3 +1,4 @@ +//go:build oniguruma // +build oniguruma package regex @@ -6,10 +7,21 @@ import ( rubex "github.com/go-enry/go-oniguruma" ) +const Name = Oniguruma + type EnryRegexp = *rubex.Regexp -func MustCompile(str string) EnryRegexp { - return rubex.MustCompileASCII(str) +func MustCompile(s string) EnryRegexp { + return rubex.MustCompileASCII(s) +} + +// MustCompileMultiline matches in multi-line mode by default with Oniguruma. 
+func MustCompileMultiline(s string) EnryRegexp { + return MustCompile(s) +} + +func MustCompileRuby(s string) EnryRegexp { + return MustCompile(s) } func QuoteMeta(s string) string { diff --git a/regex/regex.go b/regex/regex.go new file mode 100644 index 0000000..6d38f3f --- /dev/null +++ b/regex/regex.go @@ -0,0 +1,9 @@ +package regex + +// Package regex abstracts the regular expression engine +// that can be chosen at compile-time by a build tag. + +const ( + RE2 = "RE2" + Oniguruma = "Oniguruma" +) diff --git a/regex/standard.go b/regex/standard.go index b242403..e9ccdb9 100644 --- a/regex/standard.go +++ b/regex/standard.go @@ -1,3 +1,4 @@ +//go:build !oniguruma // +build !oniguruma package regex @@ -6,12 +7,32 @@ import ( "regexp" ) +const Name = RE2 + type EnryRegexp = *regexp.Regexp func MustCompile(str string) EnryRegexp { return regexp.MustCompile(str) } +// MustCompileMultiline mimics Ruby defaults for regexp, where ^$ matches begin/end of line. +// I.e. it converts Ruby regexp syntax to its RE2 equivalent. +func MustCompileMultiline(s string) EnryRegexp { + const multilineModeFlag = "(?m)" + return regexp.MustCompile(multilineModeFlag + s) +} + +// MustCompileRuby is used for expressions with syntax not supported by RE2. +// Now it's confusing as we use the result as [data/rule.Matcher] and +// +// (*Matcher)(nil) != nil +// +// What is a better way for an expression to indicate unsupported syntax? +// e.g. add .IsValidSyntax() to both the Matcher interface and the EnryRegexp implementations?
+func MustCompileRuby(s string) EnryRegexp { + return nil +} + func QuoteMeta(s string) string { return regexp.QuoteMeta(s) } diff --git a/regex/standard_test.go b/regex/standard_test.go new file mode 100644 index 0000000..9e7755b --- /dev/null +++ b/regex/standard_test.go @@ -0,0 +1,27 @@ +//go:build !oniguruma +// +build !oniguruma + +package regex + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestMustCompileMultiline(t *testing.T) { + const re = `^\.(.*)!$` + want := MustCompileMultiline(re) + assert.Equal(t, "(?m)"+re, want.String()) + + const s = `.one +.two! +thre!` + if !want.MatchString(s) { + t.Fatalf("MustCompileMultiline(`%s`) must match multiline %q\n", re, s) + } +} + +func TestMustCompileRuby(t *testing.T) { + assert.Nil(t, MustCompileRuby(``)) +} diff --git a/utils.go b/utils.go index fb3d6f4..5e18300 100644 --- a/utils.go +++ b/utils.go @@ -63,7 +63,21 @@ func IsDotFile(path string) bool { // IsVendor returns whether or not path is a vendor path. func IsVendor(path string) bool { - return data.FastVendorMatcher.MatchString(path) + // fast path: single collated regex, if the engine supports its syntax + if data.FastVendorMatcher != nil { + return data.FastVendorMatcher.MatchString(path) + } + + // slow path: skip individual rules with unsupported syntax + for _, matcher := range data.VendorMatchers { + if matcher == nil { + continue + } + if matcher.MatchString(path) { + return true + } + } + return false } // IsTest returns whether or not path is a test path.
diff --git a/utils_test.go b/utils_test.go index 0a26069..75f2a22 100644 --- a/utils_test.go +++ b/utils_test.go @@ -7,57 +7,62 @@ import ( "path/filepath" "testing" + "github.com/go-enry/go-enry/v2/regex" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) -//TODO(bzz): port all from test/test_file_blob.rb test_vendored() -//https://github.com/github/linguist/blob/86adc140d3e8903980565a2984f5532edf4ae875/test/test_file_blob.rb#L270-L583 +// TODO(bzz): port all from test/test_file_blob.rb test_vendored() +// https://github.com/github/linguist/blob/86adc140d3e8903980565a2984f5532edf4ae875/test/test_file_blob.rb#L270-L583 var vendorTests = []struct { - path string - expected bool + skipOnRE2 bool // some rules are (present in code but) missing at runtime on RE2 + path string + expected bool }{ - {"cache/", true}, - {"something_cache/", false}, - {"random/cache/", true}, - {"cache", false}, - {"dependencies/", true}, - {"Dependencies/", true}, - {"dependency/", false}, - {"dist/", true}, - {"dist", false}, - {"random/dist/", true}, - {"random/dist", false}, - {"deps/", true}, - {"foodeps/", false}, - {"configure", true}, - {"a/configure", true}, - {"config.guess", true}, - {"config.guess/", false}, - {".vscode/", true}, - {"doc/_build/", true}, - {"a/docs/_build/", true}, - {"a/dasdocs/_build-vsdoc.js", true}, - {"a/dasdocs/_build-vsdoc.j", false}, - {"foo/bar", false}, - {".sublime-project", true}, - {"foo/vendor/foo", true}, - {"leaflet.draw-src.js", true}, - {"foo/bar/MochiKit.js", true}, - {"foo/bar/dojo.js", true}, - {"foo/env/whatever", true}, - {"some/python/venv/", false}, - {"foo/.imageset/bar", true}, - {"Vagrantfile", true}, - {"src/bootstrap-custom.js", true}, - // {"/css/bootstrap.rtl.css", true}, // from linguist v7.23 + {path: "cache/", expected: true}, + {false, "something_cache/", false}, + {false, "random/cache/", true}, + {false, "cache", false}, + {false, "dependencies/", true}, + {false, "Dependencies/", true}, + {false, 
"dependency/", false}, + {false, "dist/", true}, + {false, "dist", false}, + {false, "random/dist/", true}, + {false, "random/dist", false}, + {false, "deps/", true}, + {false, "foodeps/", false}, + {false, "configure", true}, + {false, "a/configure", true}, + {false, "config.guess", true}, + {false, "config.guess/", false}, + {false, ".vscode/", true}, + {false, "doc/_build/", true}, + {false, "a/docs/_build/", true}, + {false, "a/dasdocs/_build-vsdoc.js", true}, + {false, "a/dasdocs/_build-vsdoc.j", false}, + {false, "foo/bar", false}, + {false, ".sublime-project", true}, + {false, "foo/vendor/foo", true}, + {false, "leaflet.draw-src.js", true}, + {false, "foo/bar/MochiKit.js", true}, + {false, "foo/bar/dojo.js", true}, + {false, "foo/env/whatever", true}, + {false, "some/python/venv/", false}, + {false, "foo/.imageset/bar", true}, + {false, "Vagrantfile", true}, + {true, "src/bootstrap-custom.js", true}, + // {true, "/css/bootstrap.rtl.css", true}, // from linguist v7.23 } func TestIsVendor(t *testing.T) { - for _, tt := range vendorTests { - t.Run(tt.path, func(t *testing.T) { - if got := IsVendor(tt.path); got != tt.expected { - t.Errorf("IsVendor(%q) = %v, expected %v", tt.path, got, tt.expected) + for _, test := range vendorTests { + t.Run(test.path, func(t *testing.T) { + if got := IsVendor(test.path); got != test.expected { + if regex.Name == regex.RE2 && test.skipOnRE2 { + return // skip + } + t.Errorf("IsVendor(%q) = %v, expected %v (usuing %s)", test.path, got, test.expected, regex.Name) } }) }