mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-05-23 08:30:07 -03:00
content generator
This commit is contained in:
parent
f375b0df5e
commit
ba22a0a243
51
cli/slinguist-generate/assets/content.go.tmpl
Normal file
51
cli/slinguist-generate/assets/content.go.tmpl
Normal file
@ -0,0 +1,51 @@
|
||||
package slinguist
|
||||
|
||||
// CODE GENERATED AUTOMATICALLY WITH github.com/src-d/simple-linguist/cli/slinguist-generate
|
||||
// THIS FILE SHOULD NOT BE EDITED BY HAND
|
||||
// Extracted from github/linguist commit: {{ getCommit }}
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func GetLanguageByContent(filename string, content []byte) (lang string, safe bool) {
|
||||
ext := strings.ToLower(filepath.Ext(filename))
|
||||
if fnMatcher, ok := matchers[ext]; ok {
|
||||
lang, safe = fnMatcher(content)
|
||||
return
|
||||
}
|
||||
|
||||
return GetLanguageByExtension(filename)
|
||||
}
|
||||
|
||||
type languageMatcher func ([]byte) (string, bool)
|
||||
|
||||
var matchers = map[string]languageMatcher{
|
||||
{{ range $index, $disambiguator := . -}}
|
||||
{{ printf "%q" $disambiguator.Extension }}: func(i []byte) (string, bool) {
|
||||
{{ range $i, $language := $disambiguator.Languages -}}
|
||||
|
||||
{{- if not (avoidLanguage $language) }}
|
||||
{{- if gt (len $language.Heuristics) 0 }}
|
||||
{{- if gt $i 0 }} else {{ end -}}
|
||||
if {{- range $j, $heuristic := $language.Heuristics }} {{ $heuristic.Name }}.Match(i)
|
||||
{{- if lt $j (len $language.LogicRelations) }} {{index $language.LogicRelations $j}} {{- end -}} {{ end }} {
|
||||
return {{ printf "%q" $language.Language }}, true
|
||||
}
|
||||
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
{{- end}}
|
||||
|
||||
return {{ returnLanguage $disambiguator.Languages }}, {{ safeLanguage $disambiguator.Languages }}
|
||||
},
|
||||
{{ end -}}
|
||||
}
|
||||
|
||||
var (
|
||||
{{ range $index, $heuristic := getAllHeuristics . -}}
|
||||
{{ $heuristic.Name }} = regexp.MustCompile(`{{ $heuristic.Regexp }}`)
|
||||
{{ end -}}
|
||||
)
|
488
cli/slinguist-generate/generator/heuristics.go
Normal file
488
cli/slinguist-generate/generator/heuristics.go
Normal file
@ -0,0 +1,488 @@
|
||||
package generator
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"text/template"
|
||||
)
|
||||
|
||||
// Heuristics reads from buf and builds content.go file from contentTmplPath.
|
||||
func Heuristics(heuristics []byte, contentTmplPath, contentTmplName, commit string) ([]byte, error) {
|
||||
disambiguators, err := getDisambiguators(heuristics)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
buf := &bytes.Buffer{}
|
||||
if err := executeContentTemplate(buf, disambiguators, contentTmplPath, contentTmplName, commit); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return buf.Bytes(), nil
|
||||
}
|
||||
|
||||
// unknownLanguage is the identifier written into the generated code as the
// returned language when no language without heuristics is available (see
// returnLangAndSafe). It is emitted unquoted, i.e. as a Go identifier.
const unknownLanguage = "OtherLanguage"

var (
	// disambLine matches a line that opens a `disambiguate` block.
	disambLine = regexp.MustCompile(`^(\s*)disambiguate`)
	// definedRegs maps the name of a regexp variable found in the heuristics
	// source to the text assigned to it; it is filled by
	// lookForRegexpVariables and read by replaceRegexpVariables.
	definedRegs = make(map[string]string)
	// illegalCharacter maps characters to the word that replaces them when
	// buildName builds a matcher variable name.
	illegalCharacter = map[string]string{
		"#": "Sharp",
		"+": "Plus",
		"-": "Dash",
	}
)
|
||||
|
||||
type disambiguator struct {
|
||||
Extension string `json:"extension,omitempty"`
|
||||
Languages []*languageHeuristics `json:"languages,omitempty"`
|
||||
}
|
||||
|
||||
func (d *disambiguator) setHeuristicsNames() {
|
||||
for _, lang := range d.Languages {
|
||||
for i, heuristic := range lang.Heuristics {
|
||||
name := buildName(d.Extension, lang.Language, i)
|
||||
heuristic.Name = name
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func buildName(extension, language string, id int) string {
|
||||
extension = strings.TrimPrefix(extension, `.`)
|
||||
language = strings.Join(strings.Fields(language), ``)
|
||||
name := strings.Join([]string{extension, language, "Matcher", strconv.Itoa(id)}, `_`)
|
||||
for k, v := range illegalCharacter {
|
||||
if strings.Contains(name, k) {
|
||||
name = strings.Replace(name, k, v, -1)
|
||||
}
|
||||
}
|
||||
|
||||
return name
|
||||
}
|
||||
|
||||
// languageHeuristics holds the heuristics that identify one language and the
// logic operators that join those heuristics.
type languageHeuristics struct {
	Language       string       `json:"language,omitempty"`
	Heuristics     []*heuristic `json:"heuristics,omitempty"`
	LogicRelations []string     `json:"logic_relations,omitempty"`
}

// clone returns a deep copy of l: the logic relations are copied into a new
// slice and every heuristic is copied into a newly allocated value, so the
// result shares no heuristic pointer with l.
func (l *languageHeuristics) clone() (*languageHeuristics, error) {
	rels := make([]string, len(l.LogicRelations))
	if n := copy(rels, l.LogicRelations); n != len(l.LogicRelations) {
		return nil, fmt.Errorf("error copying logic relations")
	}

	copies := make([]*heuristic, len(l.Heuristics))
	for i, h := range l.Heuristics {
		dup := *h
		copies[i] = &dup
	}

	return &languageHeuristics{
		Language:       l.Language,
		Heuristics:     copies,
		LogicRelations: rels,
	}, nil
}

// heuristic is a single named regular expression.
type heuristic struct {
	Name   string `json:"name,omitempty"`
	Regexp string `json:"regexp,omitempty"`
}
|
||||
|
||||
// A disambiguate block looks like:
|
||||
// disambiguate ".mod", ".extension" do |data|
|
||||
// if data.include?('<!ENTITY ') && data.include?('patata')
|
||||
// Language["XML"]
|
||||
// elsif /^\s*MODULE [\w\.]+;/i.match(data) || /^\s*END [\w\.]+;/i.match(data) || data.empty?
|
||||
// Language["Modula-2"]
|
||||
// elsif (/^\s*import (scala|java)\./.match(data) || /^\s*val\s+\w+\s*=/.match(data) || /^\s*class\b/.match(data))
|
||||
// Language["Scala"]
|
||||
// elsif (data.include?("gap> "))
|
||||
// Language["GAP"]
|
||||
// else
|
||||
// [Language["Linux Kernel Module"], Language["AMPL"]]
|
||||
// end
|
||||
// end
|
||||
func getDisambiguators(heuristics []byte) ([]*disambiguator, error) {
|
||||
seenExtensions := map[string]bool{}
|
||||
buf := bufio.NewScanner(bytes.NewReader(heuristics))
|
||||
disambiguators := make([]*disambiguator, 0, 50)
|
||||
for buf.Scan() {
|
||||
line := buf.Text()
|
||||
if disambLine.MatchString(line) {
|
||||
d, err := parseDisambiguators(line, buf, seenExtensions)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
disambiguators = append(disambiguators, d...)
|
||||
}
|
||||
|
||||
lookForRegexpVariables(line)
|
||||
}
|
||||
|
||||
if err := buf.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return disambiguators, nil
|
||||
}
|
||||
|
||||
func lookForRegexpVariables(line string) {
|
||||
if strings.Contains(line, "ObjectiveCRegex = ") {
|
||||
line = strings.TrimSpace(line)
|
||||
reg := strings.TrimPrefix(line, "ObjectiveCRegex = ")
|
||||
definedRegs["ObjectiveCRegex"] = reg
|
||||
}
|
||||
|
||||
if strings.Contains(line, "fortran_rx = ") {
|
||||
line = strings.TrimSpace(line)
|
||||
reg := strings.TrimPrefix(line, "fortran_rx = ")
|
||||
definedRegs["fortran_rx"] = reg
|
||||
}
|
||||
}
|
||||
|
||||
func parseDisambiguators(line string, buf *bufio.Scanner, seenExtensions map[string]bool) ([]*disambiguator, error) {
|
||||
disambList := make([]*disambiguator, 0, 2)
|
||||
splitted := strings.Fields(line)
|
||||
|
||||
for _, v := range splitted {
|
||||
if strings.HasPrefix(v, `"`) {
|
||||
extension := strings.Trim(v, `",`)
|
||||
if _, ok := seenExtensions[extension]; !ok {
|
||||
d := &disambiguator{Extension: extension}
|
||||
disambList = append(disambList, d)
|
||||
seenExtensions[extension] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
langsHeuristics, err := getLanguagesHeuristics(buf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for i, disamb := range disambList {
|
||||
lh := langsHeuristics
|
||||
if i != 0 {
|
||||
lh = cloneLanguagesHeuristics(langsHeuristics)
|
||||
}
|
||||
|
||||
disamb.Languages = lh
|
||||
disamb.setHeuristicsNames()
|
||||
}
|
||||
|
||||
return disambList, nil
|
||||
}
|
||||
|
||||
func cloneLanguagesHeuristics(list []*languageHeuristics) []*languageHeuristics {
|
||||
cloneList := make([]*languageHeuristics, 0, len(list))
|
||||
for _, langHeu := range list {
|
||||
clone, _ := langHeu.clone()
|
||||
cloneList = append(cloneList, clone)
|
||||
}
|
||||
|
||||
return cloneList
|
||||
}
|
||||
|
||||
// getLanguagesHeuristics reads the body of a disambiguate block from buf,
// stopping at the first line that is exactly "end" once trimmed (or when the
// input is exhausted), and returns the languages found together with the
// heuristics and logic relations collected for them.
//
// Three parallel lists are filled while scanning and are combined by index
// in buildLanguagesHeuristics: the languages of each line containing
// "Language", the heuristics of each condition, and the logic operators of
// each condition.
func getLanguagesHeuristics(buf *bufio.Scanner) ([]*languageHeuristics, error) {
	langsList := make([][]string, 0, 2)
	heuristicsList := make([][]*heuristic, 0, 1)
	logicRelsList := make([][]string, 0, 1)

	// lastWasMatch is true while consecutive condition lines are being read
	// with no "Language" line in between.
	lastWasMatch := false
	for buf.Scan() {
		line := buf.Text()
		if strings.TrimSpace(line) == "end" {
			break
		}

		if hasRegExp(line) {
			// This inner line shadows the outer one only inside this block;
			// the "Language" check below still sees the raw line.
			line := cleanRegExpLine(line)

			logicRels := getLogicRelations(line)
			heuristics := getHeuristics(line)
			if lastWasMatch {
				// The previous line was a condition too: extend its entries
				// instead of starting new ones.
				i := len(heuristicsList) - 1
				heuristicsList[i] = append(heuristicsList[i], heuristics...)
				i = len(logicRelsList) - 1
				logicRelsList[i] = append(logicRelsList[i], logicRels...)
			} else {
				heuristicsList = append(heuristicsList, heuristics)
				logicRelsList = append(logicRelsList, logicRels)
			}

			lastWasMatch = true
		}

		if strings.Contains(line, "Language") {
			langs := getLanguages(line)
			langsList = append(langsList, langs)
			lastWasMatch = false
		}

	}

	if err := buf.Err(); err != nil {
		return nil, err
	}

	langsHeuristics := buildLanguagesHeuristics(langsList, heuristicsList, logicRelsList)
	return langsHeuristics, nil
}
|
||||
|
||||
// hasRegExp reports whether line contains one of the Ruby calls that are
// turned into a heuristic: ".match", ".include?" or ".empty?".
func hasRegExp(line string) bool {
	for _, marker := range [...]string{".match", ".include?", ".empty?"} {
		if strings.Contains(line, marker) {
			return true
		}
	}

	return false
}
|
||||
|
||||
// cleanRegExpLine reduces a condition line to the condition itself: it drops
// everything up to and including the first "if " (which also covers
// "elsif "), trims surrounding whitespace, removes one leading "(" and, when
// the remaining text contains "))", one trailing ")".
func cleanRegExpLine(line string) string {
	if pos := strings.Index(line, `if `); pos >= 0 {
		line = line[pos+3:]
	}

	line = strings.TrimPrefix(strings.TrimSpace(line), `(`)
	if !strings.Contains(line, "))") {
		return line
	}

	return strings.TrimSuffix(line, `)`)
}
|
||||
|
||||
// getLogicRelations returns, in order of appearance, the logic operators
// ("&&" and "||") that join the operands of the condition in line. It returns
// nil when line contains no operator.
//
// The operators are meant to be interleaved with the heuristics extracted
// from the same line, so every occurrence must be reported: a segment such as
// "a && b && c" yields two "&&" entries, one per operator.
func getLogicRelations(line string) []string {
	var rels []string

	segments := strings.Split(line, "||")
	for i, segment := range segments {
		// One "&&" per operator found inside this "||"-separated segment.
		for n := strings.Count(segment, "&&"); n > 0; n-- {
			rels = append(rels, "&&")
		}

		// Every segment but the last is followed by the "||" it was split on.
		if i < len(segments)-1 {
			rels = append(rels, "||")
		}
	}

	return rels
}
|
||||
|
||||
func getHeuristics(line string) []*heuristic {
|
||||
splitted := splitByLogicOps(line)
|
||||
heuristics := make([]*heuristic, 0, len(splitted))
|
||||
for _, v := range splitted {
|
||||
v = strings.TrimSpace(v)
|
||||
var reg string
|
||||
|
||||
if strings.Contains(v, ".match") {
|
||||
reg = v[:strings.Index(v, ".match")]
|
||||
reg = replaceRegexpVariables(reg)
|
||||
}
|
||||
|
||||
if strings.Contains(v, ".include?") {
|
||||
reg = includeToRegExp(v)
|
||||
}
|
||||
|
||||
if strings.Contains(v, ".empty?") {
|
||||
reg = `^$`
|
||||
}
|
||||
|
||||
if reg != "" {
|
||||
reg = convToValidRegexp(reg)
|
||||
heuristics = append(heuristics, &heuristic{Regexp: reg})
|
||||
}
|
||||
}
|
||||
|
||||
return heuristics
|
||||
}
|
||||
|
||||
// splitByLogicOps splits line into its operands, cutting first at every "||"
// and then at every "&&". The operands are returned untrimmed, in order.
func splitByLogicOps(line string) []string {
	operands := make([]string, 0, 1)
	for _, alternative := range strings.Split(line, "||") {
		operands = append(operands, strings.Split(alternative, "&&")...)
	}

	return operands
}
|
||||
|
||||
func replaceRegexpVariables(reg string) string {
|
||||
repl := reg
|
||||
if v, ok := definedRegs[reg]; ok {
|
||||
repl = v
|
||||
}
|
||||
|
||||
return repl
|
||||
}
|
||||
|
||||
// convToValidRegexp converts reg into a Go regular expression prefixed with
// its flags group.
//
// A Ruby regexp literal such as `/^(\s*)(<Project|<Import|<?xml|xmlns)/i` has
// its delimiters removed and its modifiers translated: Ruby "i" becomes the
// Go flag "i", and Ruby "m" (dot also matches newlines) becomes the Go flag
// "s". The Go flag "m" is always set.
//
// Any reg that does not start with "/" is not a regexp literal (for example
// the already quoted text coming from an include? call) and is kept verbatim,
// so a "/" inside it is never mistaken for the closing delimiter.
func convToValidRegexp(reg string) string {
	const (
		goMultiLine       = "m"
		goCaseInsensitive = "i"
		goDotMatchesEOL   = "s"

		rubyCaseInsensitive = "i"
		rubyMultiLine       = "m"
	)

	flags := "(?" + goMultiLine
	if !strings.HasPrefix(reg, `/`) {
		return flags + ")" + reg
	}

	reg = reg[1:]
	closing := strings.LastIndex(reg, `/`)
	if closing == -1 {
		// Opening delimiter without a closing one: keep the rest as it is.
		return flags + ")" + reg
	}

	// Everything after the closing delimiter is the list of Ruby modifiers.
	modifiers := reg[closing+1:]
	reg = reg[:closing]

	if strings.Contains(modifiers, rubyCaseInsensitive) {
		flags += goCaseInsensitive
	}

	if strings.Contains(modifiers, rubyMultiLine) {
		flags += goDotMatchesEOL
	}

	return flags + ")" + reg
}
|
||||
|
||||
// includeToRegExp extracts the argument of the include? call found in
// include and returns it with every regular expression metacharacter
// escaped, so that it matches the same literal text.
//
// The argument is located after the ".include?" marker, with or without
// parentheses. A quoted argument ends at its matching closing quote, so it
// may itself contain parentheses; an unquoted argument ends at the first ")".
// Escaped quotes inside the Ruby string are not interpreted.
func includeToRegExp(include string) string {
	const marker = ".include?"

	arg := include
	if pos := strings.Index(arg, marker); pos >= 0 {
		arg = arg[pos+len(marker):]
	}

	arg = strings.TrimSpace(arg)
	arg = strings.TrimSpace(strings.TrimPrefix(arg, `(`))

	if len(arg) > 0 && (arg[0] == '"' || arg[0] == '\'') {
		if end := strings.IndexByte(arg[1:], arg[0]); end >= 0 {
			return regexp.QuoteMeta(arg[1 : 1+end])
		}
	}

	// Unquoted (or unterminated) argument: take what precedes the first ")".
	if end := strings.Index(arg, `)`); end >= 0 {
		arg = arg[:end]
	}

	return regexp.QuoteMeta(strings.Trim(arg, `"'`))
}
|
||||
|
||||
func getLanguages(line string) []string {
|
||||
languages := make([]string, 0)
|
||||
splitted := strings.Split(line, `,`)
|
||||
for _, lang := range splitted {
|
||||
lang = trimLanguage(lang)
|
||||
languages = append(languages, lang)
|
||||
}
|
||||
|
||||
return languages
|
||||
}
|
||||
|
||||
// trimLanguage returns the text between the first pair of double quotes in
// enclosedLang, e.g. `Language["AMPL"]` yields `AMPL`. The input is expected
// to contain a closing double quote.
func trimLanguage(enclosedLang string) string {
	trimmed := strings.TrimSpace(enclosedLang)
	afterOpening := trimmed[strings.Index(trimmed, `"`)+1:]
	return afterOpening[:strings.Index(afterOpening, `"`)]
}
|
||||
|
||||
func buildLanguagesHeuristics(langsList [][]string, heuristicsList [][]*heuristic, logicRelsList [][]string) []*languageHeuristics {
|
||||
langsHeuristics := make([]*languageHeuristics, 0, len(langsList))
|
||||
for i, langSlice := range langsList {
|
||||
var heuristics []*heuristic
|
||||
if i < len(heuristicsList) {
|
||||
heuristics = heuristicsList[i]
|
||||
}
|
||||
|
||||
var rels []string
|
||||
if i < len(logicRelsList) {
|
||||
rels = logicRelsList[i]
|
||||
}
|
||||
|
||||
for _, lang := range langSlice {
|
||||
lh := &languageHeuristics{
|
||||
Language: lang,
|
||||
Heuristics: heuristics,
|
||||
LogicRelations: rels,
|
||||
}
|
||||
|
||||
langsHeuristics = append(langsHeuristics, lh)
|
||||
}
|
||||
}
|
||||
|
||||
return langsHeuristics
|
||||
}
|
||||
|
||||
func executeContentTemplate(out io.Writer, disambiguators []*disambiguator, contentTmplPath, contentTmpl, commit string) error {
|
||||
fmap := template.FuncMap{
|
||||
"getCommit": func() string { return commit },
|
||||
"getAllHeuristics": getAllHeuristics,
|
||||
"returnLanguage": returnLanguage,
|
||||
"safeLanguage": safeLanguage,
|
||||
"avoidLanguage": avoidLanguage,
|
||||
}
|
||||
|
||||
t := template.Must(template.New(contentTmpl).Funcs(fmap).ParseFiles(contentTmplPath))
|
||||
if err := t.Execute(out, disambiguators); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func getAllHeuristics(disambiguators []*disambiguator) []*heuristic {
|
||||
heuristics := make([]*heuristic, 0)
|
||||
for _, disamb := range disambiguators {
|
||||
for _, lang := range disamb.Languages {
|
||||
if !avoidLanguage(lang) {
|
||||
heuristics = append(heuristics, lang.Heuristics...)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return heuristics
|
||||
}
|
||||
|
||||
func avoidLanguage(lang *languageHeuristics) bool {
|
||||
// necessary to avoid corner cases
|
||||
for _, heuristic := range lang.Heuristics {
|
||||
if containsInvalidRegexp(heuristic.Regexp) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// containsInvalidRegexp reports whether reg cannot be used as a Go regular
// expression.
//
// Expressions containing `(?<` (look-behind or Ruby named groups) or `\1`
// are always rejected. Any other expression is rejected when the regexp
// package fails to compile it, which covers the remaining constructs Go does
// not support, such as look-ahead or other backreferences.
func containsInvalidRegexp(reg string) bool {
	if strings.Contains(reg, `(?<`) || strings.Contains(reg, `\1`) {
		return true
	}

	_, err := regexp.Compile(reg)
	return err != nil
}
|
||||
|
||||
func returnLanguage(langsHeuristics []*languageHeuristics) string {
|
||||
lang, _ := returnLangAndSafe(langsHeuristics)
|
||||
return lang
|
||||
}
|
||||
|
||||
func safeLanguage(langsHeuristics []*languageHeuristics) bool {
|
||||
_, safe := returnLangAndSafe(langsHeuristics)
|
||||
return safe
|
||||
}
|
||||
|
||||
func returnLangAndSafe(langsHeuristics []*languageHeuristics) (string, bool) {
|
||||
// at the moment, only returns one string although might be exists several language to return as a []string.
|
||||
langs := make([]string, 0)
|
||||
for _, langHeu := range langsHeuristics {
|
||||
if len(langHeu.Heuristics) == 0 {
|
||||
langs = append(langs, `"`+langHeu.Language+`"`)
|
||||
}
|
||||
}
|
||||
|
||||
lang := unknownLanguage
|
||||
safe := false
|
||||
if len(langs) != 0 {
|
||||
lang = langs[0]
|
||||
safe = len(langs) == 1
|
||||
}
|
||||
|
||||
return lang, safe
|
||||
}
|
81
cli/slinguist-generate/generator/test_files/content.gold
Normal file
81
cli/slinguist-generate/generator/test_files/content.gold
Normal file
@ -0,0 +1,81 @@
|
||||
package slinguist
|
||||
|
||||
// CODE GENERATED AUTOMATICALLY WITH github.com/src-d/simple-linguist/cli/slinguist-generate
|
||||
// THIS FILE SHOULD NOT BE EDITED BY HAND
|
||||
// Extracted from github/linguist commit: fe8b44ab8a225b1ffa75b983b916ea22fee5b6f7
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func GetLanguageByContent(filename string, content []byte) (lang string, safe bool) {
|
||||
ext := strings.ToLower(filepath.Ext(filename))
|
||||
if fnMatcher, ok := matchers[ext]; ok {
|
||||
lang, safe = fnMatcher(content)
|
||||
return
|
||||
}
|
||||
|
||||
return GetLanguageByExtension(filename)
|
||||
}
|
||||
|
||||
type languageMatcher func ([]byte) (string, bool)
|
||||
|
||||
var matchers = map[string]languageMatcher{
|
||||
".asc": func(i []byte) (string, bool) {
|
||||
if asc_PublicKey_Matcher_0.Match(i) {
|
||||
return "Public Key", true
|
||||
} else if asc_AsciiDoc_Matcher_0.Match(i) {
|
||||
return "AsciiDoc", true
|
||||
} else if asc_AGSScript_Matcher_0.Match(i) {
|
||||
return "AGS Script", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".ms": func(i []byte) (string, bool) {
|
||||
if ms_Groff_Matcher_0.Match(i) {
|
||||
return "Groff", true
|
||||
}
|
||||
|
||||
return "MAXScript", true
|
||||
},
|
||||
".mod": func(i []byte) (string, bool) {
|
||||
if mod_XML_Matcher_0.Match(i) {
|
||||
return "XML", true
|
||||
} else if mod_ModulaDash2_Matcher_0.Match(i) || mod_ModulaDash2_Matcher_1.Match(i) {
|
||||
return "Modula-2", true
|
||||
}
|
||||
|
||||
return "Linux Kernel Module", false
|
||||
},
|
||||
".pro": func(i []byte) (string, bool) {
|
||||
if pro_Prolog_Matcher_0.Match(i) {
|
||||
return "Prolog", true
|
||||
} else if pro_INI_Matcher_0.Match(i) {
|
||||
return "INI", true
|
||||
} else if pro_QMake_Matcher_0.Match(i) && pro_QMake_Matcher_1.Match(i) {
|
||||
return "QMake", true
|
||||
} else if pro_IDL_Matcher_0.Match(i) {
|
||||
return "IDL", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
}
|
||||
|
||||
var (
|
||||
asc_PublicKey_Matcher_0 = regexp.MustCompile(`(?m)^(----[- ]BEGIN|ssh-(rsa|dss)) `)
|
||||
asc_AsciiDoc_Matcher_0 = regexp.MustCompile(`(?m)^[=-]+(\s|\n)|{{[A-Za-z]`)
|
||||
asc_AGSScript_Matcher_0 = regexp.MustCompile(`(?m)^(\/\/.+|((import|export)\s+)?(function|int|float|char)\s+((room|repeatedly|on|game)_)?([A-Za-z]+[A-Za-z_0-9]+)\s*[;\(])`)
|
||||
ms_Groff_Matcher_0 = regexp.MustCompile(`(?mi)^[.'][a-z][a-z](\s|$)`)
|
||||
mod_XML_Matcher_0 = regexp.MustCompile(`(?m)<!ENTITY `)
|
||||
mod_ModulaDash2_Matcher_0 = regexp.MustCompile(`(?mi)^\s*MODULE [\w\.]+;`)
|
||||
mod_ModulaDash2_Matcher_1 = regexp.MustCompile(`(?mi)^\s*END [\w\.]+;`)
|
||||
pro_Prolog_Matcher_0 = regexp.MustCompile(`(?m)^[^#]+:-`)
|
||||
pro_INI_Matcher_0 = regexp.MustCompile(`(?m)last_client=`)
|
||||
pro_QMake_Matcher_0 = regexp.MustCompile(`(?m)HEADERS`)
|
||||
pro_QMake_Matcher_1 = regexp.MustCompile(`(?m)SOURCES`)
|
||||
pro_IDL_Matcher_0 = regexp.MustCompile(`(?m)^\s*function[ \w,]+$`)
|
||||
)
|
@ -0,0 +1,51 @@
|
||||
package slinguist
|
||||
|
||||
// CODE GENERATED AUTOMATICALLY WITH github.com/src-d/simple-linguist/cli/slinguist-generate
|
||||
// THIS FILE SHOULD NOT BE EDITED BY HAND
|
||||
// Extracted from github/linguist commit: {{ getCommit }}
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func GetLanguageByContent(filename string, content []byte) (lang string, safe bool) {
|
||||
ext := strings.ToLower(filepath.Ext(filename))
|
||||
if fnMatcher, ok := matchers[ext]; ok {
|
||||
lang, safe = fnMatcher(content)
|
||||
return
|
||||
}
|
||||
|
||||
return GetLanguageByExtension(filename)
|
||||
}
|
||||
|
||||
type languageMatcher func ([]byte) (string, bool)
|
||||
|
||||
var matchers = map[string]languageMatcher{
|
||||
{{ range $index, $disambiguator := . -}}
|
||||
{{ printf "%q" $disambiguator.Extension }}: func(i []byte) (string, bool) {
|
||||
{{ range $i, $language := $disambiguator.Languages -}}
|
||||
|
||||
{{- if not (avoidLanguage $language) }}
|
||||
{{- if gt (len $language.Heuristics) 0 }}
|
||||
{{- if gt $i 0 }} else {{ end -}}
|
||||
if {{- range $j, $heuristic := $language.Heuristics }} {{ $heuristic.Name }}.Match(i)
|
||||
{{- if lt $j (len $language.LogicRelations) }} {{index $language.LogicRelations $j}} {{- end -}} {{ end }} {
|
||||
return {{ printf "%q" $language.Language }}, true
|
||||
}
|
||||
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
{{- end}}
|
||||
|
||||
return {{ returnLanguage $disambiguator.Languages }}, {{ safeLanguage $disambiguator.Languages }}
|
||||
},
|
||||
{{ end -}}
|
||||
}
|
||||
|
||||
var (
|
||||
{{ range $index, $heuristic := getAllHeuristics . -}}
|
||||
{{ $heuristic.Name }} = regexp.MustCompile(`{{ $heuristic.Regexp }}`)
|
||||
{{ end -}}
|
||||
)
|
@ -0,0 +1,81 @@
|
||||
package slinguist
|
||||
|
||||
// CODE GENERATED AUTOMATICALLY WITH github.com/src-d/simple-linguist/cli/slinguist-generate
|
||||
// THIS FILE SHOULD NOT BE EDITED BY HAND
|
||||
// Extracted from github/linguist commit: fe8b44ab8a225b1ffa75b983b916ea22fee5b6f7
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func GetLanguageByContent(filename string, content []byte) (lang string, safe bool) {
|
||||
ext := strings.ToLower(filepath.Ext(filename))
|
||||
if fnMatcher, ok := matchers[ext]; ok {
|
||||
lang, safe = fnMatcher(content)
|
||||
return
|
||||
}
|
||||
|
||||
return GetLanguageByExtension(filename)
|
||||
}
|
||||
|
||||
type languageMatcher func([]byte) (string, bool)
|
||||
|
||||
var matchers = map[string]languageMatcher{
|
||||
".asc": func(i []byte) (string, bool) {
|
||||
if asc_PublicKey_Matcher_0.Match(i) {
|
||||
return "Public Key", true
|
||||
} else if asc_AsciiDoc_Matcher_0.Match(i) {
|
||||
return "AsciiDoc", true
|
||||
} else if asc_AGSScript_Matcher_0.Match(i) {
|
||||
return "AGS Script", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".ms": func(i []byte) (string, bool) {
|
||||
if ms_Groff_Matcher_0.Match(i) {
|
||||
return "Groff", true
|
||||
}
|
||||
|
||||
return "MAXScript", true
|
||||
},
|
||||
".mod": func(i []byte) (string, bool) {
|
||||
if mod_XML_Matcher_0.Match(i) {
|
||||
return "XML", true
|
||||
} else if mod_ModulaDash2_Matcher_0.Match(i) || mod_ModulaDash2_Matcher_1.Match(i) {
|
||||
return "Modula-2", true
|
||||
}
|
||||
|
||||
return "Linux Kernel Module", false
|
||||
},
|
||||
".pro": func(i []byte) (string, bool) {
|
||||
if pro_Prolog_Matcher_0.Match(i) {
|
||||
return "Prolog", true
|
||||
} else if pro_INI_Matcher_0.Match(i) {
|
||||
return "INI", true
|
||||
} else if pro_QMake_Matcher_0.Match(i) && pro_QMake_Matcher_1.Match(i) {
|
||||
return "QMake", true
|
||||
} else if pro_IDL_Matcher_0.Match(i) {
|
||||
return "IDL", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
}
|
||||
|
||||
var (
|
||||
asc_PublicKey_Matcher_0 = regexp.MustCompile(`(?m)^(----[- ]BEGIN|ssh-(rsa|dss)) `)
|
||||
asc_AsciiDoc_Matcher_0 = regexp.MustCompile(`(?m)^[=-]+(\s|\n)|{{[A-Za-z]`)
|
||||
asc_AGSScript_Matcher_0 = regexp.MustCompile(`(?m)^(\/\/.+|((import|export)\s+)?(function|int|float|char)\s+((room|repeatedly|on|game)_)?([A-Za-z]+[A-Za-z_0-9]+)\s*[;\(])`)
|
||||
ms_Groff_Matcher_0 = regexp.MustCompile(`(?mi)^[.'][a-z][a-z](\s|$)`)
|
||||
mod_XML_Matcher_0 = regexp.MustCompile(`(?m)<!ENTITY `)
|
||||
mod_ModulaDash2_Matcher_0 = regexp.MustCompile(`(?mi)^\s*MODULE [\w\.]+;`)
|
||||
mod_ModulaDash2_Matcher_1 = regexp.MustCompile(`(?mi)^\s*END [\w\.]+;`)
|
||||
pro_Prolog_Matcher_0 = regexp.MustCompile(`(?m)^[^#]+:-`)
|
||||
pro_INI_Matcher_0 = regexp.MustCompile(`(?m)last_client=`)
|
||||
pro_QMake_Matcher_0 = regexp.MustCompile(`(?m)HEADERS`)
|
||||
pro_QMake_Matcher_1 = regexp.MustCompile(`(?m)SOURCES`)
|
||||
pro_IDL_Matcher_0 = regexp.MustCompile(`(?m)^\s*function[ \w,]+$`)
|
||||
)
|
@ -0,0 +1,44 @@
|
||||
# Common heuristics
|
||||
ObjectiveCRegex = /^\s*(@(interface|class|protocol|property|end|synchronised|selector|implementation)\b|#import\s+.+\.h[">])/
|
||||
|
||||
disambiguate ".asc" do |data|
|
||||
if /^(----[- ]BEGIN|ssh-(rsa|dss)) /.match(data)
|
||||
Language["Public Key"]
|
||||
elsif /^[=-]+(\s|\n)|{{[A-Za-z]/.match(data)
|
||||
Language["AsciiDoc"]
|
||||
elsif /^(\/\/.+|((import|export)\s+)?(function|int|float|char)\s+((room|repeatedly|on|game)_)?([A-Za-z]+[A-Za-z_0-9]+)\s*[;\(])/.match(data)
|
||||
Language["AGS Script"]
|
||||
end
|
||||
end
|
||||
|
||||
disambiguate ".ms" do |data|
|
||||
if /^[.'][a-z][a-z](\s|$)/i.match(data)
|
||||
Language["Groff"]
|
||||
elsif /(?<!\S)\.(include|globa?l)\s/.match(data) || /(?<!\/\*)(\A|\n)\s*\.[A-Za-z]/.match(data.gsub(/"([^\\"]|\\.)*"|'([^\\']|\\.)*'|\\\s*(?:--.*)?\n/, ""))
|
||||
Language["GAS"]
|
||||
else
|
||||
Language["MAXScript"]
|
||||
end
|
||||
end
|
||||
|
||||
disambiguate ".mod" do |data|
|
||||
if data.include?('<!ENTITY ')
|
||||
Language["XML"]
|
||||
elsif /^\s*MODULE [\w\.]+;/i.match(data) || /^\s*END [\w\.]+;/i.match(data)
|
||||
Language["Modula-2"]
|
||||
else
|
||||
[Language["Linux Kernel Module"], Language["AMPL"]]
|
||||
end
|
||||
end
|
||||
|
||||
disambiguate ".pro" do |data|
|
||||
if /^[^#]+:-/.match(data)
|
||||
Language["Prolog"]
|
||||
elsif data.include?("last_client=")
|
||||
Language["INI"]
|
||||
elsif data.include?("HEADERS") && data.include?("SOURCES")
|
||||
Language["QMake"]
|
||||
elsif /^\s*function[ \w,]+$/.match(data)
|
||||
Language["IDL"]
|
||||
end
|
||||
end
|
730
content.go
730
content.go
@ -1,10 +1,13 @@
|
||||
package slinguist
|
||||
|
||||
// CODE GENERATED AUTOMATICALLY WITH github.com/src-d/simple-linguist/cli/slinguist-generate
|
||||
// THIS FILE SHOULD NOT BE EDITED BY HAND
|
||||
// Extracted from github/linguist commit: 8b2d15a2d527c363dead3efe5e4dc3cb74fa4613
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"gopkg.in/toqueteos/substring.v1"
|
||||
)
|
||||
|
||||
func GetLanguageByContent(filename string, content []byte) (lang string, safe bool) {
|
||||
@ -20,466 +23,555 @@ func GetLanguageByContent(filename string, content []byte) (lang string, safe bo
|
||||
type languageMatcher func([]byte) (string, bool)
|
||||
|
||||
var matchers = map[string]languageMatcher{
|
||||
".bf": func(i []byte) (string, bool) {
|
||||
if substring.BytesRegexp(`(fprintf|function|return)`).Match(i) {
|
||||
return "HyPhy", true
|
||||
".asc": func(i []byte) (string, bool) {
|
||||
if asc_PublicKey_Matcher_0.Match(i) {
|
||||
return "Public Key", true
|
||||
} else if asc_AsciiDoc_Matcher_0.Match(i) {
|
||||
return "AsciiDoc", true
|
||||
} else if asc_AGSScript_Matcher_0.Match(i) {
|
||||
return "AGS Script", true
|
||||
}
|
||||
|
||||
return "Brainfuck", false
|
||||
},
|
||||
".b": func(i []byte) (string, bool) {
|
||||
if substring.BytesRegexp(`(include|modules)`).Match(i) {
|
||||
return "Limbo", true
|
||||
}
|
||||
|
||||
return "Brainfuck", false
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".bb": func(i []byte) (string, bool) {
|
||||
if blitzBasicMatcher.Match(i) {
|
||||
if bb_BlitzBasic_Matcher_0.Match(i) || bb_BlitzBasic_Matcher_1.Match(i) {
|
||||
return "BlitzBasic", true
|
||||
} else if substring.BytesRegexp(`^\s*(# |include|require)\b`).Match(i) {
|
||||
} else if bb_BitBake_Matcher_0.Match(i) {
|
||||
return "BitBake", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".builds": func(i []byte) (string, bool) {
|
||||
if builds_XML_Matcher_0.Match(i) {
|
||||
return "XML", true
|
||||
}
|
||||
|
||||
return "Text", true
|
||||
},
|
||||
".ch": func(i []byte) (string, bool) {
|
||||
if ch_xBase_Matcher_0.Match(i) {
|
||||
return "xBase", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".cl": func(i []byte) (string, bool) {
|
||||
if commonLispMatcher.Match(i) {
|
||||
if cl_CommonLisp_Matcher_0.Match(i) {
|
||||
return "Common Lisp", true
|
||||
} else if coolMatcher.Match(i) {
|
||||
} else if cl_Cool_Matcher_0.Match(i) {
|
||||
return "Cool", true
|
||||
} else if openCLMatcher.Match(i) {
|
||||
} else if cl_OpenCL_Matcher_0.Match(i) {
|
||||
return "OpenCL", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".cls": func(i []byte) (string, bool) {
|
||||
if apexMatcher.Match(i) {
|
||||
return "Apex", true
|
||||
} else if openEdgeABLMatcher.Match(i) {
|
||||
return "OpenEdge ABL", true
|
||||
} else if texMatcher.Match(i) {
|
||||
if cls_TeX_Matcher_0.Match(i) {
|
||||
return "TeX", true
|
||||
} else if visualBasicMatcher.Match(i) {
|
||||
return "Visual Basic", true
|
||||
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".cs": func(i []byte) (string, bool) {
|
||||
if substring.BytesRegexp(`![\w\s]+methodsFor: `).Match(i) {
|
||||
if cs_Smalltalk_Matcher_0.Match(i) {
|
||||
return "Smalltalk", true
|
||||
} else if cs_CSharp_Matcher_0.Match(i) || cs_CSharp_Matcher_1.Match(i) {
|
||||
return "C#", true
|
||||
}
|
||||
|
||||
return "C#", true
|
||||
},
|
||||
".ch": func(i []byte) (string, bool) {
|
||||
if substring.BytesRegexp(`(?i)^\s*#\s*(if|ifdef|ifndef|define|command|xcommand|translate|xtranslate|include|pragma|undef)\b`).Match(i) {
|
||||
return "xBase", true
|
||||
}
|
||||
|
||||
return "Charity", true
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".d": func(i []byte) (string, bool) {
|
||||
if substring.BytesRegexp(`^module `).Match(i) {
|
||||
if d_D_Matcher_0.Match(i) {
|
||||
return "D", true
|
||||
} else if substring.BytesRegexp(`((dtrace:::)?BEGIN|provider |#pragma (D (option|attributes)|ident)\s)`).Match(i) {
|
||||
} else if d_DTrace_Matcher_0.Match(i) {
|
||||
return "DTrace", true
|
||||
} else if substring.BytesRegexp(`(\/.*:( .* \\)$| : \\$|^ : |: \\$)`).Match(i) {
|
||||
} else if d_Makefile_Matcher_0.Match(i) {
|
||||
return "Makefile", true
|
||||
}
|
||||
|
||||
return OtherLanguage, true
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".ecl": func(i []byte) (string, bool) {
|
||||
if substring.BytesRegexp(`^[^#]+:-`).Match(i) {
|
||||
if ecl_ECLiPSe_Matcher_0.Match(i) {
|
||||
return "ECLiPSe", true
|
||||
} else if substring.BytesHas(`:=`).Match(i) {
|
||||
} else if ecl_ECL_Matcher_0.Match(i) {
|
||||
return "ECL", true
|
||||
}
|
||||
|
||||
return OtherLanguage, true
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".es": func(i []byte) (string, bool) {
|
||||
if substring.BytesRegexp(`\s*(?:%%|main\s*\(.*?\)\s*->)`).Match(i) {
|
||||
if es_Erlang_Matcher_0.Match(i) {
|
||||
return "Erlang", true
|
||||
} else if substring.BytesRegexp(`(?:\/\/|("|')use strict\\1|export\s+default\s|\/\*.*?\*\/)`).Match(i) {
|
||||
return "JavaScript", true
|
||||
}
|
||||
|
||||
return OtherLanguage, true
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".f": func(i []byte) (string, bool) {
|
||||
if substring.BytesRegexp(`\n: `).Match(i) {
|
||||
if f_Forth_Matcher_0.Match(i) {
|
||||
return "Forth", true
|
||||
} else if substring.BytesRegexp(`(?i)^([c*][^abd-z]| (subroutine|program|end)\s|\s*!)`).Match(i) {
|
||||
} else if f_FilebenchWML_Matcher_0.Match(i) {
|
||||
return "Filebench WML", true
|
||||
} else if f_FORTRAN_Matcher_0.Match(i) {
|
||||
return "FORTRAN", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".for": func(i []byte) (string, bool) {
|
||||
if for_Forth_Matcher_0.Match(i) {
|
||||
return "Forth", true
|
||||
} else if for_FORTRAN_Matcher_0.Match(i) {
|
||||
return "FORTRAN", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".fr": func(i []byte) (string, bool) {
|
||||
if substring.BytesRegexp(`^(: |also |new-device|previous )`).Match(i) {
|
||||
if fr_Forth_Matcher_0.Match(i) {
|
||||
return "Forth", true
|
||||
} else if substring.BytesRegexp(`\s*(import|module|package|data|type)`).Match(i) {
|
||||
} else if fr_Frege_Matcher_0.Match(i) {
|
||||
return "Frege", true
|
||||
}
|
||||
|
||||
return "Text", false
|
||||
},
|
||||
".j": func(i []byte) (string, bool) {
|
||||
if objectiveCMatcher.Match(i) {
|
||||
return "Objective-J", true
|
||||
}
|
||||
|
||||
return "Jasmin", false
|
||||
},
|
||||
".inc": func(i []byte) (string, bool) {
|
||||
if substring.BytesRegexp(`^<\?(?:php)?`).Match(i) {
|
||||
return "PHP", true
|
||||
}
|
||||
|
||||
return OtherLanguage, true
|
||||
},
|
||||
".m": func(i []byte) (string, bool) {
|
||||
if objectiveCMatcher.Match(i) {
|
||||
return "Objective-C", true
|
||||
} else if substring.BytesHas(`:- module`).Match(i) {
|
||||
return "Mercury", true
|
||||
} else if substring.BytesRegexp(`\n: `).Match(i) {
|
||||
return "MUF", true
|
||||
} else if substring.BytesRegexp(`\n\s*;`).Match(i) {
|
||||
return "M", true
|
||||
} else if substring.BytesRegexp(`\n\s*\(\*`).Match(i) {
|
||||
return "Mathematica", true
|
||||
} else if substring.BytesRegexp(`\n\s*%`).Match(i) {
|
||||
return "Matlab", true
|
||||
} else if substring.BytesRegexp(`\w+\s*:\s*module\s*{`).Match(i) {
|
||||
return "Limbo", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".ms": func(i []byte) (string, bool) {
|
||||
if substring.BytesRegexp(`[.'][a-z][a-z](\s|$)`).Match(i) {
|
||||
return "Groff", true
|
||||
}
|
||||
|
||||
return "MAXScript", true
|
||||
},
|
||||
".md": func(i []byte) (string, bool) {
|
||||
if substring.BytesRegexp(`\n[-a-z0-9=#!\*\[|]`).Match(i) {
|
||||
return "Markdown", true
|
||||
} else if substring.BytesRegexp(`\n(;;|\(define_)`).Match(i) {
|
||||
return "GCC Machine Description", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".moo": func(i []byte) (string, bool) {
|
||||
if substring.BytesHas(`:- module`).Match(i) {
|
||||
return "Mercury", true
|
||||
}
|
||||
|
||||
return "Moocode", false
|
||||
},
|
||||
".e": func(i []byte) (string, bool) {
|
||||
if substring.BytesRegexp(`feature\s--`).Match(i) {
|
||||
return "Eiffel", true
|
||||
}
|
||||
|
||||
return "E", false
|
||||
return "Text", true
|
||||
},
|
||||
".fs": func(i []byte) (string, bool) {
|
||||
if substring.BytesRegexp(`\n(: |new-device)`).Match(i) {
|
||||
if fs_Forth_Matcher_0.Match(i) {
|
||||
return "Forth", true
|
||||
} else if substring.BytesRegexp(`\s*(#light|import|let|module|namespace|open|type)`).Match(i) {
|
||||
} else if fs_FSharp_Matcher_0.Match(i) {
|
||||
return "F#", true
|
||||
} else if substring.BytesRegexp(`(#version|precision|uniform|varying|vec[234])`).Match(i) {
|
||||
} else if fs_GLSL_Matcher_0.Match(i) {
|
||||
return "GLSL", true
|
||||
} else if substring.BytesRegexp(`#include|#pragma\s+(rs|version)|__attribute__`).Match(i) {
|
||||
} else if fs_Filterscript_Matcher_0.Match(i) {
|
||||
return "Filterscript", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".gs": func(i []byte) (string, bool) {
|
||||
if substring.BytesRegexp(`uses java\.`).Match(i) {
|
||||
if gs_Gosu_Matcher_0.Match(i) {
|
||||
return "Gosu", true
|
||||
}
|
||||
|
||||
return "JavaScript", false
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".h": func(i []byte) (string, bool) {
|
||||
if objectiveCMatcher.Match(i) {
|
||||
if h_ObjectiveDashC_Matcher_0.Match(i) {
|
||||
return "Objective-C", true
|
||||
} else if cPlusPlusMatcher.Match(i) {
|
||||
return "C++", true
|
||||
}
|
||||
|
||||
return "C", true
|
||||
},
|
||||
".hh": func(i []byte) (string, bool) {
|
||||
if substring.BytesRegexp(`^<\?(?:hh)?`).Match(i) {
|
||||
return "Hack", true
|
||||
} else if cPlusPlusMatcher.Match(i) {
|
||||
} else if h_CPlusPlus_Matcher_0.Match(i) || h_CPlusPlus_Matcher_1.Match(i) || h_CPlusPlus_Matcher_2.Match(i) || h_CPlusPlus_Matcher_3.Match(i) || h_CPlusPlus_Matcher_4.Match(i) || h_CPlusPlus_Matcher_5.Match(i) || h_CPlusPlus_Matcher_6.Match(i) {
|
||||
return "C++", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".inc": func(i []byte) (string, bool) {
|
||||
if inc_PHP_Matcher_0.Match(i) {
|
||||
return "PHP", true
|
||||
} else if inc_POVDashRaySDL_Matcher_0.Match(i) {
|
||||
return "POV-Ray SDL", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".l": func(i []byte) (string, bool) {
|
||||
if substring.BytesRegexp(`\(def(un|macro)\s`).Match(i) {
|
||||
if l_CommonLisp_Matcher_0.Match(i) {
|
||||
return "Common Lisp", true
|
||||
} else if substring.BytesRegexp(`(%[%{}]xs|<.*>)`).Match(i) {
|
||||
} else if l_Lex_Matcher_0.Match(i) {
|
||||
return "Lex", true
|
||||
} else if substring.BytesRegexp(`\.[a-z][a-z](\s|$)`).Match(i) {
|
||||
} else if l_Groff_Matcher_0.Match(i) {
|
||||
return "Groff", true
|
||||
} else if substring.BytesRegexp(`(de|class|rel|code|data|must)`).Match(i) {
|
||||
} else if l_PicoLisp_Matcher_0.Match(i) {
|
||||
return "PicoLisp", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".ls": func(i []byte) (string, bool) {
|
||||
if substring.BytesRegexp(`\s*package\s*[\w\.\/\*\s]*\s*{`).Match(i) {
|
||||
if ls_LoomScript_Matcher_0.Match(i) {
|
||||
return "LoomScript", true
|
||||
}
|
||||
|
||||
return "LiveScript", false
|
||||
return "LiveScript", true
|
||||
},
|
||||
".lsp": func(i []byte) (string, bool) {
|
||||
if lsp_CommonLisp_Matcher_0.Match(i) {
|
||||
return "Common Lisp", true
|
||||
} else if lsp_NewLisp_Matcher_0.Match(i) {
|
||||
return "NewLisp", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".lisp": func(i []byte) (string, bool) {
|
||||
if lisp_CommonLisp_Matcher_0.Match(i) {
|
||||
return "Common Lisp", true
|
||||
} else if lisp_NewLisp_Matcher_0.Match(i) {
|
||||
return "NewLisp", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".m": func(i []byte) (string, bool) {
|
||||
if m_ObjectiveDashC_Matcher_0.Match(i) {
|
||||
return "Objective-C", true
|
||||
} else if m_Mercury_Matcher_0.Match(i) {
|
||||
return "Mercury", true
|
||||
} else if m_MUF_Matcher_0.Match(i) {
|
||||
return "MUF", true
|
||||
} else if m_M_Matcher_0.Match(i) {
|
||||
return "M", true
|
||||
} else if m_Mathematica_Matcher_0.Match(i) {
|
||||
return "Mathematica", true
|
||||
} else if m_Matlab_Matcher_0.Match(i) {
|
||||
return "Matlab", true
|
||||
} else if m_Limbo_Matcher_0.Match(i) {
|
||||
return "Limbo", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".md": func(i []byte) (string, bool) {
|
||||
if md_Markdown_Matcher_0.Match(i) || md_Markdown_Matcher_1.Match(i) {
|
||||
return "Markdown", true
|
||||
} else if md_GCCmachinedescription_Matcher_0.Match(i) {
|
||||
return "GCC machine description", true
|
||||
}
|
||||
|
||||
return "Markdown", true
|
||||
},
|
||||
".ml": func(i []byte) (string, bool) {
|
||||
if ml_OCaml_Matcher_0.Match(i) {
|
||||
return "OCaml", true
|
||||
} else if ml_StandardML_Matcher_0.Match(i) {
|
||||
return "Standard ML", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".mod": func(i []byte) (string, bool) {
|
||||
if mod_XML_Matcher_0.Match(i) {
|
||||
return "XML", true
|
||||
} else if mod_ModulaDash2_Matcher_0.Match(i) || mod_ModulaDash2_Matcher_1.Match(i) {
|
||||
return "Modula-2", true
|
||||
}
|
||||
|
||||
return "Linux Kernel Module", false
|
||||
},
|
||||
".ms": func(i []byte) (string, bool) {
|
||||
if ms_Groff_Matcher_0.Match(i) {
|
||||
return "Groff", true
|
||||
}
|
||||
|
||||
return "MAXScript", true
|
||||
},
|
||||
".n": func(i []byte) (string, bool) {
|
||||
if substring.BytesRegexp(`^[.']`).Match(i) {
|
||||
if n_Groff_Matcher_0.Match(i) {
|
||||
return "Groff", true
|
||||
} else if substring.BytesRegexp(`(module|namespace|using)`).Match(i) {
|
||||
} else if n_Nemerle_Matcher_0.Match(i) {
|
||||
return "Nemerle", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".ncl": func(i []byte) (string, bool) {
|
||||
if substring.BytesHas("THE_TITLE").Match(i) {
|
||||
if ncl_Text_Matcher_0.Match(i) {
|
||||
return "Text", true
|
||||
}
|
||||
|
||||
return "NCL", true
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".mod": func(i []byte) (string, bool) {
|
||||
if substring.BytesHas("<!ENTITY ").Match(i) {
|
||||
return "XML", true
|
||||
} else if substring.BytesRegexp(`MODULE\s\w+\s*;`).Match(i) || substring.BytesRegexp(`(?i)\s*END \w+;$`).Match(i) {
|
||||
return "Modula-2", true
|
||||
".nl": func(i []byte) (string, bool) {
|
||||
if nl_NL_Matcher_0.Match(i) {
|
||||
return "NL", true
|
||||
}
|
||||
|
||||
return "Linux Kernel Module", true
|
||||
return "NewLisp", true
|
||||
},
|
||||
".lisp": func(i []byte) (string, bool) {
|
||||
if commonLispMatcher.Match(i) {
|
||||
return "Common Lisp", true
|
||||
} else if substring.BytesRegexp(`\s*\(define `).Match(i) {
|
||||
return "NewLisp", true
|
||||
".php": func(i []byte) (string, bool) {
|
||||
if php_Hack_Matcher_0.Match(i) {
|
||||
return "Hack", true
|
||||
} else if php_PHP_Matcher_0.Match(i) {
|
||||
return "PHP", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".pl": func(i []byte) (string, bool) {
|
||||
if pl_Prolog_Matcher_0.Match(i) {
|
||||
return "Prolog", true
|
||||
} else if pl_Perl_Matcher_0.Match(i) {
|
||||
return "Perl", true
|
||||
} else if pl_Perl6_Matcher_0.Match(i) {
|
||||
return "Perl6", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".pm": func(i []byte) (string, bool) {
|
||||
if perlMatcher.Match(i) {
|
||||
if pm_Perl_Matcher_0.Match(i) {
|
||||
return "Perl", true
|
||||
} else if perl6Matcher.Match(i) {
|
||||
} else if pm_Perl6_Matcher_0.Match(i) {
|
||||
return "Perl6", true
|
||||
}
|
||||
|
||||
return "Perl", false
|
||||
},
|
||||
".pp": func(i []byte) (string, bool) {
|
||||
if pascalMatcher.Match(i) {
|
||||
return "Pascal", true
|
||||
}
|
||||
return "Puppet", false
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".t": func(i []byte) (string, bool) {
|
||||
if perlMatcher.Match(i) {
|
||||
if t_Perl_Matcher_0.Match(i) {
|
||||
return "Perl", true
|
||||
} else if perl6Matcher.Match(i) {
|
||||
} else if t_Perl6_Matcher_0.Match(i) {
|
||||
return "Perl6", true
|
||||
} else if substring.BytesRegexp(`^\s*%|^\s*var\s+\w+\s*:\s*\w+`).Match(i) {
|
||||
return "Turing", true
|
||||
} else if substring.BytesRegexp(`^\s*use\s+v6\s*;`).Match(i) {
|
||||
return "Perl6", true
|
||||
} else if substring.BytesRegexp(`terra\s`).Match(i) {
|
||||
return "Terra", true
|
||||
}
|
||||
|
||||
return "Perl", false
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".pod": func(i []byte) (string, bool) {
|
||||
if pod_Pod_Matcher_0.Match(i) {
|
||||
return "Pod", true
|
||||
}
|
||||
|
||||
return "Perl", true
|
||||
},
|
||||
".pro": func(i []byte) (string, bool) {
|
||||
if pro_Prolog_Matcher_0.Match(i) {
|
||||
return "Prolog", true
|
||||
} else if pro_INI_Matcher_0.Match(i) {
|
||||
return "INI", true
|
||||
} else if pro_QMake_Matcher_0.Match(i) && pro_QMake_Matcher_1.Match(i) {
|
||||
return "QMake", true
|
||||
} else if pro_IDL_Matcher_0.Match(i) {
|
||||
return "IDL", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".props": func(i []byte) (string, bool) {
|
||||
if props_XML_Matcher_0.Match(i) {
|
||||
return "XML", true
|
||||
} else if props_INI_Matcher_0.Match(i) {
|
||||
return "INI", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".r": func(i []byte) (string, bool) {
|
||||
if r_Rebol_Matcher_0.Match(i) {
|
||||
return "Rebol", true
|
||||
} else if r_R_Matcher_0.Match(i) {
|
||||
return "R", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".rno": func(i []byte) (string, bool) {
|
||||
if rno_RUNOFF_Matcher_0.Match(i) {
|
||||
return "RUNOFF", true
|
||||
} else if rno_Groff_Matcher_0.Match(i) {
|
||||
return "Groff", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".rpy": func(i []byte) (string, bool) {
|
||||
if rpy_Python_Matcher_0.Match(i) {
|
||||
return "Python", true
|
||||
}
|
||||
|
||||
return "Ren'Py", true
|
||||
},
|
||||
".rs": func(i []byte) (string, bool) {
|
||||
if rs_Rust_Matcher_0.Match(i) {
|
||||
return "Rust", true
|
||||
} else if rs_RenderScript_Matcher_0.Match(i) {
|
||||
return "RenderScript", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".sc": func(i []byte) (string, bool) {
|
||||
if sc_SuperCollider_Matcher_0.Match(i) || sc_SuperCollider_Matcher_1.Match(i) || sc_SuperCollider_Matcher_2.Match(i) {
|
||||
return "SuperCollider", true
|
||||
} else if sc_Scala_Matcher_0.Match(i) || sc_Scala_Matcher_1.Match(i) || sc_Scala_Matcher_2.Match(i) {
|
||||
return "Scala", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".sql": func(i []byte) (string, bool) {
|
||||
if sql_PLpgSQL_Matcher_0.Match(i) || sql_PLpgSQL_Matcher_1.Match(i) || sql_PLpgSQL_Matcher_2.Match(i) {
|
||||
return "PLpgSQL", true
|
||||
} else if sql_SQLPL_Matcher_0.Match(i) || sql_SQLPL_Matcher_1.Match(i) {
|
||||
return "SQLPL", true
|
||||
} else if sql_PLSQL_Matcher_0.Match(i) || sql_PLSQL_Matcher_1.Match(i) {
|
||||
return "PLSQL", true
|
||||
} else if sql_SQL_Matcher_0.Match(i) {
|
||||
return "SQL", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".srt": func(i []byte) (string, bool) {
|
||||
if srt_SubRipText_Matcher_0.Match(i) {
|
||||
return "SubRip Text", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".toc": func(i []byte) (string, bool) {
|
||||
if toc_WorldofWarcraftAddonData_Matcher_0.Match(i) {
|
||||
return "World of Warcraft Addon Data", true
|
||||
} else if toc_TeX_Matcher_0.Match(i) {
|
||||
return "TeX", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".ts": func(i []byte) (string, bool) {
|
||||
if substring.BytesHas("</TS>").Match(i) {
|
||||
return "XML", true
|
||||
}
|
||||
|
||||
return "TypeScript", true
|
||||
},
|
||||
".tsx": func(i []byte) (string, bool) {
|
||||
if substring.BytesHas("</tileset>").Match(i) {
|
||||
if ts_XML_Matcher_0.Match(i) {
|
||||
return "XML", true
|
||||
}
|
||||
|
||||
return "TypeScript", true
|
||||
},
|
||||
".tst": func(i []byte) (string, bool) {
|
||||
if substring.BytesHas("gap> ").Match(i) {
|
||||
if tst_GAP_Matcher_0.Match(i) {
|
||||
return "GAP", true
|
||||
}
|
||||
|
||||
return "Scilab", true
|
||||
},
|
||||
".r": func(i []byte) (string, bool) {
|
||||
if substring.BytesRegexp(`(?i)\bRebol\b`).Match(i) {
|
||||
return "Rebol", true
|
||||
} else if substring.BytesHas("<-").Match(i) {
|
||||
return "R", true
|
||||
".tsx": func(i []byte) (string, bool) {
|
||||
if tsx_TypeScript_Matcher_0.Match(i) {
|
||||
return "TypeScript", true
|
||||
} else if tsx_XML_Matcher_0.Match(i) {
|
||||
return "XML", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".rs": func(i []byte) (string, bool) {
|
||||
if substring.BytesRegexp(`\n(use |fn |mod |pub |macro_rules|impl|#!?\[)`).Match(i) {
|
||||
return "Rust", true
|
||||
} else if substring.BytesRegexp(`#include|#pragma\s+(rs|version)|__attribute__`).Match(i) {
|
||||
return "RenderScript", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".rpy": func(i []byte) (string, bool) {
|
||||
if substring.BytesRegexp(`(import|from|class|def)\s`).Match(i) {
|
||||
return "Python", true
|
||||
}
|
||||
|
||||
return "Ren'Py", false
|
||||
},
|
||||
".v": func(i []byte) (string, bool) {
|
||||
if substring.BytesRegexp(`\nendmodule`).Match(i) {
|
||||
return "Verilog", true
|
||||
} else if substring.BytesRegexp(`(Require|Import)`).Match(i) {
|
||||
return "Coq", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".pl": func(i []byte) (string, bool) {
|
||||
if prologMatcher.Match(i) {
|
||||
return "Prolog", true
|
||||
} else if perl6Matcher.Match(i) {
|
||||
return "Perl6", true
|
||||
}
|
||||
|
||||
return "Perl", false
|
||||
},
|
||||
".pro": func(i []byte) (string, bool) {
|
||||
if prologMatcher.Match(i) {
|
||||
return "Prolog", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".pod": func(i []byte) (string, bool) {
|
||||
if substring.BytesRegexp(`=\w+\n`).Match(i) {
|
||||
return "Pod", true
|
||||
}
|
||||
|
||||
return "Perl", false
|
||||
},
|
||||
".toc": func(i []byte) (string, bool) {
|
||||
if substring.BytesRegexp("## |@no-lib-strip@").Match(i) {
|
||||
return "World of Warcraft Addon Data", true
|
||||
} else if substring.BytesRegexp("(contentsline|defcounter|beamer|boolfalse)").Match(i) {
|
||||
return "TeX", true
|
||||
}
|
||||
|
||||
return OtherLanguage, false
|
||||
},
|
||||
".sls": func(i []byte) (string, bool) {
|
||||
if schemeMatcher.Match(i) {
|
||||
return "Scheme", true
|
||||
}
|
||||
return "SaltStack", false
|
||||
},
|
||||
".sql": func(i []byte) (string, bool) {
|
||||
if pgSQLMatcher.Match(i) {
|
||||
return "PLpgSQL", true
|
||||
} else if db2SQLMatcher.Match(i) {
|
||||
return "SQLPL", true
|
||||
} else if oracleSQLMatcher.Match(i) {
|
||||
return "PLSQL", true
|
||||
}
|
||||
|
||||
return "SQL", false
|
||||
},
|
||||
}
|
||||
|
||||
func init() {
|
||||
matchers[".for"] = matchers[".f"]
|
||||
matchers[".lsp"] = matchers[".lisp"]
|
||||
}
|
||||
|
||||
var (
|
||||
blitzBasicMatcher = substring.BytesOr(
|
||||
substring.BytesHas(`End Function`),
|
||||
substring.BytesRegexp(`\\s*;`),
|
||||
)
|
||||
cPlusPlusMatcher = substring.BytesOr(
|
||||
substring.BytesRegexp(`\s*template\s*<`),
|
||||
substring.BytesRegexp(`\s*#\s*include <(cstdint|string|vector|map|list|array|bitset|queue|stack|forward_list|unordered_map|unordered_set|(i|o|io)stream)>`),
|
||||
substring.BytesRegexp(`\n[ \t]*try`),
|
||||
substring.BytesRegexp(`\n[ \t]*(class|(using[ \t]+)?namespace)\s+\w+`),
|
||||
substring.BytesRegexp(`\n[ \t]*(private|public|protected):\n`),
|
||||
substring.BytesRegexp(`std::\w+`),
|
||||
substring.BytesRegexp(`[ \t]*catch\s*`),
|
||||
)
|
||||
commonLispMatcher = substring.BytesRegexp("(?i)(defpackage|defun|in-package)")
|
||||
coolMatcher = substring.BytesRegexp("(?i)class")
|
||||
openCLMatcher = substring.BytesOr(
|
||||
substring.BytesHas("\n}"),
|
||||
substring.BytesHas("}\n"),
|
||||
substring.BytesHas(`/*`),
|
||||
substring.BytesHas(`//`),
|
||||
)
|
||||
apexMatcher = substring.BytesOr(
|
||||
substring.BytesHas("{\n"),
|
||||
substring.BytesHas("}\n"),
|
||||
)
|
||||
texMatcher = substring.BytesOr(
|
||||
substring.BytesHas(`%`),
|
||||
substring.BytesHas(`\`),
|
||||
)
|
||||
openEdgeABLMatcher = substring.BytesRegexp(`(?i)(class|define|interface|method|using)\b`)
|
||||
visualBasicMatcher = substring.BytesOr(
|
||||
substring.BytesHas("'*"),
|
||||
substring.BytesRegexp(`(?i)(attribute|option|sub|private|protected|public|friend)\b`),
|
||||
)
|
||||
mathematicaMatcher = substring.BytesHas(`^\s*\(\*`)
|
||||
matlabMatcher = substring.BytesRegexp(`\b(function\s*[\[a-zA-Z]+|pcolor|classdef|figure|end|elseif)\b`)
|
||||
objectiveCMatcher = substring.BytesRegexp(
|
||||
`@(interface|class|protocol|property|end|synchronised|selector|implementation)\b|#import\s+.+\.h[">]`)
|
||||
pascalMatcher = substring.BytesRegexp(`(?ims)^\s*(PROGRAM|UNIT|USES|FUNCTION)[\s\n]+.*?;`)
|
||||
prologMatcher = substring.BytesRegexp(`^[^#]+:-`)
|
||||
perlMatcher = substring.BytesRegexp(`use strict|use\s+v?5\.`)
|
||||
perl6Matcher = substring.BytesRegexp(`(use v6|(my )?class|module)`)
|
||||
pgSQLMatcher = substring.BytesOr(
|
||||
substring.BytesRegexp(`(?i)\\i\b|AS \$\$|LANGUAGE '?plpgsql'?`),
|
||||
substring.BytesRegexp(`(?i)SECURITY (DEFINER|INVOKER)`),
|
||||
substring.BytesRegexp(`BEGIN( WORK| TRANSACTION)?;`),
|
||||
)
|
||||
db2SQLMatcher = substring.BytesOr(
|
||||
substring.BytesRegexp(`(?i)(alter module)|(language sql)|(begin( NOT)+ atomic)`),
|
||||
substring.BytesRegexp(`(?i)signal SQLSTATE '[0-9]+'`),
|
||||
)
|
||||
oracleSQLMatcher = substring.BytesOr(
|
||||
substring.BytesRegexp(`(?i)\$\$PLSQL_|XMLTYPE|sysdate|systimestamp|\.nextval|connect by|AUTHID (DEFINER|CURRENT_USER)`),
|
||||
substring.BytesRegexp(`(?i)constructor\W+function`),
|
||||
)
|
||||
schemeMatcher = substring.BytesRegexp(`(?m)\A(^\s*;;.*$)*\s*\(`)
|
||||
asc_PublicKey_Matcher_0 = regexp.MustCompile(`(?m)^(----[- ]BEGIN|ssh-(rsa|dss)) `)
|
||||
asc_AsciiDoc_Matcher_0 = regexp.MustCompile(`(?m)^[=-]+(\s|\n)|{{[A-Za-z]`)
|
||||
asc_AGSScript_Matcher_0 = regexp.MustCompile(`(?m)^(\/\/.+|((import|export)\s+)?(function|int|float|char)\s+((room|repeatedly|on|game)_)?([A-Za-z]+[A-Za-z_0-9]+)\s*[;\(])`)
|
||||
bb_BlitzBasic_Matcher_0 = regexp.MustCompile(`(?m)^\s*; `)
|
||||
bb_BlitzBasic_Matcher_1 = regexp.MustCompile(`(?m)End Function`)
|
||||
bb_BitBake_Matcher_0 = regexp.MustCompile(`(?m)^\s*(# |include|require)\b`)
|
||||
// builds_XML_Matcher_0 reports XML content for ".builds" files: a line that,
// after optional leading whitespace, starts with "<Project", "<Import",
// "<Property", "<?xml" or "xmlns" (case-insensitive).
// The '?' in "<?xml" is escaped; unescaped it is a quantifier that makes '<'
// optional, so the alternative matched "xml"/"<xml" but never a real "<?xml"
// declaration.
builds_XML_Matcher_0 = regexp.MustCompile(`(?mi)^(\s*)(<Project|<Import|<Property|<\?xml|xmlns)`)
|
||||
ch_xBase_Matcher_0 = regexp.MustCompile(`(?mi)^\s*#\s*(if|ifdef|ifndef|define|command|xcommand|translate|xtranslate|include|pragma|undef)\b`)
|
||||
cl_CommonLisp_Matcher_0 = regexp.MustCompile(`(?mi)^\s*\((defun|in-package|defpackage) `)
|
||||
cl_Cool_Matcher_0 = regexp.MustCompile(`(?m)^class`)
|
||||
cl_OpenCL_Matcher_0 = regexp.MustCompile(`(?m)\/\* |\/\/ |^\}`)
|
||||
cls_TeX_Matcher_0 = regexp.MustCompile(`(?m)\\\w+{`)
|
||||
cs_Smalltalk_Matcher_0 = regexp.MustCompile(`(?m)![\w\s]+methodsFor: `)
|
||||
cs_CSharp_Matcher_0 = regexp.MustCompile(`(?m)^\s*namespace\s*[\w\.]+\s*{`)
|
||||
cs_CSharp_Matcher_1 = regexp.MustCompile(`(?m)^\s*\/\/`)
|
||||
d_D_Matcher_0 = regexp.MustCompile(`(?m)^module\s+[\w.]*\s*;|import\s+[\w\s,.:]*;|\w+\s+\w+\s*\(.*\)(?:\(.*\))?\s*{[^}]*}|unittest\s*(?:\(.*\))?\s*{[^}]*}`)
|
||||
d_DTrace_Matcher_0 = regexp.MustCompile(`(?m)^(\w+:\w*:\w*:\w*|BEGIN|END|provider\s+|(tick|profile)-\w+\s+{[^}]*}|#pragma\s+D\s+(option|attributes|depends_on)\s|#pragma\s+ident\s)`)
|
||||
d_Makefile_Matcher_0 = regexp.MustCompile(`(?m)([\/\\].*:\s+.*\s\\$|: \\$|^ : |^[\w\s\/\\.]+\w+\.\w+\s*:\s+[\w\s\/\\.]+\w+\.\w+)`)
|
||||
ecl_ECLiPSe_Matcher_0 = regexp.MustCompile(`(?m)^[^#]+:-`)
|
||||
ecl_ECL_Matcher_0 = regexp.MustCompile(`(?m):=`)
|
||||
es_Erlang_Matcher_0 = regexp.MustCompile(`(?m)^\s*(?:%%|main\s*\(.*?\)\s*->)`)
|
||||
f_Forth_Matcher_0 = regexp.MustCompile(`(?m)^: `)
|
||||
f_FilebenchWML_Matcher_0 = regexp.MustCompile(`(?m)flowop`)
|
||||
f_FORTRAN_Matcher_0 = regexp.MustCompile(`(?mi)^([c*][^abd-z]| (subroutine|program|end|data)\s|\s*!)`)
|
||||
for_Forth_Matcher_0 = regexp.MustCompile(`(?m)^: `)
|
||||
for_FORTRAN_Matcher_0 = regexp.MustCompile(`(?mi)^([c*][^abd-z]| (subroutine|program|end|data)\s|\s*!)`)
|
||||
fr_Forth_Matcher_0 = regexp.MustCompile(`(?m)^(: |also |new-device|previous )`)
|
||||
fr_Frege_Matcher_0 = regexp.MustCompile(`(?m)^\s*(import|module|package|data|type) `)
|
||||
fs_Forth_Matcher_0 = regexp.MustCompile(`(?m)^(: |new-device)`)
|
||||
fs_FSharp_Matcher_0 = regexp.MustCompile(`(?m)^\s*(#light|import|let|module|namespace|open|type)`)
|
||||
fs_GLSL_Matcher_0 = regexp.MustCompile(`(?m)^\s*(#version|precision|uniform|varying|vec[234])`)
|
||||
fs_Filterscript_Matcher_0 = regexp.MustCompile(`(?m)#include|#pragma\s+(rs|version)|__attribute__`)
|
||||
gs_Gosu_Matcher_0 = regexp.MustCompile(`(?m)^uses java\.`)
|
||||
h_ObjectiveDashC_Matcher_0 = regexp.MustCompile(`(?m)^\s*(@(interface|class|protocol|property|end|synchronised|selector|implementation)\b|#import\s+.+\.h[">])`)
|
||||
h_CPlusPlus_Matcher_0 = regexp.MustCompile(`(?m)^\s*#\s*include <(cstdint|string|vector|map|list|array|bitset|queue|stack|forward_list|unordered_map|unordered_set|(i|o|io)stream)>`)
|
||||
h_CPlusPlus_Matcher_1 = regexp.MustCompile(`(?m)^\s*template\s*<`)
|
||||
h_CPlusPlus_Matcher_2 = regexp.MustCompile(`(?m)^[ \t]*try`)
|
||||
h_CPlusPlus_Matcher_3 = regexp.MustCompile(`(?m)^[ \t]*catch\s*\(`)
|
||||
h_CPlusPlus_Matcher_4 = regexp.MustCompile(`(?m)^[ \t]*(class|(using[ \t]+)?namespace)\s+\w+`)
|
||||
h_CPlusPlus_Matcher_5 = regexp.MustCompile(`(?m)^[ \t]*(private|public|protected):$`)
|
||||
h_CPlusPlus_Matcher_6 = regexp.MustCompile(`(?m)std::\w+`)
|
||||
inc_PHP_Matcher_0 = regexp.MustCompile(`(?m)^<\?(?:php)?`)
|
||||
inc_POVDashRaySDL_Matcher_0 = regexp.MustCompile(`(?m)^\s*#(declare|local|macro|while)\s`)
|
||||
l_CommonLisp_Matcher_0 = regexp.MustCompile(`(?m)\(def(un|macro)\s`)
|
||||
l_Lex_Matcher_0 = regexp.MustCompile(`(?m)^(%[%{}]xs|<.*>)`)
|
||||
l_Groff_Matcher_0 = regexp.MustCompile(`(?mi)^\.[a-z][a-z](\s|$)`)
|
||||
l_PicoLisp_Matcher_0 = regexp.MustCompile(`(?m)^\((de|class|rel|code|data|must)\s`)
|
||||
ls_LoomScript_Matcher_0 = regexp.MustCompile(`(?m)^\s*package\s*[\w\.\/\*\s]*\s*{`)
|
||||
lsp_CommonLisp_Matcher_0 = regexp.MustCompile(`(?mi)^\s*\((defun|in-package|defpackage) `)
|
||||
lsp_NewLisp_Matcher_0 = regexp.MustCompile(`(?m)^\s*\(define `)
|
||||
lisp_CommonLisp_Matcher_0 = regexp.MustCompile(`(?mi)^\s*\((defun|in-package|defpackage) `)
|
||||
lisp_NewLisp_Matcher_0 = regexp.MustCompile(`(?m)^\s*\(define `)
|
||||
m_ObjectiveDashC_Matcher_0 = regexp.MustCompile(`(?m)^\s*(@(interface|class|protocol|property|end|synchronised|selector|implementation)\b|#import\s+.+\.h[">])`)
|
||||
m_Mercury_Matcher_0 = regexp.MustCompile(`(?m):- module`)
|
||||
m_MUF_Matcher_0 = regexp.MustCompile(`(?m)^: `)
|
||||
m_M_Matcher_0 = regexp.MustCompile(`(?m)^\s*;`)
|
||||
m_Mathematica_Matcher_0 = regexp.MustCompile(`(?m)\*\)$`)
|
||||
m_Matlab_Matcher_0 = regexp.MustCompile(`(?m)^\s*%`)
|
||||
m_Limbo_Matcher_0 = regexp.MustCompile(`(?m)^\w+\s*:\s*module\s*{`)
|
||||
md_Markdown_Matcher_0 = regexp.MustCompile(`(?mi)(^[-a-z0-9=#!\*\[|>])|<\/`)
|
||||
md_Markdown_Matcher_1 = regexp.MustCompile(`(?m)^$`)
|
||||
md_GCCmachinedescription_Matcher_0 = regexp.MustCompile(`(?m)^(;;|\(define_)`)
|
||||
ml_OCaml_Matcher_0 = regexp.MustCompile(`(?m)(^\s*module)|let rec |match\s+(\S+\s)+with`)
|
||||
ml_StandardML_Matcher_0 = regexp.MustCompile(`(?m)=> |case\s+(\S+\s)+of`)
|
||||
mod_XML_Matcher_0 = regexp.MustCompile(`(?m)<!ENTITY `)
|
||||
mod_ModulaDash2_Matcher_0 = regexp.MustCompile(`(?mi)^\s*MODULE [\w\.]+;`)
|
||||
mod_ModulaDash2_Matcher_1 = regexp.MustCompile(`(?mi)^\s*END [\w\.]+;`)
|
||||
ms_Groff_Matcher_0 = regexp.MustCompile(`(?mi)^[.'][a-z][a-z](\s|$)`)
|
||||
n_Groff_Matcher_0 = regexp.MustCompile(`(?m)^[.']`)
|
||||
n_Nemerle_Matcher_0 = regexp.MustCompile(`(?m)^(module|namespace|using)\s`)
|
||||
ncl_Text_Matcher_0 = regexp.MustCompile(`(?m)THE_TITLE`)
|
||||
nl_NL_Matcher_0 = regexp.MustCompile(`(?m)^(b|g)[0-9]+ `)
|
||||
php_Hack_Matcher_0 = regexp.MustCompile(`(?m)<\?hh`)
|
||||
// php_PHP_Matcher_0 matches a literal "<?" open tag whose next byte is not 'h',
// i.e. an open tag that is not the "<?hh" tag handled by php_Hack_Matcher_0.
// The '?' is escaped; unescaped it is a quantifier that makes '<' optional, so
// the pattern reduced to "any byte other than h" and matched almost every file.
php_PHP_Matcher_0 = regexp.MustCompile(`(?m)<\?[^h]`)
|
||||
pl_Prolog_Matcher_0 = regexp.MustCompile(`(?m)^[^#]*:-`)
|
||||
pl_Perl_Matcher_0 = regexp.MustCompile(`(?m)use strict|use\s+v?5\.`)
|
||||
pl_Perl6_Matcher_0 = regexp.MustCompile(`(?m)^(use v6|(my )?class|module)`)
|
||||
pm_Perl_Matcher_0 = regexp.MustCompile(`(?m)use strict|use\s+v?5\.`)
|
||||
pm_Perl6_Matcher_0 = regexp.MustCompile(`(?m)^(use v6|(my )?class|module)`)
|
||||
t_Perl_Matcher_0 = regexp.MustCompile(`(?m)use strict|use\s+v?5\.`)
|
||||
t_Perl6_Matcher_0 = regexp.MustCompile(`(?m)^(use v6|(my )?class|module)`)
|
||||
pod_Pod_Matcher_0 = regexp.MustCompile(`(?m)^=\w+\b`)
|
||||
pro_Prolog_Matcher_0 = regexp.MustCompile(`(?m)^[^#]+:-`)
|
||||
pro_INI_Matcher_0 = regexp.MustCompile(`(?m)last_client=`)
|
||||
pro_QMake_Matcher_0 = regexp.MustCompile(`(?m)HEADERS`)
|
||||
pro_QMake_Matcher_1 = regexp.MustCompile(`(?m)SOURCES`)
|
||||
pro_IDL_Matcher_0 = regexp.MustCompile(`(?m)^\s*function[ \w,]+$`)
|
||||
// props_XML_Matcher_0 reports XML content for ".props" files: a line that,
// after optional leading whitespace, starts with "<Project", "<Import",
// "<Property", "<?xml" or "xmlns" (case-insensitive).
// The '?' in "<?xml" is escaped; unescaped it is a quantifier that makes '<'
// optional, so the alternative matched "xml"/"<xml" but never a real "<?xml"
// declaration.
props_XML_Matcher_0 = regexp.MustCompile(`(?mi)^(\s*)(<Project|<Import|<Property|<\?xml|xmlns)`)
|
||||
props_INI_Matcher_0 = regexp.MustCompile(`(?mi)\w+\s*=\s*`)
|
||||
r_Rebol_Matcher_0 = regexp.MustCompile(`(?mi)\bRebol\b`)
|
||||
r_R_Matcher_0 = regexp.MustCompile(`(?m)<-|^\s*#`)
|
||||
rno_RUNOFF_Matcher_0 = regexp.MustCompile(`(?mi)^\.!|^\.end lit(?:eral)?\b`)
|
||||
rno_Groff_Matcher_0 = regexp.MustCompile(`(?m)^\.\\" `)
|
||||
rpy_Python_Matcher_0 = regexp.MustCompile(`(?ms)(^(import|from|class|def)\s)`)
|
||||
rs_Rust_Matcher_0 = regexp.MustCompile(`(?m)^(use |fn |mod |pub |macro_rules|impl|#!?\[)`)
|
||||
rs_RenderScript_Matcher_0 = regexp.MustCompile(`(?m)#include|#pragma\s+(rs|version)|__attribute__`)
|
||||
sc_SuperCollider_Matcher_0 = regexp.MustCompile(`(?m)\^(this|super)\.`)
|
||||
sc_SuperCollider_Matcher_1 = regexp.MustCompile(`(?m)^\s*(\+|\*)\s*\w+\s*{`)
|
||||
sc_SuperCollider_Matcher_2 = regexp.MustCompile(`(?m)^\s*~\w+\s*=\.`)
|
||||
sc_Scala_Matcher_0 = regexp.MustCompile(`(?m)^\s*import (scala|java)\.`)
|
||||
sc_Scala_Matcher_1 = regexp.MustCompile(`(?m)^\s*val\s+\w+\s*=`)
|
||||
sc_Scala_Matcher_2 = regexp.MustCompile(`(?m)^\s*class\b`)
|
||||
sql_PLpgSQL_Matcher_0 = regexp.MustCompile(`(?mi)^\\i\b|AS \$\$|LANGUAGE '?plpgsql'?`)
|
||||
sql_PLpgSQL_Matcher_1 = regexp.MustCompile(`(?mi)SECURITY (DEFINER|INVOKER)`)
|
||||
sql_PLpgSQL_Matcher_2 = regexp.MustCompile(`(?mi)BEGIN( WORK| TRANSACTION)?;`)
|
||||
sql_SQLPL_Matcher_0 = regexp.MustCompile(`(?mi)(alter module)|(language sql)|(begin( NOT)+ atomic)`)
|
||||
sql_SQLPL_Matcher_1 = regexp.MustCompile(`(?mi)signal SQLSTATE '[0-9]+'`)
|
||||
sql_PLSQL_Matcher_0 = regexp.MustCompile(`(?mi)\$\$PLSQL_|XMLTYPE|sysdate|systimestamp|\.nextval|connect by|AUTHID (DEFINER|CURRENT_USER)`)
|
||||
sql_PLSQL_Matcher_1 = regexp.MustCompile(`(?mi)constructor\W+function`)
|
||||
// NOTE(review): this pattern begins with the literal text "! /", so its first
// alternative only matches content containing "! /begin". That looks like a
// Ruby negation (`! /.../`) captured into the regexp instead of being applied
// to the match result -- confirm against the heuristics source; if so, the
// ".sql" matcher that uses this variable has its condition inverted.
sql_SQL_Matcher_0 = regexp.MustCompile(`(?mi)! /begin|boolean|package|exception`)
|
||||
srt_SubRipText_Matcher_0 = regexp.MustCompile(`(?m)^(\d{2}:\d{2}:\d{2},\d{3})\s*(-->)\s*(\d{2}:\d{2}:\d{2},\d{3})$`)
|
||||
toc_WorldofWarcraftAddonData_Matcher_0 = regexp.MustCompile(`(?m)^## |@no-lib-strip@`)
|
||||
toc_TeX_Matcher_0 = regexp.MustCompile(`(?m)^\\(contentsline|defcounter|beamer|boolfalse)`)
|
||||
ts_XML_Matcher_0 = regexp.MustCompile(`(?m)<TS`)
|
||||
tst_GAP_Matcher_0 = regexp.MustCompile(`(?m)gap> `)
|
||||
tsx_TypeScript_Matcher_0 = regexp.MustCompile(`(?m)^\s*(import.+(from\s+|require\()['"]react|\/\/\/\s*<reference\s)`)
|
||||
tsx_XML_Matcher_0 = regexp.MustCompile(`(?mi)^\s*<\?xml\s+version`)
|
||||
)
|
||||
|
@ -11,8 +11,6 @@ import (
|
||||
)
|
||||
|
||||
func (s *TSuite) TestGetLanguageByContentLinguistCorpus(c *C) {
|
||||
c.Skip("report")
|
||||
|
||||
var total, failed, ok, other, unsafe int
|
||||
|
||||
w := new(tabwriter.Writer)
|
||||
|
@ -20,16 +20,30 @@ const (
|
||||
languagesTestTmplPath = "test_files/languages.test.tmpl"
|
||||
languagesTestTmplName = "languages.test.tmpl"
|
||||
commitLangTest = "fe8b44ab8a225b1ffa75b983b916ea22fee5b6f7"
|
||||
|
||||
// Heuristics test
|
||||
heuristicsTestFile = "test_files/heuristics.test.rb"
|
||||
contentGold = "test_files/content.gold"
|
||||
contentTestTmplPath = "test_files/content.test.go.tmpl"
|
||||
contentTestTmplName = "content.test.go.tmpl"
|
||||
commitHeuristicsTest = "fe8b44ab8a225b1ffa75b983b916ea22fee5b6f7"
|
||||
)
|
||||
|
||||
func TestFromFile(t *testing.T) {
|
||||
goldLang, err := ioutil.ReadFile(formatedLangGold)
|
||||
assert.NoError(t, err)
|
||||
|
||||
goldContent, err := ioutil.ReadFile(formatedContentGold)
|
||||
assert.NoError(t, err)
|
||||
|
||||
outPathLang, err := ioutil.TempFile("/tmp", "generator-test-")
|
||||
assert.NoError(t, err)
|
||||
defer os.Remove(outPathLang.Name())
|
||||
|
||||
outPathContent, err := ioutil.TempFile("/tmp", "generator-test-")
|
||||
assert.NoError(t, err)
|
||||
defer os.Remove(outPathContent.Name())
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
fileToParse string
|
||||
@ -50,6 +64,16 @@ func TestFromFile(t *testing.T) {
|
||||
generate: Languages,
|
||||
wantOut: goldLang,
|
||||
},
|
||||
{
|
||||
name: "TestFromFile_Heuristics",
|
||||
fileToParse: heuristicsTestFile,
|
||||
outPath: outPathContent.Name(),
|
||||
tmplPath: contentTestTmplPath,
|
||||
tmplName: contentTestTmplName,
|
||||
commit: commitHeuristicsTest,
|
||||
generate: Heuristics,
|
||||
wantOut: goldContent,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
@ -96,3 +120,37 @@ func TestLanguages(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestHeuristics(t *testing.T) {
|
||||
gold, err := ioutil.ReadFile(contentGold)
|
||||
assert.NoError(t, err)
|
||||
|
||||
input, err := ioutil.ReadFile(heuristicsTestFile)
|
||||
assert.NoError(t, err)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
input []byte
|
||||
tmplPath string
|
||||
tmplName string
|
||||
commit string
|
||||
wantOut []byte
|
||||
}{
|
||||
{
|
||||
name: "TestHeuristics",
|
||||
input: input,
|
||||
tmplPath: contentTestTmplPath,
|
||||
tmplName: contentTestTmplName,
|
||||
commit: commitHeuristicsTest,
|
||||
wantOut: gold,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
out, err := Heuristics(tt.input, tt.tmplPath, tt.tmplName, tt.commit)
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, tt.wantOut, out, fmt.Sprintf("Heuristics() = %v, want %v", string(out), string(tt.wantOut)))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -30,6 +30,10 @@ func main() {
|
||||
if err := generator.FromFile(languagesYAML, langFile, languagesTmplPath, languagesTmpl, commit, generator.Languages); err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
|
||||
if err := generator.FromFile(heuristicsRuby, contentFile, contentTmplPath, contentTmpl, commit, generator.Heuristics); err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
}
|
||||
|
||||
func getCommit(path string) (string, error) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user