content generator

This commit is contained in:
Manuel Carmona 2017-04-05 16:01:31 +02:00
parent f375b0df5e
commit ba22a0a243
10 changed files with 1269 additions and 321 deletions

View File

@ -0,0 +1,51 @@
package slinguist
// CODE GENERATED AUTOMATICALLY WITH github.com/src-d/simple-linguist/cli/slinguist-generate
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: {{ getCommit }}
import (
"path/filepath"
"regexp"
"strings"
)
func GetLanguageByContent(filename string, content []byte) (lang string, safe bool) {
ext := strings.ToLower(filepath.Ext(filename))
if fnMatcher, ok := matchers[ext]; ok {
lang, safe = fnMatcher(content)
return
}
return GetLanguageByExtension(filename)
}
type languageMatcher func ([]byte) (string, bool)
var matchers = map[string]languageMatcher{
{{ range $index, $disambiguator := . -}}
{{ printf "%q" $disambiguator.Extension }}: func(i []byte) (string, bool) {
{{ range $i, $language := $disambiguator.Languages -}}
{{- if not (avoidLanguage $language) }}
{{- if gt (len $language.Heuristics) 0 }}
{{- if gt $i 0 }} else {{ end -}}
if {{- range $j, $heuristic := $language.Heuristics }} {{ $heuristic.Name }}.Match(i)
{{- if lt $j (len $language.LogicRelations) }} {{index $language.LogicRelations $j}} {{- end -}} {{ end }} {
return {{ printf "%q" $language.Language }}, true
}
{{- end -}}
{{- end -}}
{{- end}}
return {{ returnLanguage $disambiguator.Languages }}, {{ safeLanguage $disambiguator.Languages }}
},
{{ end -}}
}
var (
{{ range $index, $heuristic := getAllHeuristics . -}}
{{ $heuristic.Name }} = regexp.MustCompile(`{{ $heuristic.Regexp }}`)
{{ end -}}
)

View File

@ -0,0 +1,488 @@
package generator
import (
"bufio"
"bytes"
"fmt"
"io"
"regexp"
"strconv"
"strings"
"text/template"
)
// Heuristics parses the heuristics source passed in heuristics into
// disambiguators and renders them through the template file contentTmplPath,
// whose template name is contentTmplName. commit is passed on to the template
// execution. It returns the rendered bytes, or the first error reported by
// parsing or by template execution.
func Heuristics(heuristics []byte, contentTmplPath, contentTmplName, commit string) ([]byte, error) {
	disambiguators, err := getDisambiguators(heuristics)
	if err != nil {
		return nil, err
	}

	var out bytes.Buffer
	err = executeContentTemplate(&out, disambiguators, contentTmplPath, contentTmplName, commit)
	if err != nil {
		return nil, err
	}

	return out.Bytes(), nil
}
// unknownLanguage is the fallback returned by returnLangAndSafe when none of
// the languages of a disambiguator is free of heuristics. It is emitted
// unquoted, i.e. as an identifier, in the generated code.
const unknownLanguage = "OtherLanguage"
var (
// disambLine matches a line that opens a `disambiguate` block.
disambLine = regexp.MustCompile(`^(\s*)disambiguate`)
// definedRegs holds the regexps assigned to named variables in the
// heuristics source, keyed by variable name. It is filled by
// lookForRegexpVariables and read by replaceRegexpVariables.
definedRegs = make(map[string]string)
// illegalCharacter maps characters found in extensions or language names to
// the word buildName substitutes for them in matcher names.
illegalCharacter = map[string]string{
"#": "Sharp",
"+": "Plus",
"-": "Dash",
}
)
// disambiguator groups, for one file extension, the candidate languages and
// the heuristics used to choose among them.
type disambiguator struct {
// Extension is the file extension as written in the disambiguate line, e.g. ".mod".
Extension string `json:"extension,omitempty"`
// Languages lists the candidate languages in the order they were parsed.
Languages []*languageHeuristics `json:"languages,omitempty"`
}
// setHeuristicsNames gives every heuristic of every language in d a name
// built by buildName from d's extension, the language name and the
// heuristic's index within that language.
func (d *disambiguator) setHeuristicsNames() {
	for _, lh := range d.Languages {
		for idx := range lh.Heuristics {
			lh.Heuristics[idx].Name = buildName(d.Extension, lh.Language, idx)
		}
	}
}
// buildName composes a matcher name of the form
// <extension>_<language>_Matcher_<id>. The leading dot of the extension and
// all whitespace inside the language name are removed, and every key of
// illegalCharacter found in the result is replaced by its mapped word
// (for example "-" becomes "Dash").
func buildName(extension, language string, id int) string {
	ext := strings.TrimPrefix(extension, `.`)
	lang := strings.Join(strings.Fields(language), ``)

	name := ext + `_` + lang + `_` + "Matcher" + `_` + strconv.Itoa(id)
	for illegal, word := range illegalCharacter {
		// Replacing a substring that is absent leaves the name untouched,
		// so no prior Contains check is needed.
		name = strings.Replace(name, illegal, word, -1)
	}

	return name
}
// languageHeuristics holds one candidate language together with the
// heuristics that select it.
type languageHeuristics struct {
// Language is the language name as quoted in the heuristics source.
Language string `json:"language,omitempty"`
// Heuristics are the regexps tested for this language; empty for languages
// that appear without a condition.
Heuristics []*heuristic `json:"heuristics,omitempty"`
// LogicRelations holds the "&&" / "||" operators; the template writes
// LogicRelations[j] right after the Match call of Heuristics[j].
LogicRelations []string `json:"logic_relations,omitempty"`
}
// clone returns a copy of l that shares no slice or heuristic value with it:
// the logic relations are copied into a new slice and every heuristic is
// duplicated. An error is returned only if copy reports fewer elements than
// requested.
func (l *languageHeuristics) clone() (*languageHeuristics, error) {
	rels := make([]string, len(l.LogicRelations))
	if n := copy(rels, l.LogicRelations); n != len(l.LogicRelations) {
		return nil, fmt.Errorf("error copying logic relations")
	}

	hs := make([]*heuristic, 0, len(l.Heuristics))
	for _, src := range l.Heuristics {
		dup := *src
		hs = append(hs, &dup)
	}

	return &languageHeuristics{
		Language:       l.Language,
		Heuristics:     hs,
		LogicRelations: rels,
	}, nil
}
// heuristic is a single regexp test used to recognise a language.
type heuristic struct {
// Name is the matcher name assigned by disambiguator.setHeuristicsNames.
Name string `json:"name,omitempty"`
// Regexp is the pattern source, as produced by convToValidRegexp.
Regexp string `json:"regexp,omitempty"`
}
// getDisambiguators scans the heuristics source line by line and returns the
// disambiguators found in it. Each line matching disambLine is handed, along
// with the scanner, to parseDisambiguators, which consumes the lines of that
// block. Every line read by this loop is also passed to
// lookForRegexpVariables.
//
// A disambiguate block looks like:
//
//	disambiguate ".mod", ".extension" do |data|
//		if data.include?('<!ENTITY ') && data.include?('patata')
//			Language["XML"]
//		elsif /^\s*MODULE [\w\.]+;/i.match(data) || /^\s*END [\w\.]+;/i.match(data) || data.empty?
//			Language["Modula-2"]
//		elsif (/^\s*import (scala|java)\./.match(data) || /^\s*val\s+\w+\s*=/.match(data) || /^\s*class\b/.match(data))
//			Language["Scala"]
//		elsif (data.include?("gap> "))
//			Language["GAP"]
//		else
//			[Language["Linux Kernel Module"], Language["AMPL"]]
//		end
//	end
func getDisambiguators(heuristics []byte) ([]*disambiguator, error) {
	var (
		seen    = map[string]bool{}
		scanner = bufio.NewScanner(bytes.NewReader(heuristics))
		found   = make([]*disambiguator, 0, 50)
	)

	for scanner.Scan() {
		current := scanner.Text()

		if disambLine.MatchString(current) {
			parsed, err := parseDisambiguators(current, scanner, seen)
			if err != nil {
				return nil, err
			}

			found = append(found, parsed...)
		}

		lookForRegexpVariables(current)
	}

	if err := scanner.Err(); err != nil {
		return nil, err
	}

	return found, nil
}
// lookForRegexpVariables records in definedRegs the value assigned to the
// variables ObjectiveCRegex and fortran_rx when line contains
// "<variable> = ". The stored value is the trimmed line with the
// "<variable> = " prefix removed.
func lookForRegexpVariables(line string) {
	for _, variable := range [...]string{"ObjectiveCRegex", "fortran_rx"} {
		assignment := variable + " = "
		if strings.Contains(line, assignment) {
			line = strings.TrimSpace(line)
			definedRegs[variable] = strings.TrimPrefix(line, assignment)
		}
	}
}
// parseDisambiguators builds one disambiguator per not-yet-seen extension
// listed on the disambiguate line, then reads the body of the block from buf
// through getLanguagesHeuristics. The first disambiguator receives the parsed
// languages directly; every further one receives a clone, so that the names
// set by setHeuristicsNames do not overwrite each other. Extensions are
// recorded in seenExtensions as they are accepted.
func parseDisambiguators(line string, buf *bufio.Scanner, seenExtensions map[string]bool) ([]*disambiguator, error) {
	found := make([]*disambiguator, 0, 2)
	for _, token := range strings.Fields(line) {
		if !strings.HasPrefix(token, `"`) {
			continue
		}

		ext := strings.Trim(token, `",`)
		if _, dup := seenExtensions[ext]; dup {
			continue
		}

		found = append(found, &disambiguator{Extension: ext})
		seenExtensions[ext] = true
	}

	shared, err := getLanguagesHeuristics(buf)
	if err != nil {
		return nil, err
	}

	for idx, d := range found {
		if idx == 0 {
			d.Languages = shared
		} else {
			d.Languages = cloneLanguagesHeuristics(shared)
		}

		d.setHeuristicsNames()
	}

	return found, nil
}
// cloneLanguagesHeuristics returns a slice holding a clone of every element
// of list, in the same order.
func cloneLanguagesHeuristics(list []*languageHeuristics) []*languageHeuristics {
	clones := make([]*languageHeuristics, len(list))
	for idx := range list {
		// The error from clone is discarded, as it only signals a short copy.
		clones[idx], _ = list[idx].clone()
	}

	return clones
}
// getLanguagesHeuristics reads lines from buf until a line that is exactly
// "end" after trimming (or until the scanner is exhausted) and collects, per
// branch, the languages, the heuristics and the logic relations found. The
// three collected lists are then combined by buildLanguagesHeuristics.
// It returns the scanner error, if any.
func getLanguagesHeuristics(buf *bufio.Scanner) ([]*languageHeuristics, error) {
langsList := make([][]string, 0, 2)
heuristicsList := make([][]*heuristic, 0, 1)
logicRelsList := make([][]string, 0, 1)
// lastWasMatch is true while the previous relevant line was a condition
// line, so that consecutive condition lines are merged into one entry.
lastWasMatch := false
for buf.Scan() {
line := buf.Text()
if strings.TrimSpace(line) == "end" {
break
}
if hasRegExp(line) {
line := cleanRegExpLine(line)
logicRels := getLogicRelations(line)
heuristics := getHeuristics(line)
if lastWasMatch {
// Condition continued from the previous line: extend the last entry
// of both lists instead of starting a new one.
i := len(heuristicsList) - 1
heuristicsList[i] = append(heuristicsList[i], heuristics...)
i = len(logicRelsList) - 1
logicRelsList[i] = append(logicRelsList[i], logicRels...)
} else {
heuristicsList = append(heuristicsList, heuristics)
logicRelsList = append(logicRelsList, logicRels)
}
lastWasMatch = true
}
// Any line containing "Language" is treated as a result line and closes
// the current condition. Note the check uses the original, uncleaned line.
if strings.Contains(line, "Language") {
langs := getLanguages(line)
langsList = append(langsList, langs)
lastWasMatch = false
}
}
if err := buf.Err(); err != nil {
return nil, err
}
// Entries are paired by position: langsList[i] goes with heuristicsList[i]
// and logicRelsList[i] when those exist.
langsHeuristics := buildLanguagesHeuristics(langsList, heuristicsList, logicRelsList)
return langsHeuristics, nil
}
// hasRegExp reports whether line contains one of the call forms that are
// turned into heuristics: ".match", ".include?" or ".empty?".
func hasRegExp(line string) bool {
	for _, marker := range [...]string{".match", ".include?", ".empty?"} {
		if strings.Contains(line, marker) {
			return true
		}
	}

	return false
}
// cleanRegExpLine strips the syntax surrounding a condition: everything up to
// and including the first "if " (which also covers "elsif "), surrounding
// whitespace, one leading "(" and, when the remainder contains "))", one
// trailing ")".
func cleanRegExpLine(line string) string {
	if pos := strings.Index(line, `if `); pos >= 0 {
		line = line[pos+3:]
	}

	cleaned := strings.TrimPrefix(strings.TrimSpace(line), `(`)
	if !strings.Contains(cleaned, "))") {
		return cleaned
	}

	return strings.TrimSuffix(cleaned, `)`)
}
// getLogicRelations returns, in source order, the "&&" and "||" operators
// that join the operands of line. The result has one entry per operator, so
// it lines up with the operands returned by splitByLogicOps: relation j sits
// between operand j and operand j+1. It returns nil when line holds a single
// operand.
//
// Every "&&" inside an "||" group is counted; emitting only one per group
// would leave operands of a chain such as `a && b && c` without an operator
// between them.
func getLogicRelations(line string) []string {
	var rels []string

	orGroups := strings.Split(line, "||")
	for i, group := range orGroups {
		for n := strings.Count(group, "&&"); n > 0; n-- {
			rels = append(rels, "&&")
		}

		if i < len(orGroups)-1 {
			rels = append(rels, "||")
		}
	}

	return rels
}
// getHeuristics converts every operand of line (as split by splitByLogicOps)
// into a heuristic:
//   - `<regexp>.match(...)` uses the text before ".match", after substituting
//     a known regexp variable;
//   - `....include?(...)` uses the quoted argument as a literal pattern;
//   - `....empty?` uses a pattern that matches only empty input.
//
// When an operand contains several of these forms the later check wins.
// Operands that yield no pattern are skipped.
func getHeuristics(line string) []*heuristic {
	operands := splitByLogicOps(line)
	heuristics := make([]*heuristic, 0, len(operands))
	for _, operand := range operands {
		operand = strings.TrimSpace(operand)

		var reg string
		if idx := strings.Index(operand, ".match"); idx >= 0 {
			reg = replaceRegexpVariables(operand[:idx])
		}

		if strings.Contains(operand, ".include?") {
			reg = includeToRegExp(operand)
		}

		if strings.Contains(operand, ".empty?") {
			// `^$` cannot be used here: convToValidRegexp always enables the
			// (?m) flag, under which `^$` matches any blank line or a trailing
			// newline. \A and \z anchor to the whole text regardless of flags.
			reg = `\A\z`
		}

		if reg == "" {
			continue
		}

		heuristics = append(heuristics, &heuristic{Regexp: convToValidRegexp(reg)})
	}

	return heuristics
}
// splitByLogicOps splits line on "||" and then each part on "&&", returning
// all operands in their original order. Operands are not trimmed.
func splitByLogicOps(line string) []string {
	operands := make([]string, 0, 1)
	for _, orPart := range strings.Split(line, "||") {
		for _, andPart := range strings.Split(orPart, "&&") {
			operands = append(operands, andPart)
		}
	}

	return operands
}
// replaceRegexpVariables returns the regexp stored in definedRegs under the
// key reg, or reg itself when no such variable has been recorded.
func replaceRegexpVariables(reg string) string {
	if defined, found := definedRegs[reg]; found {
		return defined
	}

	return reg
}
// convToValidRegexp converts reg into a Go pattern prefixed with a flag group.
//
// When reg is a Ruby literal such as `/^(\s*)(<Project|<Import)/i`, the
// delimiting slashes are removed and the trailing modifiers are translated:
// Ruby "i" becomes Go "i", and Ruby "m" (dot matches newline) becomes Go "s".
// The Go "m" flag is always set.
//
// When reg does not start with "/", it is not a Ruby literal (for instance a
// quoted string produced by includeToRegExp) and is kept verbatim. Any "/" it
// contains is part of the pattern and must not be taken for a closing
// delimiter.
func convToValidRegexp(reg string) string {
	const (
		goCaseInsensitive   = "i"
		goDotMatchesNewline = "s"
		rubyCaseInsensitive = "i"
		rubyMultiLine       = "m"
	)

	flags := "(?m"
	if !strings.HasPrefix(reg, `/`) {
		return flags + ")" + reg
	}

	body := reg[1:]
	closing := strings.LastIndex(body, `/`)
	if closing == -1 {
		// Opening delimiter without a closing one: keep the rest as is.
		return flags + ")" + body
	}

	modifiers := body[closing+1:]
	body = body[:closing]

	if strings.Contains(modifiers, rubyCaseInsensitive) {
		flags += goCaseInsensitive
	}

	if strings.Contains(modifiers, rubyMultiLine) {
		flags += goDotMatchesNewline
	}

	return flags + ")" + body
}
// includeToRegExp extracts the argument of an `include?(...)` call and
// returns it as a regexp that matches that text literally: the text between
// the first "(" and the first ")" after it is taken, surrounding quotes are
// stripped and regexp metacharacters are escaped.
//
// It returns "" when include has no "(" or no ")" after it, instead of
// slicing with an invalid index.
func includeToRegExp(include string) string {
	open := strings.Index(include, `(`)
	if open == -1 {
		return ""
	}

	rest := include[open+1:]
	end := strings.Index(rest, `)`)
	if end == -1 {
		return ""
	}

	content := strings.Trim(rest[:end], `"'`)
	return regexp.QuoteMeta(content)
}
// getLanguages splits line on commas and returns the language name extracted
// from each part by trimLanguage.
func getLanguages(line string) []string {
	parts := strings.Split(line, `,`)
	languages := make([]string, len(parts))
	for idx, part := range parts {
		languages[idx] = trimLanguage(part)
	}

	return languages
}
// trimLanguage returns the text between the first pair of double quotes in
// enclosedLang, e.g. `Language["XML"]` yields `XML`.
func trimLanguage(enclosedLang string) string {
	rest := strings.TrimSpace(enclosedLang)
	rest = rest[strings.Index(rest, `"`)+1:]
	closing := strings.Index(rest, `"`)

	return rest[:closing]
}
// buildLanguagesHeuristics pairs the three lists by position: the languages
// in langsList[i] all receive heuristicsList[i] and logicRelsList[i]. When a
// list is shorter than langsList, the languages beyond its end get a nil
// value for that field. One languageHeuristics is produced per language.
func buildLanguagesHeuristics(langsList [][]string, heuristicsList [][]*heuristic, logicRelsList [][]string) []*languageHeuristics {
	result := make([]*languageHeuristics, 0, len(langsList))
	for branch, langs := range langsList {
		var (
			hs   []*heuristic
			rels []string
		)

		if branch < len(heuristicsList) {
			hs = heuristicsList[branch]
		}

		if branch < len(logicRelsList) {
			rels = logicRelsList[branch]
		}

		for _, name := range langs {
			result = append(result, &languageHeuristics{
				Language:       name,
				Heuristics:     hs,
				LogicRelations: rels,
			})
		}
	}

	return result
}
// executeContentTemplate parses the template file contentTmplPath under the
// template name contentTmpl and executes it with disambiguators as data,
// writing the result to out. The template can call getCommit (which returns
// commit), getAllHeuristics, returnLanguage, safeLanguage and avoidLanguage.
//
// Both parse and execution failures are returned as errors; a template that
// cannot be read or parsed does not panic.
func executeContentTemplate(out io.Writer, disambiguators []*disambiguator, contentTmplPath, contentTmpl, commit string) error {
	fmap := template.FuncMap{
		"getCommit":        func() string { return commit },
		"getAllHeuristics": getAllHeuristics,
		"returnLanguage":   returnLanguage,
		"safeLanguage":     safeLanguage,
		"avoidLanguage":    avoidLanguage,
	}

	t, err := template.New(contentTmpl).Funcs(fmap).ParseFiles(contentTmplPath)
	if err != nil {
		return err
	}

	return t.Execute(out, disambiguators)
}
// getAllHeuristics returns the heuristics of every language of every
// disambiguator, skipping the languages for which avoidLanguage is true.
func getAllHeuristics(disambiguators []*disambiguator) []*heuristic {
	all := make([]*heuristic, 0)
	for _, d := range disambiguators {
		for _, lh := range d.Languages {
			if avoidLanguage(lh) {
				continue
			}

			all = append(all, lh.Heuristics...)
		}
	}

	return all
}
// avoidLanguage reports whether lang must be left out of the generated code
// because at least one of its heuristics is rejected by
// containsInvalidRegexp.
func avoidLanguage(lang *languageHeuristics) bool {
	for idx := range lang.Heuristics {
		if containsInvalidRegexp(lang.Heuristics[idx].Regexp) {
			return true
		}
	}

	return false
}
// containsInvalidRegexp reports whether reg cannot be used as a Go regexp.
//
// Patterns containing `(?<` or `\1` are always rejected. Any other pattern
// that regexp.Compile refuses (for example lookahead groups such as `(?=`
// and `(?!`) is rejected as well, because the generated file compiles every
// pattern with regexp.MustCompile, which panics on an invalid one.
func containsInvalidRegexp(reg string) bool {
	if strings.Contains(reg, `(?<`) || strings.Contains(reg, `\1`) {
		return true
	}

	_, err := regexp.Compile(reg)
	return err != nil
}
// returnLanguage returns the language part of returnLangAndSafe: the text the
// template writes as the first value of the final return statement.
func returnLanguage(langsHeuristics []*languageHeuristics) string {
	fallback, _ := returnLangAndSafe(langsHeuristics)

	return fallback
}
// safeLanguage returns the boolean part of returnLangAndSafe: the value the
// template writes as the second value of the final return statement.
func safeLanguage(langsHeuristics []*languageHeuristics) bool {
	_, isSafe := returnLangAndSafe(langsHeuristics)

	return isSafe
}
// returnLangAndSafe picks the fallback result from the languages that have
// no heuristics. It returns the first such language wrapped in double quotes,
// and true only when it is the sole candidate. When every language has
// heuristics it returns unknownLanguage (unquoted) and false.
//
// For now only one language is returned, although several candidates may
// exist and could be returned as a []string.
func returnLangAndSafe(langsHeuristics []*languageHeuristics) (string, bool) {
	candidates := make([]string, 0)
	for _, lh := range langsHeuristics {
		if len(lh.Heuristics) != 0 {
			continue
		}

		candidates = append(candidates, `"`+lh.Language+`"`)
	}

	if len(candidates) == 0 {
		return unknownLanguage, false
	}

	return candidates[0], len(candidates) == 1
}

View File

@ -0,0 +1,81 @@
package slinguist
// CODE GENERATED AUTOMATICALLY WITH github.com/src-d/simple-linguist/cli/slinguist-generate
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: fe8b44ab8a225b1ffa75b983b916ea22fee5b6f7
import (
"path/filepath"
"regexp"
"strings"
)
func GetLanguageByContent(filename string, content []byte) (lang string, safe bool) {
ext := strings.ToLower(filepath.Ext(filename))
if fnMatcher, ok := matchers[ext]; ok {
lang, safe = fnMatcher(content)
return
}
return GetLanguageByExtension(filename)
}
type languageMatcher func ([]byte) (string, bool)
var matchers = map[string]languageMatcher{
".asc": func(i []byte) (string, bool) {
if asc_PublicKey_Matcher_0.Match(i) {
return "Public Key", true
} else if asc_AsciiDoc_Matcher_0.Match(i) {
return "AsciiDoc", true
} else if asc_AGSScript_Matcher_0.Match(i) {
return "AGS Script", true
}
return OtherLanguage, false
},
".ms": func(i []byte) (string, bool) {
if ms_Groff_Matcher_0.Match(i) {
return "Groff", true
}
return "MAXScript", true
},
".mod": func(i []byte) (string, bool) {
if mod_XML_Matcher_0.Match(i) {
return "XML", true
} else if mod_ModulaDash2_Matcher_0.Match(i) || mod_ModulaDash2_Matcher_1.Match(i) {
return "Modula-2", true
}
return "Linux Kernel Module", false
},
".pro": func(i []byte) (string, bool) {
if pro_Prolog_Matcher_0.Match(i) {
return "Prolog", true
} else if pro_INI_Matcher_0.Match(i) {
return "INI", true
} else if pro_QMake_Matcher_0.Match(i) && pro_QMake_Matcher_1.Match(i) {
return "QMake", true
} else if pro_IDL_Matcher_0.Match(i) {
return "IDL", true
}
return OtherLanguage, false
},
}
var (
asc_PublicKey_Matcher_0 = regexp.MustCompile(`(?m)^(----[- ]BEGIN|ssh-(rsa|dss)) `)
asc_AsciiDoc_Matcher_0 = regexp.MustCompile(`(?m)^[=-]+(\s|\n)|{{[A-Za-z]`)
asc_AGSScript_Matcher_0 = regexp.MustCompile(`(?m)^(\/\/.+|((import|export)\s+)?(function|int|float|char)\s+((room|repeatedly|on|game)_)?([A-Za-z]+[A-Za-z_0-9]+)\s*[;\(])`)
ms_Groff_Matcher_0 = regexp.MustCompile(`(?mi)^[.'][a-z][a-z](\s|$)`)
mod_XML_Matcher_0 = regexp.MustCompile(`(?m)<!ENTITY `)
mod_ModulaDash2_Matcher_0 = regexp.MustCompile(`(?mi)^\s*MODULE [\w\.]+;`)
mod_ModulaDash2_Matcher_1 = regexp.MustCompile(`(?mi)^\s*END [\w\.]+;`)
pro_Prolog_Matcher_0 = regexp.MustCompile(`(?m)^[^#]+:-`)
pro_INI_Matcher_0 = regexp.MustCompile(`(?m)last_client=`)
pro_QMake_Matcher_0 = regexp.MustCompile(`(?m)HEADERS`)
pro_QMake_Matcher_1 = regexp.MustCompile(`(?m)SOURCES`)
pro_IDL_Matcher_0 = regexp.MustCompile(`(?m)^\s*function[ \w,]+$`)
)

View File

@ -0,0 +1,51 @@
package slinguist
// CODE GENERATED AUTOMATICALLY WITH github.com/src-d/simple-linguist/cli/slinguist-generate
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: {{ getCommit }}
import (
"path/filepath"
"regexp"
"strings"
)
func GetLanguageByContent(filename string, content []byte) (lang string, safe bool) {
ext := strings.ToLower(filepath.Ext(filename))
if fnMatcher, ok := matchers[ext]; ok {
lang, safe = fnMatcher(content)
return
}
return GetLanguageByExtension(filename)
}
type languageMatcher func ([]byte) (string, bool)
var matchers = map[string]languageMatcher{
{{ range $index, $disambiguator := . -}}
{{ printf "%q" $disambiguator.Extension }}: func(i []byte) (string, bool) {
{{ range $i, $language := $disambiguator.Languages -}}
{{- if not (avoidLanguage $language) }}
{{- if gt (len $language.Heuristics) 0 }}
{{- if gt $i 0 }} else {{ end -}}
if {{- range $j, $heuristic := $language.Heuristics }} {{ $heuristic.Name }}.Match(i)
{{- if lt $j (len $language.LogicRelations) }} {{index $language.LogicRelations $j}} {{- end -}} {{ end }} {
return {{ printf "%q" $language.Language }}, true
}
{{- end -}}
{{- end -}}
{{- end}}
return {{ returnLanguage $disambiguator.Languages }}, {{ safeLanguage $disambiguator.Languages }}
},
{{ end -}}
}
var (
{{ range $index, $heuristic := getAllHeuristics . -}}
{{ $heuristic.Name }} = regexp.MustCompile(`{{ $heuristic.Regexp }}`)
{{ end -}}
)

View File

@ -0,0 +1,81 @@
package slinguist
// CODE GENERATED AUTOMATICALLY WITH github.com/src-d/simple-linguist/cli/slinguist-generate
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: fe8b44ab8a225b1ffa75b983b916ea22fee5b6f7
import (
"path/filepath"
"regexp"
"strings"
)
func GetLanguageByContent(filename string, content []byte) (lang string, safe bool) {
ext := strings.ToLower(filepath.Ext(filename))
if fnMatcher, ok := matchers[ext]; ok {
lang, safe = fnMatcher(content)
return
}
return GetLanguageByExtension(filename)
}
type languageMatcher func([]byte) (string, bool)
var matchers = map[string]languageMatcher{
".asc": func(i []byte) (string, bool) {
if asc_PublicKey_Matcher_0.Match(i) {
return "Public Key", true
} else if asc_AsciiDoc_Matcher_0.Match(i) {
return "AsciiDoc", true
} else if asc_AGSScript_Matcher_0.Match(i) {
return "AGS Script", true
}
return OtherLanguage, false
},
".ms": func(i []byte) (string, bool) {
if ms_Groff_Matcher_0.Match(i) {
return "Groff", true
}
return "MAXScript", true
},
".mod": func(i []byte) (string, bool) {
if mod_XML_Matcher_0.Match(i) {
return "XML", true
} else if mod_ModulaDash2_Matcher_0.Match(i) || mod_ModulaDash2_Matcher_1.Match(i) {
return "Modula-2", true
}
return "Linux Kernel Module", false
},
".pro": func(i []byte) (string, bool) {
if pro_Prolog_Matcher_0.Match(i) {
return "Prolog", true
} else if pro_INI_Matcher_0.Match(i) {
return "INI", true
} else if pro_QMake_Matcher_0.Match(i) && pro_QMake_Matcher_1.Match(i) {
return "QMake", true
} else if pro_IDL_Matcher_0.Match(i) {
return "IDL", true
}
return OtherLanguage, false
},
}
var (
asc_PublicKey_Matcher_0 = regexp.MustCompile(`(?m)^(----[- ]BEGIN|ssh-(rsa|dss)) `)
asc_AsciiDoc_Matcher_0 = regexp.MustCompile(`(?m)^[=-]+(\s|\n)|{{[A-Za-z]`)
asc_AGSScript_Matcher_0 = regexp.MustCompile(`(?m)^(\/\/.+|((import|export)\s+)?(function|int|float|char)\s+((room|repeatedly|on|game)_)?([A-Za-z]+[A-Za-z_0-9]+)\s*[;\(])`)
ms_Groff_Matcher_0 = regexp.MustCompile(`(?mi)^[.'][a-z][a-z](\s|$)`)
mod_XML_Matcher_0 = regexp.MustCompile(`(?m)<!ENTITY `)
mod_ModulaDash2_Matcher_0 = regexp.MustCompile(`(?mi)^\s*MODULE [\w\.]+;`)
mod_ModulaDash2_Matcher_1 = regexp.MustCompile(`(?mi)^\s*END [\w\.]+;`)
pro_Prolog_Matcher_0 = regexp.MustCompile(`(?m)^[^#]+:-`)
pro_INI_Matcher_0 = regexp.MustCompile(`(?m)last_client=`)
pro_QMake_Matcher_0 = regexp.MustCompile(`(?m)HEADERS`)
pro_QMake_Matcher_1 = regexp.MustCompile(`(?m)SOURCES`)
pro_IDL_Matcher_0 = regexp.MustCompile(`(?m)^\s*function[ \w,]+$`)
)

View File

@ -0,0 +1,44 @@
# Common heuristics
ObjectiveCRegex = /^\s*(@(interface|class|protocol|property|end|synchronised|selector|implementation)\b|#import\s+.+\.h[">])/
disambiguate ".asc" do |data|
if /^(----[- ]BEGIN|ssh-(rsa|dss)) /.match(data)
Language["Public Key"]
elsif /^[=-]+(\s|\n)|{{[A-Za-z]/.match(data)
Language["AsciiDoc"]
elsif /^(\/\/.+|((import|export)\s+)?(function|int|float|char)\s+((room|repeatedly|on|game)_)?([A-Za-z]+[A-Za-z_0-9]+)\s*[;\(])/.match(data)
Language["AGS Script"]
end
end
disambiguate ".ms" do |data|
if /^[.'][a-z][a-z](\s|$)/i.match(data)
Language["Groff"]
elsif /(?<!\S)\.(include|globa?l)\s/.match(data) || /(?<!\/\*)(\A|\n)\s*\.[A-Za-z]/.match(data.gsub(/"([^\\"]|\\.)*"|'([^\\']|\\.)*'|\\\s*(?:--.*)?\n/, ""))
Language["GAS"]
else
Language["MAXScript"]
end
end
disambiguate ".mod" do |data|
if data.include?('<!ENTITY ')
Language["XML"]
elsif /^\s*MODULE [\w\.]+;/i.match(data) || /^\s*END [\w\.]+;/i.match(data)
Language["Modula-2"]
else
[Language["Linux Kernel Module"], Language["AMPL"]]
end
end
disambiguate ".pro" do |data|
if /^[^#]+:-/.match(data)
Language["Prolog"]
elsif data.include?("last_client=")
Language["INI"]
elsif data.include?("HEADERS") && data.include?("SOURCES")
Language["QMake"]
elsif /^\s*function[ \w,]+$/.match(data)
Language["IDL"]
end
end

View File

@ -1,10 +1,13 @@
package slinguist
// CODE GENERATED AUTOMATICALLY WITH github.com/src-d/simple-linguist/cli/slinguist-generate
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: 8b2d15a2d527c363dead3efe5e4dc3cb74fa4613
import (
"path/filepath"
"regexp"
"strings"
"gopkg.in/toqueteos/substring.v1"
)
func GetLanguageByContent(filename string, content []byte) (lang string, safe bool) {
@ -20,466 +23,555 @@ func GetLanguageByContent(filename string, content []byte) (lang string, safe bo
type languageMatcher func([]byte) (string, bool)
var matchers = map[string]languageMatcher{
".bf": func(i []byte) (string, bool) {
if substring.BytesRegexp(`(fprintf|function|return)`).Match(i) {
return "HyPhy", true
".asc": func(i []byte) (string, bool) {
if asc_PublicKey_Matcher_0.Match(i) {
return "Public Key", true
} else if asc_AsciiDoc_Matcher_0.Match(i) {
return "AsciiDoc", true
} else if asc_AGSScript_Matcher_0.Match(i) {
return "AGS Script", true
}
return "Brainfuck", false
},
".b": func(i []byte) (string, bool) {
if substring.BytesRegexp(`(include|modules)`).Match(i) {
return "Limbo", true
}
return "Brainfuck", false
return OtherLanguage, false
},
".bb": func(i []byte) (string, bool) {
if blitzBasicMatcher.Match(i) {
if bb_BlitzBasic_Matcher_0.Match(i) || bb_BlitzBasic_Matcher_1.Match(i) {
return "BlitzBasic", true
} else if substring.BytesRegexp(`^\s*(# |include|require)\b`).Match(i) {
} else if bb_BitBake_Matcher_0.Match(i) {
return "BitBake", true
}
return OtherLanguage, false
},
".builds": func(i []byte) (string, bool) {
if builds_XML_Matcher_0.Match(i) {
return "XML", true
}
return "Text", true
},
".ch": func(i []byte) (string, bool) {
if ch_xBase_Matcher_0.Match(i) {
return "xBase", true
}
return OtherLanguage, false
},
".cl": func(i []byte) (string, bool) {
if commonLispMatcher.Match(i) {
if cl_CommonLisp_Matcher_0.Match(i) {
return "Common Lisp", true
} else if coolMatcher.Match(i) {
} else if cl_Cool_Matcher_0.Match(i) {
return "Cool", true
} else if openCLMatcher.Match(i) {
} else if cl_OpenCL_Matcher_0.Match(i) {
return "OpenCL", true
}
return OtherLanguage, false
},
".cls": func(i []byte) (string, bool) {
if apexMatcher.Match(i) {
return "Apex", true
} else if openEdgeABLMatcher.Match(i) {
return "OpenEdge ABL", true
} else if texMatcher.Match(i) {
if cls_TeX_Matcher_0.Match(i) {
return "TeX", true
} else if visualBasicMatcher.Match(i) {
return "Visual Basic", true
}
return OtherLanguage, false
},
".cs": func(i []byte) (string, bool) {
if substring.BytesRegexp(`![\w\s]+methodsFor: `).Match(i) {
if cs_Smalltalk_Matcher_0.Match(i) {
return "Smalltalk", true
} else if cs_CSharp_Matcher_0.Match(i) || cs_CSharp_Matcher_1.Match(i) {
return "C#", true
}
return "C#", true
},
".ch": func(i []byte) (string, bool) {
if substring.BytesRegexp(`(?i)^\s*#\s*(if|ifdef|ifndef|define|command|xcommand|translate|xtranslate|include|pragma|undef)\b`).Match(i) {
return "xBase", true
}
return "Charity", true
return OtherLanguage, false
},
".d": func(i []byte) (string, bool) {
if substring.BytesRegexp(`^module `).Match(i) {
if d_D_Matcher_0.Match(i) {
return "D", true
} else if substring.BytesRegexp(`((dtrace:::)?BEGIN|provider |#pragma (D (option|attributes)|ident)\s)`).Match(i) {
} else if d_DTrace_Matcher_0.Match(i) {
return "DTrace", true
} else if substring.BytesRegexp(`(\/.*:( .* \\)$| : \\$|^ : |: \\$)`).Match(i) {
} else if d_Makefile_Matcher_0.Match(i) {
return "Makefile", true
}
return OtherLanguage, true
return OtherLanguage, false
},
".ecl": func(i []byte) (string, bool) {
if substring.BytesRegexp(`^[^#]+:-`).Match(i) {
if ecl_ECLiPSe_Matcher_0.Match(i) {
return "ECLiPSe", true
} else if substring.BytesHas(`:=`).Match(i) {
} else if ecl_ECL_Matcher_0.Match(i) {
return "ECL", true
}
return OtherLanguage, true
return OtherLanguage, false
},
".es": func(i []byte) (string, bool) {
if substring.BytesRegexp(`\s*(?:%%|main\s*\(.*?\)\s*->)`).Match(i) {
if es_Erlang_Matcher_0.Match(i) {
return "Erlang", true
} else if substring.BytesRegexp(`(?:\/\/|("|')use strict\\1|export\s+default\s|\/\*.*?\*\/)`).Match(i) {
return "JavaScript", true
}
return OtherLanguage, true
return OtherLanguage, false
},
".f": func(i []byte) (string, bool) {
if substring.BytesRegexp(`\n: `).Match(i) {
if f_Forth_Matcher_0.Match(i) {
return "Forth", true
} else if substring.BytesRegexp(`(?i)^([c*][^abd-z]| (subroutine|program|end)\s|\s*!)`).Match(i) {
} else if f_FilebenchWML_Matcher_0.Match(i) {
return "Filebench WML", true
} else if f_FORTRAN_Matcher_0.Match(i) {
return "FORTRAN", true
}
return OtherLanguage, false
},
".for": func(i []byte) (string, bool) {
if for_Forth_Matcher_0.Match(i) {
return "Forth", true
} else if for_FORTRAN_Matcher_0.Match(i) {
return "FORTRAN", true
}
return OtherLanguage, false
},
".fr": func(i []byte) (string, bool) {
if substring.BytesRegexp(`^(: |also |new-device|previous )`).Match(i) {
if fr_Forth_Matcher_0.Match(i) {
return "Forth", true
} else if substring.BytesRegexp(`\s*(import|module|package|data|type)`).Match(i) {
} else if fr_Frege_Matcher_0.Match(i) {
return "Frege", true
}
return "Text", false
},
".j": func(i []byte) (string, bool) {
if objectiveCMatcher.Match(i) {
return "Objective-J", true
}
return "Jasmin", false
},
".inc": func(i []byte) (string, bool) {
if substring.BytesRegexp(`^<\?(?:php)?`).Match(i) {
return "PHP", true
}
return OtherLanguage, true
},
".m": func(i []byte) (string, bool) {
if objectiveCMatcher.Match(i) {
return "Objective-C", true
} else if substring.BytesHas(`:- module`).Match(i) {
return "Mercury", true
} else if substring.BytesRegexp(`\n: `).Match(i) {
return "MUF", true
} else if substring.BytesRegexp(`\n\s*;`).Match(i) {
return "M", true
} else if substring.BytesRegexp(`\n\s*\(\*`).Match(i) {
return "Mathematica", true
} else if substring.BytesRegexp(`\n\s*%`).Match(i) {
return "Matlab", true
} else if substring.BytesRegexp(`\w+\s*:\s*module\s*{`).Match(i) {
return "Limbo", true
}
return OtherLanguage, false
},
".ms": func(i []byte) (string, bool) {
if substring.BytesRegexp(`[.'][a-z][a-z](\s|$)`).Match(i) {
return "Groff", true
}
return "MAXScript", true
},
".md": func(i []byte) (string, bool) {
if substring.BytesRegexp(`\n[-a-z0-9=#!\*\[|]`).Match(i) {
return "Markdown", true
} else if substring.BytesRegexp(`\n(;;|\(define_)`).Match(i) {
return "GCC Machine Description", true
}
return OtherLanguage, false
},
".moo": func(i []byte) (string, bool) {
if substring.BytesHas(`:- module`).Match(i) {
return "Mercury", true
}
return "Moocode", false
},
".e": func(i []byte) (string, bool) {
if substring.BytesRegexp(`feature\s--`).Match(i) {
return "Eiffel", true
}
return "E", false
return "Text", true
},
".fs": func(i []byte) (string, bool) {
if substring.BytesRegexp(`\n(: |new-device)`).Match(i) {
if fs_Forth_Matcher_0.Match(i) {
return "Forth", true
} else if substring.BytesRegexp(`\s*(#light|import|let|module|namespace|open|type)`).Match(i) {
} else if fs_FSharp_Matcher_0.Match(i) {
return "F#", true
} else if substring.BytesRegexp(`(#version|precision|uniform|varying|vec[234])`).Match(i) {
} else if fs_GLSL_Matcher_0.Match(i) {
return "GLSL", true
} else if substring.BytesRegexp(`#include|#pragma\s+(rs|version)|__attribute__`).Match(i) {
} else if fs_Filterscript_Matcher_0.Match(i) {
return "Filterscript", true
}
return OtherLanguage, false
},
".gs": func(i []byte) (string, bool) {
if substring.BytesRegexp(`uses java\.`).Match(i) {
if gs_Gosu_Matcher_0.Match(i) {
return "Gosu", true
}
return "JavaScript", false
return OtherLanguage, false
},
".h": func(i []byte) (string, bool) {
if objectiveCMatcher.Match(i) {
if h_ObjectiveDashC_Matcher_0.Match(i) {
return "Objective-C", true
} else if cPlusPlusMatcher.Match(i) {
return "C++", true
}
return "C", true
},
".hh": func(i []byte) (string, bool) {
if substring.BytesRegexp(`^<\?(?:hh)?`).Match(i) {
return "Hack", true
} else if cPlusPlusMatcher.Match(i) {
} else if h_CPlusPlus_Matcher_0.Match(i) || h_CPlusPlus_Matcher_1.Match(i) || h_CPlusPlus_Matcher_2.Match(i) || h_CPlusPlus_Matcher_3.Match(i) || h_CPlusPlus_Matcher_4.Match(i) || h_CPlusPlus_Matcher_5.Match(i) || h_CPlusPlus_Matcher_6.Match(i) {
return "C++", true
}
return OtherLanguage, false
},
".inc": func(i []byte) (string, bool) {
if inc_PHP_Matcher_0.Match(i) {
return "PHP", true
} else if inc_POVDashRaySDL_Matcher_0.Match(i) {
return "POV-Ray SDL", true
}
return OtherLanguage, false
},
".l": func(i []byte) (string, bool) {
if substring.BytesRegexp(`\(def(un|macro)\s`).Match(i) {
if l_CommonLisp_Matcher_0.Match(i) {
return "Common Lisp", true
} else if substring.BytesRegexp(`(%[%{}]xs|<.*>)`).Match(i) {
} else if l_Lex_Matcher_0.Match(i) {
return "Lex", true
} else if substring.BytesRegexp(`\.[a-z][a-z](\s|$)`).Match(i) {
} else if l_Groff_Matcher_0.Match(i) {
return "Groff", true
} else if substring.BytesRegexp(`(de|class|rel|code|data|must)`).Match(i) {
} else if l_PicoLisp_Matcher_0.Match(i) {
return "PicoLisp", true
}
return OtherLanguage, false
},
".ls": func(i []byte) (string, bool) {
if substring.BytesRegexp(`\s*package\s*[\w\.\/\*\s]*\s*{`).Match(i) {
if ls_LoomScript_Matcher_0.Match(i) {
return "LoomScript", true
}
return "LiveScript", false
return "LiveScript", true
},
".lsp": func(i []byte) (string, bool) {
if lsp_CommonLisp_Matcher_0.Match(i) {
return "Common Lisp", true
} else if lsp_NewLisp_Matcher_0.Match(i) {
return "NewLisp", true
}
return OtherLanguage, false
},
".lisp": func(i []byte) (string, bool) {
if lisp_CommonLisp_Matcher_0.Match(i) {
return "Common Lisp", true
} else if lisp_NewLisp_Matcher_0.Match(i) {
return "NewLisp", true
}
return OtherLanguage, false
},
".m": func(i []byte) (string, bool) {
if m_ObjectiveDashC_Matcher_0.Match(i) {
return "Objective-C", true
} else if m_Mercury_Matcher_0.Match(i) {
return "Mercury", true
} else if m_MUF_Matcher_0.Match(i) {
return "MUF", true
} else if m_M_Matcher_0.Match(i) {
return "M", true
} else if m_Mathematica_Matcher_0.Match(i) {
return "Mathematica", true
} else if m_Matlab_Matcher_0.Match(i) {
return "Matlab", true
} else if m_Limbo_Matcher_0.Match(i) {
return "Limbo", true
}
return OtherLanguage, false
},
".md": func(i []byte) (string, bool) {
if md_Markdown_Matcher_0.Match(i) || md_Markdown_Matcher_1.Match(i) {
return "Markdown", true
} else if md_GCCmachinedescription_Matcher_0.Match(i) {
return "GCC machine description", true
}
return "Markdown", true
},
".ml": func(i []byte) (string, bool) {
if ml_OCaml_Matcher_0.Match(i) {
return "OCaml", true
} else if ml_StandardML_Matcher_0.Match(i) {
return "Standard ML", true
}
return OtherLanguage, false
},
".mod": func(i []byte) (string, bool) {
if mod_XML_Matcher_0.Match(i) {
return "XML", true
} else if mod_ModulaDash2_Matcher_0.Match(i) || mod_ModulaDash2_Matcher_1.Match(i) {
return "Modula-2", true
}
return "Linux Kernel Module", false
},
".ms": func(i []byte) (string, bool) {
if ms_Groff_Matcher_0.Match(i) {
return "Groff", true
}
return "MAXScript", true
},
// ".n": chooses between Groff and Nemerle and falls back to
// (OtherLanguage, false).
// NOTE(review): as shown, each condition appears twice (a substring.* form
// and a *_Matcher_0 form) and the braces do not balance; this looks like two
// versions of the entry interleaved. Confirm which condition lines are
// current before relying on this entry.
".n": func(i []byte) (string, bool) {
if substring.BytesRegexp(`^[.']`).Match(i) {
if n_Groff_Matcher_0.Match(i) {
return "Groff", true
} else if substring.BytesRegexp(`(module|namespace|using)`).Match(i) {
} else if n_Nemerle_Matcher_0.Match(i) {
return "Nemerle", true
}
return OtherLanguage, false
},
// ".ncl": returns "Text" when the THE_TITLE check hits.
// NOTE(review): as shown, the condition appears twice (substring.BytesHas and
// ncl_Text_Matcher_0) with only one closing brace, and two fallback returns
// follow ("NCL", true and OtherLanguage, false). This looks like two versions
// interleaved; confirm which fallback is intended.
".ncl": func(i []byte) (string, bool) {
if substring.BytesHas("THE_TITLE").Match(i) {
if ncl_Text_Matcher_0.Match(i) {
return "Text", true
}
return "NCL", true
return OtherLanguage, false
},
// NOTE(review): this span interleaves a substring-based ".mod" entry (XML /
// Modula-2) with an ".nl" entry (NL via nl_NL_Matcher_0). The ".mod" body is
// not closed before ".nl" begins, and two fallback returns follow the ".nl"
// check. Confirm the intended ".nl" fallback against the real file.
".mod": func(i []byte) (string, bool) {
if substring.BytesHas("<!ENTITY ").Match(i) {
return "XML", true
} else if substring.BytesRegexp(`MODULE\s\w+\s*;`).Match(i) || substring.BytesRegexp(`(?i)\s*END \w+;$`).Match(i) {
return "Modula-2", true
".nl": func(i []byte) (string, bool) {
if nl_NL_Matcher_0.Match(i) {
return "NL", true
}
return "Linux Kernel Module", true
return "NewLisp", true
},
// NOTE(review): this span interleaves a substring-based ".lisp" entry (Common
// Lisp / NewLisp) with the ".php" entry. The ".lisp" body is not closed
// before ".php" begins.
// ".php": Hack is tried before PHP; no match yields (OtherLanguage, false).
".lisp": func(i []byte) (string, bool) {
if commonLispMatcher.Match(i) {
return "Common Lisp", true
} else if substring.BytesRegexp(`\s*\(define `).Match(i) {
return "NewLisp", true
".php": func(i []byte) (string, bool) {
if php_Hack_Matcher_0.Match(i) {
return "Hack", true
} else if php_PHP_Matcher_0.Match(i) {
return "PHP", true
}
return OtherLanguage, false
},
// ".pl": Prolog, then Perl, then Perl6; no match yields
// (OtherLanguage, false).
".pl": func(i []byte) (string, bool) {
	switch {
	case pl_Prolog_Matcher_0.Match(i):
		return "Prolog", true
	case pl_Perl_Matcher_0.Match(i):
		return "Perl", true
	case pl_Perl6_Matcher_0.Match(i):
		return "Perl6", true
	}
	return OtherLanguage, false
},
// ".pm": chooses between Perl and Perl6 and falls back to ("Perl", false).
// NOTE(review): as shown, each condition appears twice (perlMatcher /
// pm_Perl_Matcher_0 and perl6Matcher / pm_Perl6_Matcher_0) and the braces do
// not balance; this looks like two versions interleaved. Confirm which
// condition lines are current.
".pm": func(i []byte) (string, bool) {
if perlMatcher.Match(i) {
if pm_Perl_Matcher_0.Match(i) {
return "Perl", true
} else if perl6Matcher.Match(i) {
} else if pm_Perl6_Matcher_0.Match(i) {
return "Perl6", true
}
return "Perl", false
},
// ".pp": Pascal when pascalMatcher hits; otherwise Puppet, reported as not
// safe. This is the only fallback: a second return that followed it could
// never execute and has been dropped.
".pp": func(i []byte) (string, bool) {
	if pascalMatcher.Match(i) {
		return "Pascal", true
	}
	return "Puppet", false
},
// ".t": the visible branches return Perl, Perl6, Turing or Terra.
// NOTE(review): as shown, the Perl and Perl6 conditions each appear twice
// (perlMatcher / t_Perl_Matcher_0, perl6Matcher / t_Perl6_Matcher_0), the
// braces do not balance, and two fallback returns follow ("Perl", false and
// OtherLanguage, false). This looks like two versions interleaved; confirm
// the current branch list and fallback.
".t": func(i []byte) (string, bool) {
if perlMatcher.Match(i) {
if t_Perl_Matcher_0.Match(i) {
return "Perl", true
} else if perl6Matcher.Match(i) {
} else if t_Perl6_Matcher_0.Match(i) {
return "Perl6", true
} else if substring.BytesRegexp(`^\s*%|^\s*var\s+\w+\s*:\s*\w+`).Match(i) {
return "Turing", true
} else if substring.BytesRegexp(`^\s*use\s+v6\s*;`).Match(i) {
return "Perl6", true
} else if substring.BytesRegexp(`terra\s`).Match(i) {
return "Terra", true
}
return "Perl", false
return OtherLanguage, false
},
// ".pod": Pod when its matcher hits, Perl otherwise; both results are
// reported as safe.
".pod": func(i []byte) (string, bool) {
	if !pod_Pod_Matcher_0.Match(i) {
		return "Perl", true
	}
	return "Pod", true
},
// ".pro": Prolog, then INI, then QMake (needs both QMake matchers), then IDL;
// no match yields (OtherLanguage, false).
".pro": func(i []byte) (string, bool) {
	switch {
	case pro_Prolog_Matcher_0.Match(i):
		return "Prolog", true
	case pro_INI_Matcher_0.Match(i):
		return "INI", true
	case pro_QMake_Matcher_0.Match(i) && pro_QMake_Matcher_1.Match(i):
		return "QMake", true
	case pro_IDL_Matcher_0.Match(i):
		return "IDL", true
	}
	return OtherLanguage, false
},
// ".props": XML is tried before INI; no match yields (OtherLanguage, false).
".props": func(i []byte) (string, bool) {
	switch {
	case props_XML_Matcher_0.Match(i):
		return "XML", true
	case props_INI_Matcher_0.Match(i):
		return "INI", true
	}
	return OtherLanguage, false
},
// ".r": Rebol is tried before R; no match yields (OtherLanguage, false).
".r": func(i []byte) (string, bool) {
	switch {
	case r_Rebol_Matcher_0.Match(i):
		return "Rebol", true
	case r_R_Matcher_0.Match(i):
		return "R", true
	}
	return OtherLanguage, false
},
// ".rno": RUNOFF is tried before Groff; no match yields
// (OtherLanguage, false).
".rno": func(i []byte) (string, bool) {
	switch {
	case rno_RUNOFF_Matcher_0.Match(i):
		return "RUNOFF", true
	case rno_Groff_Matcher_0.Match(i):
		return "Groff", true
	}
	return OtherLanguage, false
},
// ".rpy": Python when its matcher hits, Ren'Py otherwise; both results are
// reported as safe.
".rpy": func(i []byte) (string, bool) {
	if !rpy_Python_Matcher_0.Match(i) {
		return "Ren'Py", true
	}
	return "Python", true
},
// ".rs": Rust is tried before RenderScript; no match yields
// (OtherLanguage, false).
".rs": func(i []byte) (string, bool) {
	switch {
	case rs_Rust_Matcher_0.Match(i):
		return "Rust", true
	case rs_RenderScript_Matcher_0.Match(i):
		return "RenderScript", true
	}
	return OtherLanguage, false
},
// ".sc": SuperCollider when any of its three matchers hits, then Scala when
// any of its three matchers hits; no match yields (OtherLanguage, false).
".sc": func(i []byte) (string, bool) {
	switch {
	case sc_SuperCollider_Matcher_0.Match(i) || sc_SuperCollider_Matcher_1.Match(i) || sc_SuperCollider_Matcher_2.Match(i):
		return "SuperCollider", true
	case sc_Scala_Matcher_0.Match(i) || sc_Scala_Matcher_1.Match(i) || sc_Scala_Matcher_2.Match(i):
		return "Scala", true
	}
	return OtherLanguage, false
},
// ".sql": PLpgSQL (any of three matchers), then SQLPL (either of two), then
// PLSQL (either of two), then SQL; no match yields (OtherLanguage, false).
".sql": func(i []byte) (string, bool) {
	switch {
	case sql_PLpgSQL_Matcher_0.Match(i) || sql_PLpgSQL_Matcher_1.Match(i) || sql_PLpgSQL_Matcher_2.Match(i):
		return "PLpgSQL", true
	case sql_SQLPL_Matcher_0.Match(i) || sql_SQLPL_Matcher_1.Match(i):
		return "SQLPL", true
	case sql_PLSQL_Matcher_0.Match(i) || sql_PLSQL_Matcher_1.Match(i):
		return "PLSQL", true
	case sql_SQL_Matcher_0.Match(i):
		return "SQL", true
	}
	return OtherLanguage, false
},
// ".srt": SubRip Text when its matcher hits; otherwise
// (OtherLanguage, false).
".srt": func(i []byte) (string, bool) {
	if !srt_SubRipText_Matcher_0.Match(i) {
		return OtherLanguage, false
	}
	return "SubRip Text", true
},
// ".toc": World of Warcraft Addon Data is tried before TeX; no match yields
// (OtherLanguage, false).
".toc": func(i []byte) (string, bool) {
	switch {
	case toc_WorldofWarcraftAddonData_Matcher_0.Match(i):
		return "World of Warcraft Addon Data", true
	case toc_TeX_Matcher_0.Match(i):
		return "TeX", true
	}
	return OtherLanguage, false
},
// ".ts": XML when the "</TS>" check hits, TypeScript otherwise; both results
// are reported as safe.
".ts": func(i []byte) (string, bool) {
	if !substring.BytesHas("</TS>").Match(i) {
		return "TypeScript", true
	}
	return "XML", true
},
// ".tsx": returns XML when the check hits, otherwise ("TypeScript", true).
// NOTE(review): as shown, two condition lines are stacked (a "</tileset>"
// substring check and ts_XML_Matcher_0, whose name suggests it belongs to a
// ".ts" entry) with only one closing brace. This looks like two versions
// interleaved; confirm which condition belongs here.
".tsx": func(i []byte) (string, bool) {
if substring.BytesHas("</tileset>").Match(i) {
if ts_XML_Matcher_0.Match(i) {
return "XML", true
}
return "TypeScript", true
},
// ".tst": GAP when the "gap> " check hits, otherwise ("Scilab", true).
// NOTE(review): as shown, the condition appears twice (substring.BytesHas and
// tst_GAP_Matcher_0) with only one closing brace; this looks like two
// versions interleaved. Confirm which condition line is current.
".tst": func(i []byte) (string, bool) {
if substring.BytesHas("gap> ").Match(i) {
if tst_GAP_Matcher_0.Match(i) {
return "GAP", true
}
return "Scilab", true
},
// NOTE(review): this span interleaves a substring-based ".r" entry (Rebol / R)
// with a ".tsx" entry. The ".r" body is not closed before ".tsx" begins.
// ".tsx": TypeScript is tried before XML; no match yields
// (OtherLanguage, false).
".r": func(i []byte) (string, bool) {
if substring.BytesRegexp(`(?i)\bRebol\b`).Match(i) {
return "Rebol", true
} else if substring.BytesHas("<-").Match(i) {
return "R", true
".tsx": func(i []byte) (string, bool) {
if tsx_TypeScript_Matcher_0.Match(i) {
return "TypeScript", true
} else if tsx_XML_Matcher_0.Match(i) {
return "XML", true
}
return OtherLanguage, false
},
// ".rs" (substring-based variant): Rust is tried before RenderScript; no
// match yields (OtherLanguage, false).
".rs": func(i []byte) (string, bool) {
	switch {
	case substring.BytesRegexp(`\n(use |fn |mod |pub |macro_rules|impl|#!?\[)`).Match(i):
		return "Rust", true
	case substring.BytesRegexp(`#include|#pragma\s+(rs|version)|__attribute__`).Match(i):
		return "RenderScript", true
	}
	return OtherLanguage, false
},
// ".rpy" (substring-based variant): Python when the keyword pattern hits;
// otherwise Ren'Py, reported as not safe.
".rpy": func(i []byte) (string, bool) {
	if !substring.BytesRegexp(`(import|from|class|def)\s`).Match(i) {
		return "Ren'Py", false
	}
	return "Python", true
},
// ".v": Verilog is tried before Coq; no match yields (OtherLanguage, false).
".v": func(i []byte) (string, bool) {
	switch {
	case substring.BytesRegexp(`\nendmodule`).Match(i):
		return "Verilog", true
	case substring.BytesRegexp(`(Require|Import)`).Match(i):
		return "Coq", true
	}
	return OtherLanguage, false
},
// ".pl" (substring-based variant): Prolog is tried before Perl6; the
// fallback is Perl, reported as not safe.
".pl": func(i []byte) (string, bool) {
	switch {
	case prologMatcher.Match(i):
		return "Prolog", true
	case perl6Matcher.Match(i):
		return "Perl6", true
	}
	return "Perl", false
},
// ".pro" (substring-based variant): Prolog when prologMatcher hits;
// otherwise (OtherLanguage, false).
".pro": func(i []byte) (string, bool) {
	if !prologMatcher.Match(i) {
		return OtherLanguage, false
	}
	return "Prolog", true
},
// ".pod" (substring-based variant): Pod when the `=\w+\n` pattern hits;
// otherwise Perl, reported as not safe.
".pod": func(i []byte) (string, bool) {
	if !substring.BytesRegexp(`=\w+\n`).Match(i) {
		return "Perl", false
	}
	return "Pod", true
},
// ".toc" (substring-based variant): World of Warcraft Addon Data is tried
// before TeX; no match yields (OtherLanguage, false).
".toc": func(i []byte) (string, bool) {
	switch {
	case substring.BytesRegexp("## |@no-lib-strip@").Match(i):
		return "World of Warcraft Addon Data", true
	case substring.BytesRegexp("(contentsline|defcounter|beamer|boolfalse)").Match(i):
		return "TeX", true
	}
	return OtherLanguage, false
},
// ".sls": Scheme when schemeMatcher hits; otherwise SaltStack, reported as
// not safe.
".sls": func(i []byte) (string, bool) {
	if !schemeMatcher.Match(i) {
		return "SaltStack", false
	}
	return "Scheme", true
},
// ".sql" (substring-based variant): PLpgSQL, then SQLPL, then PLSQL; the
// fallback is SQL, reported as not safe.
".sql": func(i []byte) (string, bool) {
	switch {
	case pgSQLMatcher.Match(i):
		return "PLpgSQL", true
	case db2SQLMatcher.Match(i):
		return "SQLPL", true
	case oracleSQLMatcher.Match(i):
		return "PLSQL", true
	}
	return "SQL", false
},
}
// init registers extension aliases in matchers: ".for" reuses the ".f"
// matcher and ".lsp" reuses the ".lisp" matcher.
func init() {
	aliases := [][2]string{
		{".for", ".f"},
		{".lsp", ".lisp"},
	}
	for _, pair := range aliases {
		alias, target := pair[0], pair[1]
		matchers[alias] = matchers[target]
	}
}
// Package-level content matchers referenced by the entries of the matchers map.
//
// Two families are declared here:
//   - hand-named matchers built with the substring package (blitzBasicMatcher,
//     cPlusPlusMatcher, ...);
//   - matchers named <ext>_<Language>_Matcher_<n>, compiled with
//     regexp.MustCompile, which panics during package initialization if a
//     pattern is invalid.
var (
// NOTE(review): inside a raw string `\\s*;` is the regexp "literal backslash,
// then zero or more 's', then ';'"; compare bb_BlitzBasic_Matcher_0 below,
// which uses `^\s*; `. Confirm the double backslash is intended.
blitzBasicMatcher = substring.BytesOr(
substring.BytesHas(`End Function`),
substring.BytesRegexp(`\\s*;`),
)
cPlusPlusMatcher = substring.BytesOr(
substring.BytesRegexp(`\s*template\s*<`),
substring.BytesRegexp(`\s*#\s*include <(cstdint|string|vector|map|list|array|bitset|queue|stack|forward_list|unordered_map|unordered_set|(i|o|io)stream)>`),
substring.BytesRegexp(`\n[ \t]*try`),
substring.BytesRegexp(`\n[ \t]*(class|(using[ \t]+)?namespace)\s+\w+`),
substring.BytesRegexp(`\n[ \t]*(private|public|protected):\n`),
substring.BytesRegexp(`std::\w+`),
substring.BytesRegexp(`[ \t]*catch\s*`),
)
commonLispMatcher = substring.BytesRegexp("(?i)(defpackage|defun|in-package)")
coolMatcher = substring.BytesRegexp("(?i)class")
openCLMatcher = substring.BytesOr(
substring.BytesHas("\n}"),
substring.BytesHas("}\n"),
substring.BytesHas(`/*`),
substring.BytesHas(`//`),
)
apexMatcher = substring.BytesOr(
substring.BytesHas("{\n"),
substring.BytesHas("}\n"),
)
texMatcher = substring.BytesOr(
substring.BytesHas(`%`),
substring.BytesHas(`\`),
)
openEdgeABLMatcher = substring.BytesRegexp(`(?i)(class|define|interface|method|using)\b`)
visualBasicMatcher = substring.BytesOr(
substring.BytesHas("'*"),
substring.BytesRegexp(`(?i)(attribute|option|sub|private|protected|public|friend)\b`),
)
// NOTE(review): the argument looks like a regular expression but is passed to
// BytesHas, whereas sibling patterns use BytesRegexp. Presumably BytesHas is a
// literal substring test — confirm against the substring package.
mathematicaMatcher = substring.BytesHas(`^\s*\(\*`)
matlabMatcher = substring.BytesRegexp(`\b(function\s*[\[a-zA-Z]+|pcolor|classdef|figure|end|elseif)\b`)
objectiveCMatcher = substring.BytesRegexp(
`@(interface|class|protocol|property|end|synchronised|selector|implementation)\b|#import\s+.+\.h[">]`)
pascalMatcher = substring.BytesRegexp(`(?ims)^\s*(PROGRAM|UNIT|USES|FUNCTION)[\s\n]+.*?;`)
prologMatcher = substring.BytesRegexp(`^[^#]+:-`)
perlMatcher = substring.BytesRegexp(`use strict|use\s+v?5\.`)
perl6Matcher = substring.BytesRegexp(`(use v6|(my )?class|module)`)
pgSQLMatcher = substring.BytesOr(
substring.BytesRegexp(`(?i)\\i\b|AS \$\$|LANGUAGE '?plpgsql'?`),
substring.BytesRegexp(`(?i)SECURITY (DEFINER|INVOKER)`),
substring.BytesRegexp(`BEGIN( WORK| TRANSACTION)?;`),
)
db2SQLMatcher = substring.BytesOr(
substring.BytesRegexp(`(?i)(alter module)|(language sql)|(begin( NOT)+ atomic)`),
substring.BytesRegexp(`(?i)signal SQLSTATE '[0-9]+'`),
)
oracleSQLMatcher = substring.BytesOr(
substring.BytesRegexp(`(?i)\$\$PLSQL_|XMLTYPE|sysdate|systimestamp|\.nextval|connect by|AUTHID (DEFINER|CURRENT_USER)`),
substring.BytesRegexp(`(?i)constructor\W+function`),
)
schemeMatcher = substring.BytesRegexp(`(?m)\A(^\s*;;.*$)*\s*\(`)
// Matchers from here on follow the <ext>_<Language>_Matcher_<n> naming; every
// pattern enables multi-line mode via (?m), some also case-insensitive (i) or
// dot-matches-newline (s) mode.
asc_PublicKey_Matcher_0 = regexp.MustCompile(`(?m)^(----[- ]BEGIN|ssh-(rsa|dss)) `)
asc_AsciiDoc_Matcher_0 = regexp.MustCompile(`(?m)^[=-]+(\s|\n)|{{[A-Za-z]`)
asc_AGSScript_Matcher_0 = regexp.MustCompile(`(?m)^(\/\/.+|((import|export)\s+)?(function|int|float|char)\s+((room|repeatedly|on|game)_)?([A-Za-z]+[A-Za-z_0-9]+)\s*[;\(])`)
bb_BlitzBasic_Matcher_0 = regexp.MustCompile(`(?m)^\s*; `)
bb_BlitzBasic_Matcher_1 = regexp.MustCompile(`(?m)End Function`)
bb_BitBake_Matcher_0 = regexp.MustCompile(`(?m)^\s*(# |include|require)\b`)
builds_XML_Matcher_0 = regexp.MustCompile(`(?mi)^(\s*)(<Project|<Import|<Property|<?xml|xmlns)`)
ch_xBase_Matcher_0 = regexp.MustCompile(`(?mi)^\s*#\s*(if|ifdef|ifndef|define|command|xcommand|translate|xtranslate|include|pragma|undef)\b`)
cl_CommonLisp_Matcher_0 = regexp.MustCompile(`(?mi)^\s*\((defun|in-package|defpackage) `)
cl_Cool_Matcher_0 = regexp.MustCompile(`(?m)^class`)
cl_OpenCL_Matcher_0 = regexp.MustCompile(`(?m)\/\* |\/\/ |^\}`)
cls_TeX_Matcher_0 = regexp.MustCompile(`(?m)\\\w+{`)
cs_Smalltalk_Matcher_0 = regexp.MustCompile(`(?m)![\w\s]+methodsFor: `)
cs_CSharp_Matcher_0 = regexp.MustCompile(`(?m)^\s*namespace\s*[\w\.]+\s*{`)
cs_CSharp_Matcher_1 = regexp.MustCompile(`(?m)^\s*\/\/`)
d_D_Matcher_0 = regexp.MustCompile(`(?m)^module\s+[\w.]*\s*;|import\s+[\w\s,.:]*;|\w+\s+\w+\s*\(.*\)(?:\(.*\))?\s*{[^}]*}|unittest\s*(?:\(.*\))?\s*{[^}]*}`)
d_DTrace_Matcher_0 = regexp.MustCompile(`(?m)^(\w+:\w*:\w*:\w*|BEGIN|END|provider\s+|(tick|profile)-\w+\s+{[^}]*}|#pragma\s+D\s+(option|attributes|depends_on)\s|#pragma\s+ident\s)`)
d_Makefile_Matcher_0 = regexp.MustCompile(`(?m)([\/\\].*:\s+.*\s\\$|: \\$|^ : |^[\w\s\/\\.]+\w+\.\w+\s*:\s+[\w\s\/\\.]+\w+\.\w+)`)
ecl_ECLiPSe_Matcher_0 = regexp.MustCompile(`(?m)^[^#]+:-`)
ecl_ECL_Matcher_0 = regexp.MustCompile(`(?m):=`)
es_Erlang_Matcher_0 = regexp.MustCompile(`(?m)^\s*(?:%%|main\s*\(.*?\)\s*->)`)
f_Forth_Matcher_0 = regexp.MustCompile(`(?m)^: `)
f_FilebenchWML_Matcher_0 = regexp.MustCompile(`(?m)flowop`)
f_FORTRAN_Matcher_0 = regexp.MustCompile(`(?mi)^([c*][^abd-z]| (subroutine|program|end|data)\s|\s*!)`)
for_Forth_Matcher_0 = regexp.MustCompile(`(?m)^: `)
for_FORTRAN_Matcher_0 = regexp.MustCompile(`(?mi)^([c*][^abd-z]| (subroutine|program|end|data)\s|\s*!)`)
fr_Forth_Matcher_0 = regexp.MustCompile(`(?m)^(: |also |new-device|previous )`)
fr_Frege_Matcher_0 = regexp.MustCompile(`(?m)^\s*(import|module|package|data|type) `)
fs_Forth_Matcher_0 = regexp.MustCompile(`(?m)^(: |new-device)`)
fs_FSharp_Matcher_0 = regexp.MustCompile(`(?m)^\s*(#light|import|let|module|namespace|open|type)`)
fs_GLSL_Matcher_0 = regexp.MustCompile(`(?m)^\s*(#version|precision|uniform|varying|vec[234])`)
fs_Filterscript_Matcher_0 = regexp.MustCompile(`(?m)#include|#pragma\s+(rs|version)|__attribute__`)
gs_Gosu_Matcher_0 = regexp.MustCompile(`(?m)^uses java\.`)
h_ObjectiveDashC_Matcher_0 = regexp.MustCompile(`(?m)^\s*(@(interface|class|protocol|property|end|synchronised|selector|implementation)\b|#import\s+.+\.h[">])`)
h_CPlusPlus_Matcher_0 = regexp.MustCompile(`(?m)^\s*#\s*include <(cstdint|string|vector|map|list|array|bitset|queue|stack|forward_list|unordered_map|unordered_set|(i|o|io)stream)>`)
h_CPlusPlus_Matcher_1 = regexp.MustCompile(`(?m)^\s*template\s*<`)
h_CPlusPlus_Matcher_2 = regexp.MustCompile(`(?m)^[ \t]*try`)
h_CPlusPlus_Matcher_3 = regexp.MustCompile(`(?m)^[ \t]*catch\s*\(`)
h_CPlusPlus_Matcher_4 = regexp.MustCompile(`(?m)^[ \t]*(class|(using[ \t]+)?namespace)\s+\w+`)
h_CPlusPlus_Matcher_5 = regexp.MustCompile(`(?m)^[ \t]*(private|public|protected):$`)
h_CPlusPlus_Matcher_6 = regexp.MustCompile(`(?m)std::\w+`)
inc_PHP_Matcher_0 = regexp.MustCompile(`(?m)^<\?(?:php)?`)
inc_POVDashRaySDL_Matcher_0 = regexp.MustCompile(`(?m)^\s*#(declare|local|macro|while)\s`)
l_CommonLisp_Matcher_0 = regexp.MustCompile(`(?m)\(def(un|macro)\s`)
l_Lex_Matcher_0 = regexp.MustCompile(`(?m)^(%[%{}]xs|<.*>)`)
l_Groff_Matcher_0 = regexp.MustCompile(`(?mi)^\.[a-z][a-z](\s|$)`)
l_PicoLisp_Matcher_0 = regexp.MustCompile(`(?m)^\((de|class|rel|code|data|must)\s`)
ls_LoomScript_Matcher_0 = regexp.MustCompile(`(?m)^\s*package\s*[\w\.\/\*\s]*\s*{`)
lsp_CommonLisp_Matcher_0 = regexp.MustCompile(`(?mi)^\s*\((defun|in-package|defpackage) `)
lsp_NewLisp_Matcher_0 = regexp.MustCompile(`(?m)^\s*\(define `)
lisp_CommonLisp_Matcher_0 = regexp.MustCompile(`(?mi)^\s*\((defun|in-package|defpackage) `)
lisp_NewLisp_Matcher_0 = regexp.MustCompile(`(?m)^\s*\(define `)
m_ObjectiveDashC_Matcher_0 = regexp.MustCompile(`(?m)^\s*(@(interface|class|protocol|property|end|synchronised|selector|implementation)\b|#import\s+.+\.h[">])`)
m_Mercury_Matcher_0 = regexp.MustCompile(`(?m):- module`)
m_MUF_Matcher_0 = regexp.MustCompile(`(?m)^: `)
m_M_Matcher_0 = regexp.MustCompile(`(?m)^\s*;`)
m_Mathematica_Matcher_0 = regexp.MustCompile(`(?m)\*\)$`)
m_Matlab_Matcher_0 = regexp.MustCompile(`(?m)^\s*%`)
m_Limbo_Matcher_0 = regexp.MustCompile(`(?m)^\w+\s*:\s*module\s*{`)
md_Markdown_Matcher_0 = regexp.MustCompile(`(?mi)(^[-a-z0-9=#!\*\[|>])|<\/`)
md_Markdown_Matcher_1 = regexp.MustCompile(`(?m)^$`)
md_GCCmachinedescription_Matcher_0 = regexp.MustCompile(`(?m)^(;;|\(define_)`)
ml_OCaml_Matcher_0 = regexp.MustCompile(`(?m)(^\s*module)|let rec |match\s+(\S+\s)+with`)
ml_StandardML_Matcher_0 = regexp.MustCompile(`(?m)=> |case\s+(\S+\s)+of`)
mod_XML_Matcher_0 = regexp.MustCompile(`(?m)<!ENTITY `)
mod_ModulaDash2_Matcher_0 = regexp.MustCompile(`(?mi)^\s*MODULE [\w\.]+;`)
mod_ModulaDash2_Matcher_1 = regexp.MustCompile(`(?mi)^\s*END [\w\.]+;`)
ms_Groff_Matcher_0 = regexp.MustCompile(`(?mi)^[.'][a-z][a-z](\s|$)`)
n_Groff_Matcher_0 = regexp.MustCompile(`(?m)^[.']`)
n_Nemerle_Matcher_0 = regexp.MustCompile(`(?m)^(module|namespace|using)\s`)
ncl_Text_Matcher_0 = regexp.MustCompile(`(?m)THE_TITLE`)
nl_NL_Matcher_0 = regexp.MustCompile(`(?m)^(b|g)[0-9]+ `)
php_Hack_Matcher_0 = regexp.MustCompile(`(?m)<\?hh`)
// NOTE(review): `<?` is unescaped here, so it means "optional '<'" rather
// than a literal "<?" (php_Hack_Matcher_0 above escapes it as `<\?`). As
// written this matches any input containing a character other than 'h' —
// confirm whether `<\?[^h]` was intended.
php_PHP_Matcher_0 = regexp.MustCompile(`(?m)<?[^h]`)
pl_Prolog_Matcher_0 = regexp.MustCompile(`(?m)^[^#]*:-`)
pl_Perl_Matcher_0 = regexp.MustCompile(`(?m)use strict|use\s+v?5\.`)
pl_Perl6_Matcher_0 = regexp.MustCompile(`(?m)^(use v6|(my )?class|module)`)
pm_Perl_Matcher_0 = regexp.MustCompile(`(?m)use strict|use\s+v?5\.`)
pm_Perl6_Matcher_0 = regexp.MustCompile(`(?m)^(use v6|(my )?class|module)`)
t_Perl_Matcher_0 = regexp.MustCompile(`(?m)use strict|use\s+v?5\.`)
t_Perl6_Matcher_0 = regexp.MustCompile(`(?m)^(use v6|(my )?class|module)`)
pod_Pod_Matcher_0 = regexp.MustCompile(`(?m)^=\w+\b`)
pro_Prolog_Matcher_0 = regexp.MustCompile(`(?m)^[^#]+:-`)
pro_INI_Matcher_0 = regexp.MustCompile(`(?m)last_client=`)
pro_QMake_Matcher_0 = regexp.MustCompile(`(?m)HEADERS`)
pro_QMake_Matcher_1 = regexp.MustCompile(`(?m)SOURCES`)
pro_IDL_Matcher_0 = regexp.MustCompile(`(?m)^\s*function[ \w,]+$`)
props_XML_Matcher_0 = regexp.MustCompile(`(?mi)^(\s*)(<Project|<Import|<Property|<?xml|xmlns)`)
props_INI_Matcher_0 = regexp.MustCompile(`(?mi)\w+\s*=\s*`)
r_Rebol_Matcher_0 = regexp.MustCompile(`(?mi)\bRebol\b`)
r_R_Matcher_0 = regexp.MustCompile(`(?m)<-|^\s*#`)
rno_RUNOFF_Matcher_0 = regexp.MustCompile(`(?mi)^\.!|^\.end lit(?:eral)?\b`)
rno_Groff_Matcher_0 = regexp.MustCompile(`(?m)^\.\\" `)
rpy_Python_Matcher_0 = regexp.MustCompile(`(?ms)(^(import|from|class|def)\s)`)
rs_Rust_Matcher_0 = regexp.MustCompile(`(?m)^(use |fn |mod |pub |macro_rules|impl|#!?\[)`)
rs_RenderScript_Matcher_0 = regexp.MustCompile(`(?m)#include|#pragma\s+(rs|version)|__attribute__`)
sc_SuperCollider_Matcher_0 = regexp.MustCompile(`(?m)\^(this|super)\.`)
sc_SuperCollider_Matcher_1 = regexp.MustCompile(`(?m)^\s*(\+|\*)\s*\w+\s*{`)
sc_SuperCollider_Matcher_2 = regexp.MustCompile(`(?m)^\s*~\w+\s*=\.`)
sc_Scala_Matcher_0 = regexp.MustCompile(`(?m)^\s*import (scala|java)\.`)
sc_Scala_Matcher_1 = regexp.MustCompile(`(?m)^\s*val\s+\w+\s*=`)
sc_Scala_Matcher_2 = regexp.MustCompile(`(?m)^\s*class\b`)
sql_PLpgSQL_Matcher_0 = regexp.MustCompile(`(?mi)^\\i\b|AS \$\$|LANGUAGE '?plpgsql'?`)
sql_PLpgSQL_Matcher_1 = regexp.MustCompile(`(?mi)SECURITY (DEFINER|INVOKER)`)
sql_PLpgSQL_Matcher_2 = regexp.MustCompile(`(?mi)BEGIN( WORK| TRANSACTION)?;`)
sql_SQLPL_Matcher_0 = regexp.MustCompile(`(?mi)(alter module)|(language sql)|(begin( NOT)+ atomic)`)
sql_SQLPL_Matcher_1 = regexp.MustCompile(`(?mi)signal SQLSTATE '[0-9]+'`)
sql_PLSQL_Matcher_0 = regexp.MustCompile(`(?mi)\$\$PLSQL_|XMLTYPE|sysdate|systimestamp|\.nextval|connect by|AUTHID (DEFINER|CURRENT_USER)`)
sql_PLSQL_Matcher_1 = regexp.MustCompile(`(?mi)constructor\W+function`)
// NOTE(review): the first alternative is the literal text "! /begin"; the
// leading "! /" looks like leftover syntax from wherever this pattern was
// extracted (possibly a negated match). Confirm the intended pattern and
// whether the ".sql" entry should negate this matcher.
sql_SQL_Matcher_0 = regexp.MustCompile(`(?mi)! /begin|boolean|package|exception`)
srt_SubRipText_Matcher_0 = regexp.MustCompile(`(?m)^(\d{2}:\d{2}:\d{2},\d{3})\s*(-->)\s*(\d{2}:\d{2}:\d{2},\d{3})$`)
toc_WorldofWarcraftAddonData_Matcher_0 = regexp.MustCompile(`(?m)^## |@no-lib-strip@`)
toc_TeX_Matcher_0 = regexp.MustCompile(`(?m)^\\(contentsline|defcounter|beamer|boolfalse)`)
ts_XML_Matcher_0 = regexp.MustCompile(`(?m)<TS`)
tst_GAP_Matcher_0 = regexp.MustCompile(`(?m)gap> `)
tsx_TypeScript_Matcher_0 = regexp.MustCompile(`(?m)^\s*(import.+(from\s+|require\()['"]react|\/\/\/\s*<reference\s)`)
tsx_XML_Matcher_0 = regexp.MustCompile(`(?mi)^\s*<\?xml\s+version`)
)

View File

@ -11,8 +11,6 @@ import (
)
func (s *TSuite) TestGetLanguageByContentLinguistCorpus(c *C) {
c.Skip("report")
var total, failed, ok, other, unsafe int
w := new(tabwriter.Writer)

View File

@ -20,16 +20,30 @@ const (
languagesTestTmplPath = "test_files/languages.test.tmpl"
languagesTestTmplName = "languages.test.tmpl"
commitLangTest = "fe8b44ab8a225b1ffa75b983b916ea22fee5b6f7"
// Heuristics test
heuristicsTestFile = "test_files/heuristics.test.rb"
contentGold = "test_files/content.gold"
contentTestTmplPath = "test_files/content.test.go.tmpl"
contentTestTmplName = "content.test.go.tmpl"
commitHeuristicsTest = "fe8b44ab8a225b1ffa75b983b916ea22fee5b6f7"
)
func TestFromFile(t *testing.T) {
goldLang, err := ioutil.ReadFile(formatedLangGold)
assert.NoError(t, err)
goldContent, err := ioutil.ReadFile(formatedContentGold)
assert.NoError(t, err)
outPathLang, err := ioutil.TempFile("/tmp", "generator-test-")
assert.NoError(t, err)
defer os.Remove(outPathLang.Name())
outPathContent, err := ioutil.TempFile("/tmp", "generator-test-")
assert.NoError(t, err)
defer os.Remove(outPathContent.Name())
tests := []struct {
name string
fileToParse string
@ -50,6 +64,16 @@ func TestFromFile(t *testing.T) {
generate: Languages,
wantOut: goldLang,
},
{
name: "TestFromFile_Heuristics",
fileToParse: heuristicsTestFile,
outPath: outPathContent.Name(),
tmplPath: contentTestTmplPath,
tmplName: contentTestTmplName,
commit: commitHeuristicsTest,
generate: Heuristics,
wantOut: goldContent,
},
}
for _, tt := range tests {
@ -96,3 +120,37 @@ func TestLanguages(t *testing.T) {
})
}
}
// TestHeuristics feeds the heuristics fixture file to Heuristics and checks
// that the generated bytes equal the golden content file.
func TestHeuristics(t *testing.T) {
	golden, err := ioutil.ReadFile(contentGold)
	assert.NoError(t, err)

	fixture, err := ioutil.ReadFile(heuristicsTestFile)
	assert.NoError(t, err)

	type heuristicsCase struct {
		name     string
		input    []byte
		tmplPath string
		tmplName string
		commit   string
		wantOut  []byte
	}

	cases := []heuristicsCase{{
		name:     "TestHeuristics",
		input:    fixture,
		tmplPath: contentTestTmplPath,
		tmplName: contentTestTmplName,
		commit:   commitHeuristicsTest,
		wantOut:  golden,
	}}

	for idx := range cases {
		tc := cases[idx]
		t.Run(tc.name, func(t *testing.T) {
			got, err := Heuristics(tc.input, tc.tmplPath, tc.tmplName, tc.commit)
			assert.NoError(t, err)

			// The failure message carries both payloads as text so a mismatch is readable.
			failMsg := fmt.Sprintf("Heuristics() = %v, want %v", string(got), string(tc.wantOut))
			assert.EqualValues(t, tc.wantOut, got, failMsg)
		})
	}
}

View File

@ -30,6 +30,10 @@ func main() {
if err := generator.FromFile(languagesYAML, langFile, languagesTmplPath, languagesTmpl, commit, generator.Languages); err != nil {
log.Println(err)
}
if err := generator.FromFile(heuristicsRuby, contentFile, contentTmplPath, contentTmpl, commit, generator.Heuristics); err != nil {
log.Println(err)
}
}
func getCommit(path string) (string, error) {