Sync to linguist 7.2.0: heuristics.yml support (#189)

Sync \w Github Linguist v7.2.0

Includes new way of handling `heuristics.yml` and
all `./data/*` re-generated using Github Linguist [v7.2.0](https://github.com/github/linguist/releases/tag/v7.2.0)
release tag.

 - many new languages
 - better vendoring detection
 - update doc on update&known issues.
This commit is contained in:
Alexander
2019-02-14 12:47:45 +01:00
committed by GitHub
parent 13d3d66d37
commit 3499750785
45 changed files with 105155 additions and 74316 deletions

View File

@ -1,9 +1,34 @@
package data
// LanguagesByAlias keeps alias for different languages and use the name of the languages as an alias too.
import "strings"
// LanguageByAliasMap keeps alias for different languages and use the name of the languages as an alias too.
// All the keys (alias or not) are written in lower case and the whitespaces has been replaced by underscores.
var LanguagesByAlias = map[string]string{
var LanguageByAliasMap = map[string]string{
{{range $alias, $language := . -}}
"{{ $alias }}": {{ printf "%q" $language -}},
"{{ $alias }}": {{ printf "%q" $language -}},
{{end -}}
}
// LanguageByAlias looks up the language name by it's alias or name.
// It mirrors the logic of github linguist and is needed e.g for heuristcs.yml
// that mixes names and aliases in a language field (see XPM example).
func LanguageByAlias(langOrAlias string) (lang string, ok bool) {
k := convertToAliasKey(langOrAlias)
lang, ok = LanguageByAliasMap[k]
return
}
// convertToAliasKey converts language name to a key in LanguageByAliasMap.
// Following
// - internal.code-generator.generator.convertToAliasKey()
// - GetLanguageByAlias()
// conventions.
// It is here to avoid dependency on "generate" and "enry" packages.
func convertToAliasKey(langName string) string {
ak := strings.SplitN(langName, `,`, 2)[0]
ak = strings.Replace(ak, ` `, `_`, -1)
ak = strings.ToLower(ak)
return ak
}

View File

@ -1,33 +1,51 @@
package data
import "gopkg.in/toqueteos/substring.v1"
import (
"regexp"
type languageMatcher func ([]byte) []string
"gopkg.in/src-d/enry.v1/data/rule"
)
var ContentMatchers = map[string]languageMatcher{
{{ range $index, $disambiguator := . -}}
{{ printf "%q" $disambiguator.Extension }}: func(i []byte) []string {
{{ range $i, $language := $disambiguator.Languages -}}
{{- if not (avoidLanguage $language) }}
{{- if gt (len $language.Heuristics) 0 }}
{{- if gt $i 0 }} else {{ end -}}
if {{- range $j, $heuristic := $language.Heuristics }} {{ $heuristic.Name }}.Match(string(i))
{{- if lt $j (len $language.LogicRelations) }} {{index $language.LogicRelations $j}} {{- end -}} {{ end }} {
return []string{ {{- printf "%q" $language.Language -}} }
}
{{- end -}}
{{- end -}}
{{- end}}
return {{ returnLanguages $disambiguator.Languages | returnStringSlice }}
},
var ContentHeuristics = map[string]*Heuristics{
{{ range $ext, $rules := . -}}
{{ printf "%q" $ext }}: &Heuristics{
{{ range $rule := $rules -}}
{{template "Rule" $rule}}
{{ end -}}
},
{{ end -}}
}
var (
{{ range $index, $heuristic := getAllHeuristics . -}}
{{ $heuristic.Name }} = substring.Regexp(`{{ $heuristic.Regexp }}`)
{{ end -}}
)
{{ define "Rule" -}}
{{ if eq .Op "And" -}}
rule.And(
{{ template "Languages" .Langs -}}
{{ range $rule := .Rules -}}
{{template "Rule" $rule}}
{{ end -}}
),
{{- else if eq .Op "Or" -}}
rule.Or(
{{ template "Languages" .Langs -}}
regexp.MustCompile(`{{ .Pattern }}`),
),
{{- else if eq .Op "Not" -}}
rule.Not(
{{ template "Languages" .Langs -}}
regexp.MustCompile(`{{ .Pattern }}`),
),
{{- else if eq .Op "Always" -}}
rule.Always(
{{ template "Languages" .Langs -}}
),
{{ end -}}
{{ end -}}
{{define "Languages" -}}
{{with . -}}
rule.MatchingLanguages( {{range .}} {{printf "\"%s\"" .}}, {{end}} ),
{{ else -}}
rule.MatchingLanguages(""),
{{end -}}
{{end}}