Sync to linguist 7.2.0: heuristics.yml support (#189)

Sync \w Github Linguist v7.2.0

Includes new way of handling `heuristics.yml` and
all `./data/*` re-generated using Github Linguist [v7.2.0](https://github.com/github/linguist/releases/tag/v7.2.0)
release tag.

 - many new languages
 - better vendoring detection
 - update doc on update&known issues.
This commit is contained in:
Alexander
2019-02-14 12:47:45 +01:00
committed by GitHub
parent 13d3d66d37
commit 3499750785
45 changed files with 105155 additions and 74316 deletions

109
data/rule/rule.go Normal file
View File

@@ -0,0 +1,109 @@
// Package rule contains rule-based heuristic implementations.
// It is used in the generated code in content.go for disambiguation of languages
// with colliding extensions, based on regexps from Linguist data.
package rule
// Heuristic consist of (a number of) rules where each, if matches,
// identifes content as belonging to a programming language(s).
type Heuristic interface {
Matcher
Languages() []string
}
// Matcher checks if the data matches (number of) pattern.
// Every heuristic rule below implements this interface.
// A regexp.Regexp satisfies this interface and can be used instead.
type Matcher interface {
Match(data []byte) bool
}
// languages struct incapsulate data common to every Matcher: all languages
// that it identifies.
type languages struct {
langs []string
}
// Languages returns all languages, identified by this Matcher.
func (l languages) Languages() []string {
return l.langs
}
// MatchingLanguages is a helper to create new languages.
func MatchingLanguages(langs ...string) languages {
return languages{langs}
}
// Implements a Heuristic.
type or struct {
languages
pattern Matcher
}
// Or rule matches, if a single matching pattern exists.
// It recives only one pattern as it relies on compile-time optimization that
// represtes union with | inside a single regexp.
func Or(l languages, r Matcher) Heuristic {
return or{l, r}
}
// Match implements rule.Matcher.
func (r or) Match(data []byte) bool {
return r.pattern.Match(data)
}
// Implements a Heuristic.
type and struct {
languages
patterns []Matcher
}
// And rule matches, if each of the patterns does match.
func And(l languages, m ...Matcher) Heuristic {
return and{l, m}
}
// Match implements data.Matcher.
func (r and) Match(data []byte) bool {
for _, p := range r.patterns {
if !p.Match(data) {
return false
}
}
return true
}
// Implements a Heuristic.
type not struct {
languages
Patterns []Matcher
}
// Not rule matches if none of the patterns match.
func Not(l languages, r ...Matcher) Heuristic {
return not{l, r}
}
// Match implements data.Matcher.
func (r not) Match(data []byte) bool {
for _, p := range r.Patterns {
if p.Match(data) {
return false
}
}
return true
}
// Implements a Heuristic.
type always struct {
languages
}
// Always rule always matches. Often is used as a default fallback.
func Always(l languages) Heuristic {
return always{l}
}
// Match implements Matcher.
func (r always) Match(data []byte) bool {
return true
}

39
data/rule/rule_test.go Normal file
View File

@@ -0,0 +1,39 @@
package rule
import (
"regexp"
"testing"
"github.com/stretchr/testify/assert"
)
const lang = "ActionScript"
var fixtures = []struct {
name string
rule Heuristic
numLangs int
matching string
noMatch string
}{
{"Always", Always(MatchingLanguages(lang)), 1, "a", ""},
{"Not", Not(MatchingLanguages(lang), regexp.MustCompile(`a`)), 1, "b", "a"},
{"And", And(MatchingLanguages(lang), regexp.MustCompile(`a`), regexp.MustCompile(`b`)), 1, "ab", "a"},
{"Or", Or(MatchingLanguages(lang), regexp.MustCompile(`a|b`)), 1, "ab", "c"},
}
func TestRules(t *testing.T) {
for _, f := range fixtures {
t.Run(f.name, func(t *testing.T) {
assert.NotNil(t, f.rule)
assert.NotNil(t, f.rule.Languages())
assert.Equal(t, f.numLangs, len(f.rule.Languages()))
assert.Truef(t, f.rule.Match([]byte(f.matching)),
"'%s' is expected to .Match() by rule %s%v", f.matching, f.name, f.rule)
if f.noMatch != "" {
assert.Falsef(t, f.rule.Match([]byte(f.noMatch)),
"'%s' is expected NOT to .Match() by rule %s%v", f.noMatch, f.name, f.rule)
}
})
}
}