mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-09-18 19:28:12 +00:00
Sync to linguist 7.2.0: heuristics.yml support (#189)
Sync \w Github Linguist v7.2.0 Includes new way of handling `heuristics.yml` and all `./data/*` re-generated using Github Linguist [v7.2.0](https://github.com/github/linguist/releases/tag/v7.2.0) release tag. - many new languages - better vendoring detection - update doc on update&known issues.
This commit is contained in:
109
data/rule/rule.go
Normal file
109
data/rule/rule.go
Normal file
@@ -0,0 +1,109 @@
|
||||
// Package rule contains rule-based heuristic implementations.
|
||||
// It is used in the generated code in content.go for disambiguation of languages
|
||||
// with colliding extensions, based on regexps from Linguist data.
|
||||
package rule
|
||||
|
||||
// Heuristic consist of (a number of) rules where each, if matches,
|
||||
// identifes content as belonging to a programming language(s).
|
||||
type Heuristic interface {
|
||||
Matcher
|
||||
Languages() []string
|
||||
}
|
||||
|
||||
// Matcher checks if the data matches (number of) pattern.
|
||||
// Every heuristic rule below implements this interface.
|
||||
// A regexp.Regexp satisfies this interface and can be used instead.
|
||||
type Matcher interface {
|
||||
Match(data []byte) bool
|
||||
}
|
||||
|
||||
// languages struct incapsulate data common to every Matcher: all languages
|
||||
// that it identifies.
|
||||
type languages struct {
|
||||
langs []string
|
||||
}
|
||||
|
||||
// Languages returns all languages, identified by this Matcher.
|
||||
func (l languages) Languages() []string {
|
||||
return l.langs
|
||||
}
|
||||
|
||||
// MatchingLanguages is a helper to create new languages.
|
||||
func MatchingLanguages(langs ...string) languages {
|
||||
return languages{langs}
|
||||
}
|
||||
|
||||
// Implements a Heuristic.
|
||||
type or struct {
|
||||
languages
|
||||
pattern Matcher
|
||||
}
|
||||
|
||||
// Or rule matches, if a single matching pattern exists.
|
||||
// It recives only one pattern as it relies on compile-time optimization that
|
||||
// represtes union with | inside a single regexp.
|
||||
func Or(l languages, r Matcher) Heuristic {
|
||||
return or{l, r}
|
||||
}
|
||||
|
||||
// Match implements rule.Matcher.
|
||||
func (r or) Match(data []byte) bool {
|
||||
return r.pattern.Match(data)
|
||||
}
|
||||
|
||||
// Implements a Heuristic.
|
||||
type and struct {
|
||||
languages
|
||||
patterns []Matcher
|
||||
}
|
||||
|
||||
// And rule matches, if each of the patterns does match.
|
||||
func And(l languages, m ...Matcher) Heuristic {
|
||||
return and{l, m}
|
||||
}
|
||||
|
||||
// Match implements data.Matcher.
|
||||
func (r and) Match(data []byte) bool {
|
||||
for _, p := range r.patterns {
|
||||
if !p.Match(data) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Implements a Heuristic.
|
||||
type not struct {
|
||||
languages
|
||||
Patterns []Matcher
|
||||
}
|
||||
|
||||
// Not rule matches if none of the patterns match.
|
||||
func Not(l languages, r ...Matcher) Heuristic {
|
||||
return not{l, r}
|
||||
}
|
||||
|
||||
// Match implements data.Matcher.
|
||||
func (r not) Match(data []byte) bool {
|
||||
for _, p := range r.Patterns {
|
||||
if p.Match(data) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Implements a Heuristic.
|
||||
type always struct {
|
||||
languages
|
||||
}
|
||||
|
||||
// Always rule always matches. Often is used as a default fallback.
|
||||
func Always(l languages) Heuristic {
|
||||
return always{l}
|
||||
}
|
||||
|
||||
// Match implements Matcher.
|
||||
func (r always) Match(data []byte) bool {
|
||||
return true
|
||||
}
|
39
data/rule/rule_test.go
Normal file
39
data/rule/rule_test.go
Normal file
@@ -0,0 +1,39 @@
|
||||
package rule
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
const lang = "ActionScript"
|
||||
|
||||
var fixtures = []struct {
|
||||
name string
|
||||
rule Heuristic
|
||||
numLangs int
|
||||
matching string
|
||||
noMatch string
|
||||
}{
|
||||
{"Always", Always(MatchingLanguages(lang)), 1, "a", ""},
|
||||
{"Not", Not(MatchingLanguages(lang), regexp.MustCompile(`a`)), 1, "b", "a"},
|
||||
{"And", And(MatchingLanguages(lang), regexp.MustCompile(`a`), regexp.MustCompile(`b`)), 1, "ab", "a"},
|
||||
{"Or", Or(MatchingLanguages(lang), regexp.MustCompile(`a|b`)), 1, "ab", "c"},
|
||||
}
|
||||
|
||||
func TestRules(t *testing.T) {
|
||||
for _, f := range fixtures {
|
||||
t.Run(f.name, func(t *testing.T) {
|
||||
assert.NotNil(t, f.rule)
|
||||
assert.NotNil(t, f.rule.Languages())
|
||||
assert.Equal(t, f.numLangs, len(f.rule.Languages()))
|
||||
assert.Truef(t, f.rule.Match([]byte(f.matching)),
|
||||
"'%s' is expected to .Match() by rule %s%v", f.matching, f.name, f.rule)
|
||||
if f.noMatch != "" {
|
||||
assert.Falsef(t, f.rule.Match([]byte(f.noMatch)),
|
||||
"'%s' is expected NOT to .Match() by rule %s%v", f.noMatch, f.name, f.rule)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user