changes in the API, ready to version 2

This commit is contained in:
Manuel Carmona 2017-05-31 12:07:46 +02:00
parent 5b304524d1
commit 0d5dff1979
23 changed files with 1772 additions and 1448 deletions

View File

@ -2,7 +2,7 @@ package slinguist
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: 60f864a138650dd17fafc94814be9ee2d3aaef8c
// Extracted from github/linguist commit: b6460f8ed6b249281ada099ca28bd8f1230b8892
// languagesByAlias keeps alias for different languages and use the name of the languages as an alias too.
// All the keys (alias or not) are written in lower case and the whitespaces has been replaced by underscores.
@ -608,9 +608,12 @@ var languagesByAlias = map[string]string{
"visual_basic": "Visual Basic",
"volt": "Volt",
"vue": "Vue",
"wasm": "WebAssembly",
"wast": "WebAssembly",
"wavefront_material": "Wavefront Material",
"wavefront_object": "Wavefront Object",
"web_ontology_language": "Web Ontology Language",
"webassembly": "WebAssembly",
"webidl": "WebIDL",
"winbatch": "Batchfile",
"wisp": "wisp",

View File

@ -6,36 +6,11 @@ import (
"gopkg.in/src-d/simple-linguist.v1/internal/tokenizer"
)
// getLanguageByClassifier scores content against candidates and returns the language with the
// highest score. A nil classifier falls back to DefaultClassifier; when the classifier produces
// no scores at all, OtherLanguage is returned.
func getLanguageByClassifier(content []byte, candidates []string, classifier Classifier) string {
	active := classifier
	if active == nil {
		active = DefaultClassifier
	}

	if scores := active.Classify(content, candidates); len(scores) > 0 {
		return getLangugeHigherScore(scores)
	}

	return OtherLanguage
}
// getLangugeHigherScore returns the key of scores that holds the highest score.
//
// An empty or nil map yields the empty string. NaN scores are never selected. Every other score
// is eligible, including negative infinity and -math.MaxFloat64, so a non-empty map with at
// least one non-NaN score always produces a language. When several languages share the highest
// score, the lexicographically smallest name wins, which keeps the result independent of Go's
// randomized map iteration order.
func getLangugeHigherScore(scores map[string]float64) string {
	var (
		language string
		higher   float64
		found    bool
	)

	for lang, score := range scores {
		switch {
		case math.IsNaN(score):
			// NaN cannot be ordered against other scores; skip it.
			continue
		case !found, score > higher, score == higher && lang < language:
			language, higher, found = lang, score, true
		}
	}

	return language
}
// Classifier is the interface that contains the method Classify which is in charge to assign scores to the possibles candidates.
// The scores must order the candidates so as the highest score be the most probably language of the content.
// The scores must order the candidates so that the highest score corresponds to the most probable language of the content. The candidates
// parameter is a map that can be used to assign weights to languages dynamically.
type Classifier interface {
Classify(content []byte, candidates []string) map[string]float64
Classify(content []byte, candidates map[string]float64) map[string]float64
}
type classifier struct {
@ -44,36 +19,36 @@ type classifier struct {
tokensTotal float64
}
func (c *classifier) Classify(content []byte, candidates []string) map[string]float64 {
func (c *classifier) Classify(content []byte, candidates map[string]float64) map[string]float64 {
if len(content) == 0 {
return nil
}
var languages []string
var languages map[string]float64
if len(candidates) == 0 {
languages = c.knownLangs()
} else {
languages = make([]string, 0, len(candidates))
for _, candidate := range candidates {
languages = make(map[string]float64, len(candidates))
for candidate, weight := range candidates {
if lang, ok := GetLanguageByAlias(candidate); ok {
languages = append(languages, lang)
languages[lang] = weight
}
}
}
tokens := tokenizer.Tokenize(content)
scores := make(map[string]float64, len(languages))
for _, language := range languages {
for language := range languages {
scores[language] = c.tokensLogProbability(tokens, language) + c.languagesLogProbabilities[language]
}
return scores
}
func (c *classifier) knownLangs() []string {
langs := make([]string, 0, len(c.languagesLogProbabilities))
func (c *classifier) knownLangs() map[string]float64 {
langs := make(map[string]float64, len(c.languagesLogProbabilities))
for lang := range c.languagesLogProbabilities {
langs = append(langs, lang)
langs[lang]++
}
return langs

137
common.go
View File

@ -1,6 +1,7 @@
package slinguist
import (
"math"
"path/filepath"
"strings"
)
@ -8,100 +9,142 @@ import (
// OtherLanguage is used as a zero value when a function can not return a specific language.
const OtherLanguage = "Other"
// Strategy is the function signature shared by every strategy: it receives a filename and the
// file content and returns the languages it considers possible.
type Strategy func(filename string, content []byte) (languages []string)
// strategies lists the Strategy functions that GetLanguage iterates over, in this order.
var strategies = []Strategy{
GetLanguagesByModeline,
GetLanguagesByFilename,
GetLanguagesByShebang,
GetLanguagesByExtension,
GetLanguagesByContent,
}
// GetLanguage applies a sequence of strategies to the given filename and content
// and returns the most probable language.
func GetLanguage(filename string, content []byte) string {
if lang, safe := GetLanguageByModeline(content); safe {
return lang
candidates := map[string]float64{}
for _, strategy := range strategies {
languages := strategy(filename, content)
if len(languages) == 1 {
return languages[0]
}
if len(languages) > 0 {
for _, language := range languages {
candidates[language]++
}
}
}
if lang, safe := GetLanguageByFilename(filename); safe {
return lang
if len(candidates) == 0 {
return OtherLanguage
}
if lang, safe := GetLanguageByShebang(content); safe {
return lang
}
if lang, safe := GetLanguageByExtension(filename); safe {
return lang
}
if lang, safe := GetLanguageByContent(filename, content); safe {
return lang
}
lang := GetLanguageByClassifier(content, nil, nil)
lang := GetLanguageByClassifier(content, candidates, nil)
return lang
}
// GetLanguageByModeline returns the language of the given content looking for the modeline,
// and safe to indicate the sureness of returned language.
func GetLanguageByModeline(content []byte) (lang string, safe bool) {
return getLanguageByModeline(content)
return getLangAndSafe("", content, GetLanguagesByModeline)
}
// GetLanguageByFilename returns a language based on the given filename, and safe to indicate
// the sureness of returned language.
func GetLanguageByFilename(filename string) (lang string, safe bool) {
return getLanguageByFilename(filename)
return getLangAndSafe(filename, nil, GetLanguagesByFilename)
}
func getLanguageByFilename(filename string) (lang string, safe bool) {
lang, safe = languagesByFilename[filename]
if lang == "" {
lang = OtherLanguage
}
return
// GetLanguagesByFilename returns the possible languages registered in languagesByFilename for the
// given filename; content is ignored. It has the signature required by the Strategy type.
func GetLanguagesByFilename(filename string, content []byte) []string {
// The lookup uses filename exactly as given, with no normalization.
return languagesByFilename[filename]
}
// GetLanguageByShebang returns the language of the given content looking for the shebang line,
// and safe to indicate the sureness of returned language.
func GetLanguageByShebang(content []byte) (lang string, safe bool) {
return getLanguageByShebang(content)
return getLangAndSafe("", content, GetLanguagesByShebang)
}
// GetLanguageByExtension returns a language based on the given filename, and safe to indicate
// the sureness of returned language.
func GetLanguageByExtension(filename string) (lang string, safe bool) {
return getLanguageByExtension(filename)
return getLangAndSafe(filename, nil, GetLanguagesByExtension)
}
func getLanguageByExtension(filename string) (lang string, safe bool) {
// GetLanguagesByExtension returns a slice of possible languages for the given filename; content is ignored.
// It has the signature required by the Strategy type.
func GetLanguagesByExtension(filename string, content []byte) []string {
ext := strings.ToLower(filepath.Ext(filename))
lang = OtherLanguage
langs, ok := languagesByExtension[ext]
if !ok {
return
}
lang = langs[0]
safe = len(langs) == 1
return
return languagesByExtension[ext]
}
// GetLanguageByContent returns a language based on the filename and heuristics applies to the content,
// and safe to indicate the sureness of returned language.
func GetLanguageByContent(filename string, content []byte) (lang string, safe bool) {
return getLanguageByContent(filename, content)
return getLangAndSafe(filename, content, GetLanguagesByContent)
}
func getLanguageByContent(filename string, content []byte) (lang string, safe bool) {
// GetLanguagesByContent returns a slice of possible languages for the given content; the filename is only used
// to pick the content matcher registered for its extension. It has the signature required by the Strategy type.
func GetLanguagesByContent(filename string, content []byte) []string {
ext := strings.ToLower(filepath.Ext(filename))
if fnMatcher, ok := contentMatchers[ext]; ok {
lang, safe = fnMatcher(content)
} else {
lang = OtherLanguage
fnMatcher, ok := contentMatchers[ext]
if !ok {
return nil
}
return fnMatcher(content)
}
// getLangAndSafe adapts a Strategy to the (lang, safe) form: it calls getLanguageByStrategy with
// filename and content and reports the first language returned. safe is true only when the
// strategy returned exactly one language; when it returned none, lang is OtherLanguage.
func getLangAndSafe(filename string, content []byte, getLanguageByStrategy Strategy) (lang string, safe bool) {
	found := getLanguageByStrategy(filename, content)

	switch len(found) {
	case 0:
		return OtherLanguage, false
	case 1:
		return found[0], true
	default:
		// Several candidates: report the first one, but flag it as not safe.
		return found[0], false
	}
}
// GetLanguageByClassifier takes in a content and a list of candidates, and apply the classifier's Classify method to
// get the most probably language. If classifier is null then DefaultClassfier will be used.
func GetLanguageByClassifier(content []byte, candidates []string, classifier Classifier) string {
return getLanguageByClassifier(content, candidates, classifier)
// get the most probable language. If classifier is nil then DefaultClassifier will be used. If the classifier
// returns no scores, OtherLanguage is returned.
func GetLanguageByClassifier(content []byte, candidates map[string]float64, classifier Classifier) string {
	// Pick the best-scoring language when the classifier produced any score at all.
	if scores := GetLanguagesByClassifier(content, candidates, classifier); len(scores) > 0 {
		return getLangugeHigherScore(scores)
	}

	return OtherLanguage
}
// getLangugeHigherScore returns the key of scores that holds the highest score.
//
// An empty or nil map yields the empty string. NaN scores are never selected. Every other score
// is eligible, including negative infinity and -math.MaxFloat64, so a non-empty map with at
// least one non-NaN score always produces a language. When several languages share the highest
// score, the lexicographically smallest name wins, which keeps the result independent of Go's
// randomized map iteration order.
func getLangugeHigherScore(scores map[string]float64) string {
	var (
		language string
		higher   float64
		found    bool
	)

	for lang, score := range scores {
		switch {
		case math.IsNaN(score):
			// NaN cannot be ordered against other scores; skip it.
			continue
		case !found, score > higher, score == higher && lang < language:
			language, higher, found = lang, score, true
		}
	}

	return language
}
// GetLanguagesByClassifier returns whatever the classifier's Classify method produces for content and
// candidates: a map with possible languages as keys and their scores as values, where the highest
// score marks the most probable language. If classifier is nil, DefaultClassifier is used.
func GetLanguagesByClassifier(content []byte, candidates map[string]float64, classifier Classifier) map[string]float64 {
	active := classifier
	if active == nil {
		active = DefaultClassifier
	}

	return active.Classify(content, candidates)
}
// GetLanguageExtensions returns the different extensions being used by the language.

View File

@ -34,7 +34,7 @@ func (s *SimpleLinguistTestSuite) TestGetLanguage() {
for _, test := range tests {
language := GetLanguage(test.filename, test.content)
assert.Equal(s.T(), language, test.expected, fmt.Sprintf("%v: %v, expected: %v", test.name, language, test.expected))
assert.Equal(s.T(), test.expected, language, fmt.Sprintf("%v: %v, expected: %v", test.name, language, test.expected))
}
}
@ -91,8 +91,8 @@ func (s *SimpleLinguistTestSuite) TestGetLanguageByModelineLinguist() {
assert.NoError(s.T(), err)
lang, safe := GetLanguageByModeline(content)
assert.Equal(s.T(), lang, test.expectedLang, fmt.Sprintf("%v: lang = %v, expected: %v", test.name, lang, test.expectedLang))
assert.Equal(s.T(), safe, test.expectedSafe, fmt.Sprintf("%v: safe = %v, expected: %v", test.name, safe, test.expectedSafe))
assert.Equal(s.T(), test.expectedLang, lang, fmt.Sprintf("%v: lang = %v, expected: %v", test.name, lang, test.expectedLang))
assert.Equal(s.T(), test.expectedSafe, safe, fmt.Sprintf("%v: safe = %v, expected: %v", test.name, safe, test.expectedSafe))
}
}
@ -116,8 +116,8 @@ func (s *SimpleLinguistTestSuite) TestGetLanguageByModeline() {
for _, test := range tests {
lang, safe := GetLanguageByModeline(test.content)
assert.Equal(s.T(), lang, test.expectedLang, fmt.Sprintf("%v: lang = %v, expected: %v", test.name, lang, test.expectedLang))
assert.Equal(s.T(), safe, test.expectedSafe, fmt.Sprintf("%v: safe = %v, expected: %v", test.name, safe, test.expectedSafe))
assert.Equal(s.T(), test.expectedLang, lang, fmt.Sprintf("%v: lang = %v, expected: %v", test.name, lang, test.expectedLang))
assert.Equal(s.T(), test.expectedSafe, safe, fmt.Sprintf("%v: safe = %v, expected: %v", test.name, safe, test.expectedSafe))
}
}
@ -140,8 +140,8 @@ func (s *SimpleLinguistTestSuite) TestGetLanguageByFilename() {
for _, test := range tests {
lang, safe := GetLanguageByFilename(test.filename)
assert.Equal(s.T(), lang, test.expectedLang, fmt.Sprintf("%v: lang = %v, expected: %v", test.name, lang, test.expectedLang))
assert.Equal(s.T(), safe, test.expectedSafe, fmt.Sprintf("%v: safe = %v, expected: %v", test.name, safe, test.expectedSafe))
assert.Equal(s.T(), test.expectedLang, lang, fmt.Sprintf("%v: lang = %v, expected: %v", test.name, lang, test.expectedLang))
assert.Equal(s.T(), test.expectedSafe, safe, fmt.Sprintf("%v: safe = %v, expected: %v", test.name, safe, test.expectedSafe))
}
}
@ -181,8 +181,8 @@ println("The shell script says ",vm.arglist.concat(" "));`
for _, test := range tests {
lang, safe := GetLanguageByShebang(test.content)
assert.Equal(s.T(), lang, test.expectedLang, fmt.Sprintf("%v: lang = %v, expected: %v", test.name, lang, test.expectedLang))
assert.Equal(s.T(), safe, test.expectedSafe, fmt.Sprintf("%v: safe = %v, expected: %v", test.name, safe, test.expectedSafe))
assert.Equal(s.T(), test.expectedLang, lang, fmt.Sprintf("%v: lang = %v, expected: %v", test.name, lang, test.expectedLang))
assert.Equal(s.T(), test.expectedSafe, safe, fmt.Sprintf("%v: safe = %v, expected: %v", test.name, safe, test.expectedSafe))
}
}
@ -200,8 +200,8 @@ func (s *SimpleLinguistTestSuite) TestGetLanguageByExtension() {
for _, test := range tests {
lang, safe := GetLanguageByExtension(test.filename)
assert.Equal(s.T(), lang, test.expectedLang, fmt.Sprintf("%v: lang = %v, expected: %v", test.name, lang, test.expectedLang))
assert.Equal(s.T(), safe, test.expectedSafe, fmt.Sprintf("%v: safe = %v, expected: %v", test.name, safe, test.expectedSafe))
assert.Equal(s.T(), test.expectedLang, lang, fmt.Sprintf("%v: lang = %v, expected: %v", test.name, lang, test.expectedLang))
assert.Equal(s.T(), test.expectedSafe, safe, fmt.Sprintf("%v: safe = %v, expected: %v", test.name, safe, test.expectedSafe))
}
}
@ -263,15 +263,15 @@ func (s *SimpleLinguistTestSuite) TestGetLanguageByClassifier() {
test := []struct {
name string
filename string
candidates []string
candidates map[string]float64
expected string
}{
{name: "TestGetLanguageByClassifier_1", filename: filepath.Join(samples, "C/blob.c"), candidates: []string{"python", "ruby", "c", "c++"}, expected: "C"},
{name: "TestGetLanguageByClassifier_1", filename: filepath.Join(samples, "C/blob.c"), candidates: map[string]float64{"python": 1.00, "ruby": 1.00, "c": 1.00, "c++": 1.00}, expected: "C"},
{name: "TestGetLanguageByClassifier_2", filename: filepath.Join(samples, "C/blob.c"), candidates: nil, expected: "C"},
{name: "TestGetLanguageByClassifier_3", filename: filepath.Join(samples, "C/main.c"), candidates: nil, expected: "C"},
{name: "TestGetLanguageByClassifier_4", filename: filepath.Join(samples, "C/blob.c"), candidates: []string{"python", "ruby", "c++"}, expected: "C++"},
{name: "TestGetLanguageByClassifier_5", filename: filepath.Join(samples, "C/blob.c"), candidates: []string{"ruby"}, expected: "Ruby"},
{name: "TestGetLanguageByClassifier_6", filename: filepath.Join(samples, "Python/django-models-base.py"), candidates: []string{"python", "ruby", "c", "c++"}, expected: "Python"},
{name: "TestGetLanguageByClassifier_4", filename: filepath.Join(samples, "C/blob.c"), candidates: map[string]float64{"python": 1.00, "ruby": 1.00, "c++": 1.00}, expected: "C++"},
{name: "TestGetLanguageByClassifier_5", filename: filepath.Join(samples, "C/blob.c"), candidates: map[string]float64{"ruby": 1.00}, expected: "Ruby"},
{name: "TestGetLanguageByClassifier_6", filename: filepath.Join(samples, "Python/django-models-base.py"), candidates: map[string]float64{"python": 1.00, "ruby": 1.00, "c": 1.00, "c++": 1.00}, expected: "Python"},
{name: "TestGetLanguageByClassifier_7", filename: filepath.Join(samples, "Python/django-models-base.py"), candidates: nil, expected: "Python"},
}
@ -280,7 +280,7 @@ func (s *SimpleLinguistTestSuite) TestGetLanguageByClassifier() {
assert.NoError(s.T(), err)
lang := GetLanguageByClassifier(content, test.candidates, nil)
assert.Equal(s.T(), lang, test.expected, fmt.Sprintf("%v: lang = %v, expected: %v", test.name, lang, test.expected))
assert.Equal(s.T(), test.expected, lang, fmt.Sprintf("%v: lang = %v, expected: %v", test.name, lang, test.expected))
}
}
@ -297,7 +297,7 @@ func (s *SimpleLinguistTestSuite) TestGetLanguageExtensions() {
for _, test := range tests {
extensions := GetLanguageExtensions(test.language)
assert.EqualValues(s.T(), extensions, test.expected, fmt.Sprintf("%v: extensions = %v, expected: %v", test.name, extensions, test.expected))
assert.EqualValues(s.T(), test.expected, extensions, fmt.Sprintf("%v: extensions = %v, expected: %v", test.name, extensions, test.expected))
}
}
@ -320,7 +320,7 @@ func (s *SimpleLinguistTestSuite) TestGetLanguageType() {
for _, test := range tests {
langType := GetLanguageType(test.language)
assert.Equal(s.T(), langType, test.expected, fmt.Sprintf("%v: langType = %v, expected: %v", test.name, langType, test.expected))
assert.Equal(s.T(), test.expected, langType, fmt.Sprintf("%v: langType = %v, expected: %v", test.name, langType, test.expected))
}
}
@ -345,7 +345,7 @@ func (s *SimpleLinguistTestSuite) TestGetLanguageByAlias() {
for _, test := range tests {
lang, ok := GetLanguageByAlias(test.alias)
assert.Equal(s.T(), lang, test.expectedLang, fmt.Sprintf("%v: lang = %v, expected: %v", test.name, lang, test.expectedLang))
assert.Equal(s.T(), ok, test.expectedOk, fmt.Sprintf("%v: ok = %v, expected: %v", test.name, ok, test.expectedOk))
assert.Equal(s.T(), test.expectedLang, lang, fmt.Sprintf("%v: lang = %v, expected: %v", test.name, lang, test.expectedLang))
assert.Equal(s.T(), test.expectedOk, ok, fmt.Sprintf("%v: ok = %v, expected: %v", test.name, ok, test.expectedOk))
}
}

View File

@ -2,447 +2,447 @@ package slinguist
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: 60f864a138650dd17fafc94814be9ee2d3aaef8c
// Extracted from github/linguist commit: b6460f8ed6b249281ada099ca28bd8f1230b8892
import (
"regexp"
)
type languageMatcher func([]byte) (string, bool)
type languageMatcher func([]byte) []string
var contentMatchers = map[string]languageMatcher{
".asc": func(i []byte) (string, bool) {
".asc": func(i []byte) []string {
if asc_PublicKey_Matcher_0.Match(i) {
return "Public Key", true
return []string{"Public Key"}
} else if asc_AsciiDoc_Matcher_0.Match(i) {
return "AsciiDoc", true
return []string{"AsciiDoc"}
} else if asc_AGSScript_Matcher_0.Match(i) {
return "AGS Script", true
return []string{"AGS Script"}
}
return OtherLanguage, false
return nil
},
".bb": func(i []byte) (string, bool) {
".bb": func(i []byte) []string {
if bb_BlitzBasic_Matcher_0.Match(i) || bb_BlitzBasic_Matcher_1.Match(i) {
return "BlitzBasic", true
return []string{"BlitzBasic"}
} else if bb_BitBake_Matcher_0.Match(i) {
return "BitBake", true
return []string{"BitBake"}
}
return OtherLanguage, false
return nil
},
".builds": func(i []byte) (string, bool) {
".builds": func(i []byte) []string {
if builds_XML_Matcher_0.Match(i) {
return "XML", true
return []string{"XML"}
}
return "Text", true
return []string{"Text"}
},
".ch": func(i []byte) (string, bool) {
".ch": func(i []byte) []string {
if ch_xBase_Matcher_0.Match(i) {
return "xBase", true
return []string{"xBase"}
}
return OtherLanguage, false
return nil
},
".cl": func(i []byte) (string, bool) {
".cl": func(i []byte) []string {
if cl_CommonLisp_Matcher_0.Match(i) {
return "Common Lisp", true
return []string{"Common Lisp"}
} else if cl_Cool_Matcher_0.Match(i) {
return "Cool", true
return []string{"Cool"}
} else if cl_OpenCL_Matcher_0.Match(i) {
return "OpenCL", true
return []string{"OpenCL"}
}
return OtherLanguage, false
return nil
},
".cls": func(i []byte) (string, bool) {
".cls": func(i []byte) []string {
if cls_TeX_Matcher_0.Match(i) {
return "TeX", true
return []string{"TeX"}
}
return OtherLanguage, false
return nil
},
".cs": func(i []byte) (string, bool) {
".cs": func(i []byte) []string {
if cs_Smalltalk_Matcher_0.Match(i) {
return "Smalltalk", true
return []string{"Smalltalk"}
} else if cs_CSharp_Matcher_0.Match(i) || cs_CSharp_Matcher_1.Match(i) {
return "C#", true
return []string{"C#"}
}
return OtherLanguage, false
return nil
},
".d": func(i []byte) (string, bool) {
".d": func(i []byte) []string {
if d_D_Matcher_0.Match(i) {
return "D", true
return []string{"D"}
} else if d_DTrace_Matcher_0.Match(i) {
return "DTrace", true
return []string{"DTrace"}
} else if d_Makefile_Matcher_0.Match(i) {
return "Makefile", true
return []string{"Makefile"}
}
return OtherLanguage, false
return nil
},
".ecl": func(i []byte) (string, bool) {
".ecl": func(i []byte) []string {
if ecl_ECLiPSe_Matcher_0.Match(i) {
return "ECLiPSe", true
return []string{"ECLiPSe"}
} else if ecl_ECL_Matcher_0.Match(i) {
return "ECL", true
return []string{"ECL"}
}
return OtherLanguage, false
return nil
},
".es": func(i []byte) (string, bool) {
".es": func(i []byte) []string {
if es_Erlang_Matcher_0.Match(i) {
return "Erlang", true
return []string{"Erlang"}
}
return OtherLanguage, false
return nil
},
".f": func(i []byte) (string, bool) {
".f": func(i []byte) []string {
if f_Forth_Matcher_0.Match(i) {
return "Forth", true
return []string{"Forth"}
} else if f_FilebenchWML_Matcher_0.Match(i) {
return "Filebench WML", true
return []string{"Filebench WML"}
} else if f_Fortran_Matcher_0.Match(i) {
return "Fortran", true
return []string{"Fortran"}
}
return OtherLanguage, false
return nil
},
".for": func(i []byte) (string, bool) {
".for": func(i []byte) []string {
if for_Forth_Matcher_0.Match(i) {
return "Forth", true
return []string{"Forth"}
} else if for_Fortran_Matcher_0.Match(i) {
return "Fortran", true
return []string{"Fortran"}
}
return OtherLanguage, false
return nil
},
".fr": func(i []byte) (string, bool) {
".fr": func(i []byte) []string {
if fr_Forth_Matcher_0.Match(i) {
return "Forth", true
return []string{"Forth"}
} else if fr_Frege_Matcher_0.Match(i) {
return "Frege", true
return []string{"Frege"}
}
return "Text", true
return []string{"Text"}
},
".fs": func(i []byte) (string, bool) {
".fs": func(i []byte) []string {
if fs_Forth_Matcher_0.Match(i) {
return "Forth", true
return []string{"Forth"}
} else if fs_FSharp_Matcher_0.Match(i) {
return "F#", true
return []string{"F#"}
} else if fs_GLSL_Matcher_0.Match(i) {
return "GLSL", true
return []string{"GLSL"}
} else if fs_Filterscript_Matcher_0.Match(i) {
return "Filterscript", true
return []string{"Filterscript"}
}
return OtherLanguage, false
return nil
},
".gs": func(i []byte) (string, bool) {
".gs": func(i []byte) []string {
if gs_Gosu_Matcher_0.Match(i) {
return "Gosu", true
return []string{"Gosu"}
}
return OtherLanguage, false
return nil
},
".h": func(i []byte) (string, bool) {
".h": func(i []byte) []string {
if h_ObjectiveDashC_Matcher_0.Match(i) {
return "Objective-C", true
return []string{"Objective-C"}
} else if h_CPlusPlus_Matcher_0.Match(i) || h_CPlusPlus_Matcher_1.Match(i) || h_CPlusPlus_Matcher_2.Match(i) || h_CPlusPlus_Matcher_3.Match(i) || h_CPlusPlus_Matcher_4.Match(i) || h_CPlusPlus_Matcher_5.Match(i) || h_CPlusPlus_Matcher_6.Match(i) {
return "C++", true
return []string{"C++"}
}
return OtherLanguage, false
return nil
},
".inc": func(i []byte) (string, bool) {
".inc": func(i []byte) []string {
if inc_PHP_Matcher_0.Match(i) {
return "PHP", true
return []string{"PHP"}
} else if inc_POVDashRaySDL_Matcher_0.Match(i) {
return "POV-Ray SDL", true
return []string{"POV-Ray SDL"}
}
return OtherLanguage, false
return nil
},
".l": func(i []byte) (string, bool) {
".l": func(i []byte) []string {
if l_CommonLisp_Matcher_0.Match(i) {
return "Common Lisp", true
return []string{"Common Lisp"}
} else if l_Lex_Matcher_0.Match(i) {
return "Lex", true
return []string{"Lex"}
} else if l_Roff_Matcher_0.Match(i) {
return "Roff", true
return []string{"Roff"}
} else if l_PicoLisp_Matcher_0.Match(i) {
return "PicoLisp", true
return []string{"PicoLisp"}
}
return OtherLanguage, false
return nil
},
".ls": func(i []byte) (string, bool) {
".ls": func(i []byte) []string {
if ls_LoomScript_Matcher_0.Match(i) {
return "LoomScript", true
return []string{"LoomScript"}
}
return "LiveScript", true
return []string{"LiveScript"}
},
".lsp": func(i []byte) (string, bool) {
".lsp": func(i []byte) []string {
if lsp_CommonLisp_Matcher_0.Match(i) {
return "Common Lisp", true
return []string{"Common Lisp"}
} else if lsp_NewLisp_Matcher_0.Match(i) {
return "NewLisp", true
return []string{"NewLisp"}
}
return OtherLanguage, false
return nil
},
".lisp": func(i []byte) (string, bool) {
".lisp": func(i []byte) []string {
if lisp_CommonLisp_Matcher_0.Match(i) {
return "Common Lisp", true
return []string{"Common Lisp"}
} else if lisp_NewLisp_Matcher_0.Match(i) {
return "NewLisp", true
return []string{"NewLisp"}
}
return OtherLanguage, false
return nil
},
".m": func(i []byte) (string, bool) {
".m": func(i []byte) []string {
if m_ObjectiveDashC_Matcher_0.Match(i) {
return "Objective-C", true
return []string{"Objective-C"}
} else if m_Mercury_Matcher_0.Match(i) {
return "Mercury", true
return []string{"Mercury"}
} else if m_MUF_Matcher_0.Match(i) {
return "MUF", true
return []string{"MUF"}
} else if m_M_Matcher_0.Match(i) {
return "M", true
return []string{"M"}
} else if m_Mathematica_Matcher_0.Match(i) {
return "Mathematica", true
return []string{"Mathematica"}
} else if m_Matlab_Matcher_0.Match(i) {
return "Matlab", true
return []string{"Matlab"}
} else if m_Limbo_Matcher_0.Match(i) {
return "Limbo", true
return []string{"Limbo"}
}
return OtherLanguage, false
return nil
},
".md": func(i []byte) (string, bool) {
".md": func(i []byte) []string {
if md_Markdown_Matcher_0.Match(i) || md_Markdown_Matcher_1.Match(i) {
return "Markdown", true
return []string{"Markdown"}
} else if md_GCCMachineDescription_Matcher_0.Match(i) {
return "GCC Machine Description", true
return []string{"GCC Machine Description"}
}
return "Markdown", true
return []string{"Markdown"}
},
".ml": func(i []byte) (string, bool) {
".ml": func(i []byte) []string {
if ml_OCaml_Matcher_0.Match(i) {
return "OCaml", true
return []string{"OCaml"}
} else if ml_StandardML_Matcher_0.Match(i) {
return "Standard ML", true
return []string{"Standard ML"}
}
return OtherLanguage, false
return nil
},
".mod": func(i []byte) (string, bool) {
".mod": func(i []byte) []string {
if mod_XML_Matcher_0.Match(i) {
return "XML", true
return []string{"XML"}
} else if mod_ModulaDash2_Matcher_0.Match(i) || mod_ModulaDash2_Matcher_1.Match(i) {
return "Modula-2", true
return []string{"Modula-2"}
}
return "Linux Kernel Module", false
return []string{"Linux Kernel Module", "AMPL"}
},
".ms": func(i []byte) (string, bool) {
".ms": func(i []byte) []string {
if ms_Roff_Matcher_0.Match(i) {
return "Roff", true
return []string{"Roff"}
}
return "MAXScript", true
return []string{"MAXScript"}
},
".n": func(i []byte) (string, bool) {
".n": func(i []byte) []string {
if n_Roff_Matcher_0.Match(i) {
return "Roff", true
return []string{"Roff"}
} else if n_Nemerle_Matcher_0.Match(i) {
return "Nemerle", true
return []string{"Nemerle"}
}
return OtherLanguage, false
return nil
},
".ncl": func(i []byte) (string, bool) {
".ncl": func(i []byte) []string {
if ncl_Text_Matcher_0.Match(i) {
return "Text", true
return []string{"Text"}
}
return OtherLanguage, false
return nil
},
".nl": func(i []byte) (string, bool) {
".nl": func(i []byte) []string {
if nl_NL_Matcher_0.Match(i) {
return "NL", true
return []string{"NL"}
}
return "NewLisp", true
return []string{"NewLisp"}
},
".php": func(i []byte) (string, bool) {
".php": func(i []byte) []string {
if php_Hack_Matcher_0.Match(i) {
return "Hack", true
return []string{"Hack"}
} else if php_PHP_Matcher_0.Match(i) {
return "PHP", true
return []string{"PHP"}
}
return OtherLanguage, false
return nil
},
".pl": func(i []byte) (string, bool) {
".pl": func(i []byte) []string {
if pl_Prolog_Matcher_0.Match(i) {
return "Prolog", true
return []string{"Prolog"}
} else if pl_Perl_Matcher_0.Match(i) {
return "Perl", true
return []string{"Perl"}
} else if pl_Perl6_Matcher_0.Match(i) {
return "Perl6", true
return []string{"Perl6"}
}
return OtherLanguage, false
return nil
},
".pm": func(i []byte) (string, bool) {
".pm": func(i []byte) []string {
if pm_Perl6_Matcher_0.Match(i) {
return "Perl6", true
return []string{"Perl6"}
} else if pm_Perl_Matcher_0.Match(i) {
return "Perl", true
return []string{"Perl"}
}
return OtherLanguage, false
return nil
},
".pod": func(i []byte) (string, bool) {
".pod": func(i []byte) []string {
if pod_Pod_Matcher_0.Match(i) {
return "Pod", true
return []string{"Pod"}
}
return "Perl", true
return []string{"Perl"}
},
".pro": func(i []byte) (string, bool) {
".pro": func(i []byte) []string {
if pro_Prolog_Matcher_0.Match(i) {
return "Prolog", true
return []string{"Prolog"}
} else if pro_INI_Matcher_0.Match(i) {
return "INI", true
return []string{"INI"}
} else if pro_QMake_Matcher_0.Match(i) && pro_QMake_Matcher_1.Match(i) {
return "QMake", true
return []string{"QMake"}
} else if pro_IDL_Matcher_0.Match(i) {
return "IDL", true
return []string{"IDL"}
}
return OtherLanguage, false
return nil
},
".props": func(i []byte) (string, bool) {
".props": func(i []byte) []string {
if props_XML_Matcher_0.Match(i) {
return "XML", true
return []string{"XML"}
} else if props_INI_Matcher_0.Match(i) {
return "INI", true
return []string{"INI"}
}
return OtherLanguage, false
return nil
},
".r": func(i []byte) (string, bool) {
".r": func(i []byte) []string {
if r_Rebol_Matcher_0.Match(i) {
return "Rebol", true
return []string{"Rebol"}
} else if r_R_Matcher_0.Match(i) {
return "R", true
return []string{"R"}
}
return OtherLanguage, false
return nil
},
".rno": func(i []byte) (string, bool) {
".rno": func(i []byte) []string {
if rno_RUNOFF_Matcher_0.Match(i) {
return "RUNOFF", true
return []string{"RUNOFF"}
} else if rno_Roff_Matcher_0.Match(i) {
return "Roff", true
return []string{"Roff"}
}
return OtherLanguage, false
return nil
},
".rpy": func(i []byte) (string, bool) {
".rpy": func(i []byte) []string {
if rpy_Python_Matcher_0.Match(i) {
return "Python", true
return []string{"Python"}
}
return "Ren'Py", true
return []string{"Ren'Py"}
},
".rs": func(i []byte) (string, bool) {
".rs": func(i []byte) []string {
if rs_Rust_Matcher_0.Match(i) {
return "Rust", true
return []string{"Rust"}
} else if rs_RenderScript_Matcher_0.Match(i) {
return "RenderScript", true
return []string{"RenderScript"}
}
return OtherLanguage, false
return nil
},
".sc": func(i []byte) (string, bool) {
".sc": func(i []byte) []string {
if sc_SuperCollider_Matcher_0.Match(i) || sc_SuperCollider_Matcher_1.Match(i) || sc_SuperCollider_Matcher_2.Match(i) {
return "SuperCollider", true
return []string{"SuperCollider"}
} else if sc_Scala_Matcher_0.Match(i) || sc_Scala_Matcher_1.Match(i) || sc_Scala_Matcher_2.Match(i) {
return "Scala", true
return []string{"Scala"}
}
return OtherLanguage, false
return nil
},
".sql": func(i []byte) (string, bool) {
".sql": func(i []byte) []string {
if sql_PLpgSQL_Matcher_0.Match(i) || sql_PLpgSQL_Matcher_1.Match(i) || sql_PLpgSQL_Matcher_2.Match(i) {
return "PLpgSQL", true
return []string{"PLpgSQL"}
} else if sql_SQLPL_Matcher_0.Match(i) || sql_SQLPL_Matcher_1.Match(i) {
return "SQLPL", true
return []string{"SQLPL"}
} else if sql_PLSQL_Matcher_0.Match(i) || sql_PLSQL_Matcher_1.Match(i) {
return "PLSQL", true
return []string{"PLSQL"}
} else if sql_SQL_Matcher_0.Match(i) {
return "SQL", true
return []string{"SQL"}
}
return OtherLanguage, false
return nil
},
".srt": func(i []byte) (string, bool) {
".srt": func(i []byte) []string {
if srt_SubRipText_Matcher_0.Match(i) {
return "SubRip Text", true
return []string{"SubRip Text"}
}
return OtherLanguage, false
return nil
},
".t": func(i []byte) (string, bool) {
".t": func(i []byte) []string {
if t_Turing_Matcher_0.Match(i) {
return "Turing", true
return []string{"Turing"}
} else if t_Perl6_Matcher_0.Match(i) {
return "Perl6", true
return []string{"Perl6"}
} else if t_Perl_Matcher_0.Match(i) {
return "Perl", true
return []string{"Perl"}
}
return OtherLanguage, false
return nil
},
".toc": func(i []byte) (string, bool) {
".toc": func(i []byte) []string {
if toc_WorldofWarcraftAddonData_Matcher_0.Match(i) {
return "World of Warcraft Addon Data", true
return []string{"World of Warcraft Addon Data"}
} else if toc_TeX_Matcher_0.Match(i) {
return "TeX", true
return []string{"TeX"}
}
return OtherLanguage, false
return nil
},
".ts": func(i []byte) (string, bool) {
".ts": func(i []byte) []string {
if ts_XML_Matcher_0.Match(i) {
return "XML", true
return []string{"XML"}
}
return "TypeScript", true
return []string{"TypeScript"}
},
".tst": func(i []byte) (string, bool) {
".tst": func(i []byte) []string {
if tst_GAP_Matcher_0.Match(i) {
return "GAP", true
return []string{"GAP"}
}
return "Scilab", true
return []string{"Scilab"}
},
".tsx": func(i []byte) (string, bool) {
".tsx": func(i []byte) []string {
if tsx_TypeScript_Matcher_0.Match(i) {
return "TypeScript", true
return []string{"TypeScript"}
} else if tsx_XML_Matcher_0.Match(i) {
return "XML", true
return []string{"XML"}
}
return OtherLanguage, false
return nil
},
}

View File

@ -2,7 +2,7 @@ package slinguist
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: 60f864a138650dd17fafc94814be9ee2d3aaef8c
// Extracted from github/linguist commit: b6460f8ed6b249281ada099ca28bd8f1230b8892
import "gopkg.in/toqueteos/substring.v1"

View File

@ -2,7 +2,7 @@ package slinguist
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: 60f864a138650dd17fafc94814be9ee2d3aaef8c
// Extracted from github/linguist commit: b6460f8ed6b249281ada099ca28bd8f1230b8892
var languagesByExtension = map[string][]string{
".1": {"Roff"},
@ -926,6 +926,8 @@ var languagesByExtension = map[string][]string{
".vw": {"PLSQL"},
".vxml": {"XML"},
".w": {"CWeb"},
".wast": {"WebAssembly"},
".wat": {"WebAssembly"},
".watchr": {"Ruby"},
".webidl": {"WebIDL"},
".weechatlog": {"IRC log"},
@ -1419,6 +1421,7 @@ var extensionsByLanguage = map[string][]string{
"Wavefront Material": {".mtl"},
"Wavefront Object": {".obj"},
"Web Ontology Language": {".owl"},
"WebAssembly": {".wast", ".wat"},
"WebIDL": {".webidl"},
"World of Warcraft Addon Data": {".toc"},
"X10": {".x10"},

View File

@ -2,141 +2,142 @@ package slinguist
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: 60f864a138650dd17fafc94814be9ee2d3aaef8c
// Extracted from github/linguist commit: b6460f8ed6b249281ada099ca28bd8f1230b8892
var languagesByFilename = map[string]string{
".Rprofile": "R",
".XCompose": "XCompose",
".abbrev_defs": "Emacs Lisp",
".arcconfig": "JSON",
".babelrc": "JSON5",
".bash_history": "Shell",
".bash_logout": "Shell",
".bash_profile": "Shell",
".bashrc": "Shell",
".clang-format": "YAML",
".classpath": "XML",
".emacs": "Emacs Lisp",
".emacs.desktop": "Emacs Lisp",
".factor-boot-rc": "Factor",
".factor-rc": "Factor",
".gclient": "Python",
".gnus": "Emacs Lisp",
".irbrc": "Ruby",
".jshintrc": "JSON",
".nvimrc": "Vim script",
".php_cs": "PHP",
".php_cs.dist": "PHP",
".project": "XML",
".pryrc": "Ruby",
".spacemacs": "Emacs Lisp",
".vimrc": "Vim script",
".viper": "Emacs Lisp",
"APKBUILD": "Alpine Abuild",
"App.config": "XML",
"Appraisals": "Ruby",
"BSDmakefile": "Makefile",
"BUCK": "Python",
"BUILD": "Python",
"Berksfile": "Ruby",
"Brewfile": "Ruby",
"Buildfile": "Ruby",
"CMakeLists.txt": "CMake",
"COPYING": "Text",
"COPYRIGHT.regex": "Text",
"Cakefile": "CoffeeScript",
"Cask": "Emacs Lisp",
"Dangerfile": "Ruby",
"Deliverfile": "Ruby",
"Dockerfile": "Dockerfile",
"Emakefile": "Erlang",
"FONTLOG": "Text",
"Fakefile": "Fancy",
"Fastfile": "Ruby",
"GNUmakefile": "Makefile",
"Gemfile": "Ruby",
"Gemfile.lock": "Ruby",
"Guardfile": "Ruby",
"INSTALL": "Text",
"INSTALL.mysql": "Text",
"Jakefile": "JavaScript",
"Jarfile": "Ruby",
"Jenkinsfile": "Groovy",
"Kbuild": "Makefile",
"LICENSE": "Text",
"LICENSE.mysql": "Text",
"Makefile": "Makefile",
"Makefile.am": "Makefile",
"Makefile.boot": "Makefile",
"Makefile.frag": "Makefile",
"Makefile.in": "Makefile",
"Makefile.inc": "Makefile",
"Mavenfile": "Ruby",
"Modulefile": "Puppet",
"NEWS": "Text",
"Notebook": "Jupyter Notebook",
"NuGet.config": "XML",
"Nukefile": "Nu",
"PKGBUILD": "Shell",
"Phakefile": "PHP",
"Podfile": "Ruby",
"Project.ede": "Emacs Lisp",
"Puppetfile": "Ruby",
"README.1ST": "Text",
"README.me": "Text",
"README.mysql": "Text",
"ROOT": "Isabelle ROOT",
"Rakefile": "Ruby",
"Rexfile": "Perl6",
"SConscript": "Python",
"SConstruct": "Python",
"Settings.StyleCop": "XML",
"Slakefile": "LiveScript",
"Snakefile": "Python",
"Snapfile": "Ruby",
"Thorfile": "Ruby",
"Vagrantfile": "Ruby",
"WORKSPACE": "Python",
"Web.Debug.config": "XML",
"Web.Release.config": "XML",
"Web.config": "XML",
"XCompose": "XCompose",
"_emacs": "Emacs Lisp",
"_vimrc": "Vim script",
"abbrev_defs": "Emacs Lisp",
"ant.xml": "Ant Build System",
"build.xml": "Ant Build System",
"buildfile": "Ruby",
"click.me": "Text",
"composer.lock": "JSON",
"configure.ac": "M4Sugar",
"delete.me": "Text",
"descrip.mmk": "Module Management System",
"descrip.mms": "Module Management System",
"gradlew": "Shell",
"gvimrc": "Vim script",
"keep.me": "Text",
"ld.script": "Linker Script",
"makefile": "Makefile",
"makefile.sco": "Makefile",
"mcmod.info": "JSON",
"meson.build": "Meson",
"meson_options.txt": "Meson",
"mix.lock": "Elixir",
"mkfile": "Makefile",
"mmn": "Roff",
"mmt": "Roff",
"nginx.conf": "Nginx",
"nvimrc": "Vim script",
"packages.config": "XML",
"pom.xml": "Maven POM",
"read.me": "Text",
"rebar.config": "Erlang",
"rebar.config.lock": "Erlang",
"rebar.lock": "Erlang",
"riemann.config": "Clojure",
"test.me": "Text",
"vimrc": "Vim script",
"wscript": "Python",
"xcompose": "XCompose",
var languagesByFilename = map[string][]string{
".Rprofile": {"R"},
".XCompose": {"XCompose"},
".abbrev_defs": {"Emacs Lisp"},
".arcconfig": {"JSON"},
".babelrc": {"JSON5"},
".bash_history": {"Shell"},
".bash_logout": {"Shell"},
".bash_profile": {"Shell"},
".bashrc": {"Shell"},
".clang-format": {"YAML"},
".classpath": {"XML"},
".emacs": {"Emacs Lisp"},
".emacs.desktop": {"Emacs Lisp"},
".factor-boot-rc": {"Factor"},
".factor-rc": {"Factor"},
".gclient": {"Python"},
".gnus": {"Emacs Lisp"},
".irbrc": {"Ruby"},
".jshintrc": {"JSON"},
".nvimrc": {"Vim script"},
".php_cs": {"PHP"},
".php_cs.dist": {"PHP"},
".project": {"XML"},
".pryrc": {"Ruby"},
".spacemacs": {"Emacs Lisp"},
".vimrc": {"Vim script"},
".viper": {"Emacs Lisp"},
"APKBUILD": {"Alpine Abuild"},
"App.config": {"XML"},
"Appraisals": {"Ruby"},
"BSDmakefile": {"Makefile"},
"BUCK": {"Python"},
"BUILD": {"Python"},
"Berksfile": {"Ruby"},
"Brewfile": {"Ruby"},
"Buildfile": {"Ruby"},
"CMakeLists.txt": {"CMake"},
"COPYING": {"Text"},
"COPYRIGHT.regex": {"Text"},
"Cakefile": {"CoffeeScript"},
"Cask": {"Emacs Lisp"},
"Dangerfile": {"Ruby"},
"Deliverfile": {"Ruby"},
"Dockerfile": {"Dockerfile"},
"Emakefile": {"Erlang"},
"FONTLOG": {"Text"},
"Fakefile": {"Fancy"},
"Fastfile": {"Ruby"},
"GNUmakefile": {"Makefile"},
"Gemfile": {"Ruby"},
"Gemfile.lock": {"Ruby"},
"Guardfile": {"Ruby"},
"INSTALL": {"Text"},
"INSTALL.mysql": {"Text"},
"Jakefile": {"JavaScript"},
"Jarfile": {"Ruby"},
"Jenkinsfile": {"Groovy"},
"Kbuild": {"Makefile"},
"LICENSE": {"Text"},
"LICENSE.mysql": {"Text"},
"Makefile": {"Makefile"},
"Makefile.am": {"Makefile"},
"Makefile.boot": {"Makefile"},
"Makefile.frag": {"Makefile"},
"Makefile.in": {"Makefile"},
"Makefile.inc": {"Makefile"},
"Makefile.wat": {"Makefile"},
"Mavenfile": {"Ruby"},
"Modulefile": {"Puppet"},
"NEWS": {"Text"},
"Notebook": {"Jupyter Notebook"},
"NuGet.config": {"XML"},
"Nukefile": {"Nu"},
"PKGBUILD": {"Shell"},
"Phakefile": {"PHP"},
"Podfile": {"Ruby"},
"Project.ede": {"Emacs Lisp"},
"Puppetfile": {"Ruby"},
"README.1ST": {"Text"},
"README.me": {"Text"},
"README.mysql": {"Text"},
"ROOT": {"Isabelle ROOT"},
"Rakefile": {"Ruby"},
"Rexfile": {"Perl6"},
"SConscript": {"Python"},
"SConstruct": {"Python"},
"Settings.StyleCop": {"XML"},
"Slakefile": {"LiveScript"},
"Snakefile": {"Python"},
"Snapfile": {"Ruby"},
"Thorfile": {"Ruby"},
"Vagrantfile": {"Ruby"},
"WORKSPACE": {"Python"},
"Web.Debug.config": {"XML"},
"Web.Release.config": {"XML"},
"Web.config": {"XML"},
"XCompose": {"XCompose"},
"_emacs": {"Emacs Lisp"},
"_vimrc": {"Vim script"},
"abbrev_defs": {"Emacs Lisp"},
"ant.xml": {"Ant Build System"},
"build.xml": {"Ant Build System"},
"buildfile": {"Ruby"},
"click.me": {"Text"},
"composer.lock": {"JSON"},
"configure.ac": {"M4Sugar"},
"delete.me": {"Text"},
"descrip.mmk": {"Module Management System"},
"descrip.mms": {"Module Management System"},
"gradlew": {"Shell"},
"gvimrc": {"Vim script"},
"keep.me": {"Text"},
"ld.script": {"Linker Script"},
"makefile": {"Makefile"},
"makefile.sco": {"Makefile"},
"mcmod.info": {"JSON"},
"meson.build": {"Meson"},
"meson_options.txt": {"Meson"},
"mix.lock": {"Elixir"},
"mkfile": {"Makefile"},
"mmn": {"Roff"},
"mmt": {"Roff"},
"nginx.conf": {"Nginx"},
"nvimrc": {"Vim script"},
"packages.config": {"XML"},
"pom.xml": {"Maven POM"},
"read.me": {"Text"},
"rebar.config": {"Erlang"},
"rebar.config.lock": {"Erlang"},
"rebar.lock": {"Erlang"},
"riemann.config": {"Clojure"},
"test.me": {"Text"},
"vimrc": {"Vim script"},
"wscript": {"Python"},
"xcompose": {"XCompose"},
}

File diff suppressed because it is too large Load Diff

View File

@ -8,11 +8,11 @@ import (
"regexp"
)
type languageMatcher func ([]byte) (string, bool)
type languageMatcher func ([]byte) []string
var contentMatchers = map[string]languageMatcher{
{{ range $index, $disambiguator := . -}}
{{ printf "%q" $disambiguator.Extension }}: func(i []byte) (string, bool) {
{{ printf "%q" $disambiguator.Extension }}: func(i []byte) []string {
{{ range $i, $language := $disambiguator.Languages -}}
{{- if not (avoidLanguage $language) }}
@ -20,14 +20,14 @@ var contentMatchers = map[string]languageMatcher{
{{- if gt $i 0 }} else {{ end -}}
if {{- range $j, $heuristic := $language.Heuristics }} {{ $heuristic.Name }}.Match(i)
{{- if lt $j (len $language.LogicRelations) }} {{index $language.LogicRelations $j}} {{- end -}} {{ end }} {
return {{ printf "%q" $language.Language }}, true
return []string{ {{- printf "%q" $language.Language -}} }
}
{{- end -}}
{{- end -}}
{{- end}}
return {{ returnLanguage $disambiguator.Languages }}, {{ safeLanguage $disambiguator.Languages }}
return {{ returnLanguages $disambiguator.Languages | returnStringSlice }}
},
{{ end -}}
}

View File

@ -4,8 +4,8 @@ package slinguist
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: {{ getCommit }}
var languagesByFilename = map[string]string{
{{range $filename, $language := . -}}
"{{ $filename }}": {{- printf "%q" $language -}},
var languagesByFilename = map[string][]string{
{{range $filename, $languages := . -}}
"{{ $filename }}": { {{- formatStringSlice $languages -}} },
{{end -}}
}

View File

@ -2,8 +2,8 @@ package generator
import (
"bytes"
"html/template"
"io"
"text/template"
yaml "gopkg.in/yaml.v2"
)

View File

@ -3,6 +3,7 @@ package generator
import (
"bytes"
"io"
"strings"
"text/template"
yaml "gopkg.in/yaml.v2"
@ -25,20 +26,21 @@ func Filenames(data []byte, filenamesTmplPath, filenamesTmplName, commit string)
return buf.Bytes(), nil
}
func buildFilenameLanguageMap(languages map[string]*languageInfo) map[string]string {
filenameLangMap := make(map[string]string)
func buildFilenameLanguageMap(languages map[string]*languageInfo) map[string][]string {
filenameLangMap := make(map[string][]string)
for lang, langInfo := range languages {
for _, filename := range langInfo.Filenames {
filenameLangMap[filename] = lang
filenameLangMap[filename] = append(filenameLangMap[filename], lang)
}
}
return filenameLangMap
}
func executeFilenamesTemplate(out io.Writer, languagesByFilename map[string]string, filenamesTmplPath, filenamesTmpl, commit string) error {
func executeFilenamesTemplate(out io.Writer, languagesByFilename map[string][]string, filenamesTmplPath, filenamesTmpl, commit string) error {
fmap := template.FuncMap{
"getCommit": func() string { return commit },
"getCommit": func() string { return commit },
"formatStringSlice": func(slice []string) string { return `"` + strings.Join(slice, `","`) + `"` },
}
t := template.Must(template.New(filenamesTmpl).Funcs(fmap).ParseFiles(filenamesTmplPath))

View File

@ -24,6 +24,8 @@ func Heuristics(heuristics []byte, contentTmplPath, contentTmplName, commit stri
}
return buf.Bytes(), nil
// fmt.Println(string(buf.Bytes()))
// return nil, nil
}
const unknownLanguage = "OtherLanguage"
@ -417,9 +419,15 @@ func executeContentTemplate(out io.Writer, disambiguators []*disambiguator, cont
fmap := template.FuncMap{
"getCommit": func() string { return commit },
"getAllHeuristics": getAllHeuristics,
"returnLanguage": returnLanguage,
"safeLanguage": safeLanguage,
"avoidLanguage": avoidLanguage,
"returnStringSlice": func(slice []string) string {
if len(slice) == 0 {
return "nil"
}
return `[]string{` + strings.Join(slice, `, `) + `}`
},
"returnLanguages": returnLanguages,
"avoidLanguage": avoidLanguage,
}
t := template.Must(template.New(contentTmpl).Funcs(fmap).ParseFiles(contentTmplPath))
@ -458,18 +466,7 @@ func containsInvalidRegexp(reg string) bool {
return strings.Contains(reg, `(?<`) || strings.Contains(reg, `\1`)
}
func returnLanguage(langsHeuristics []*languageHeuristics) string {
lang, _ := returnLangAndSafe(langsHeuristics)
return lang
}
func safeLanguage(langsHeuristics []*languageHeuristics) bool {
_, safe := returnLangAndSafe(langsHeuristics)
return safe
}
func returnLangAndSafe(langsHeuristics []*languageHeuristics) (string, bool) {
// at the moment, only returns one string although might be exists several language to return as a []string.
func returnLanguages(langsHeuristics []*languageHeuristics) []string {
langs := make([]string, 0)
for _, langHeu := range langsHeuristics {
if len(langHeu.Heuristics) == 0 {
@ -477,12 +474,5 @@ func returnLangAndSafe(langsHeuristics []*languageHeuristics) (string, bool) {
}
}
lang := unknownLanguage
safe := false
if len(langs) != 0 {
lang = langs[0]
safe = len(langs) == 1
}
return lang, safe
return langs
}

View File

@ -8,102 +8,102 @@ import (
"regexp"
)
type languageMatcher func([]byte) (string, bool)
type languageMatcher func([]byte) []string
var contentMatchers = map[string]languageMatcher{
".asc": func(i []byte) (string, bool) {
".asc": func(i []byte) []string {
if asc_PublicKey_Matcher_0.Match(i) {
return "Public Key", true
return []string{"Public Key"}
} else if asc_AsciiDoc_Matcher_0.Match(i) {
return "AsciiDoc", true
return []string{"AsciiDoc"}
} else if asc_AGSScript_Matcher_0.Match(i) {
return "AGS Script", true
return []string{"AGS Script"}
}
return OtherLanguage, false
return nil
},
".f": func(i []byte) (string, bool) {
".f": func(i []byte) []string {
if f_Forth_Matcher_0.Match(i) {
return "Forth", true
return []string{"Forth"}
} else if f_FilebenchWML_Matcher_0.Match(i) {
return "Filebench WML", true
return []string{"Filebench WML"}
} else if f_FORTRAN_Matcher_0.Match(i) {
return "FORTRAN", true
return []string{"FORTRAN"}
}
return OtherLanguage, false
return nil
},
".h": func(i []byte) (string, bool) {
".h": func(i []byte) []string {
if h_ObjectiveDashC_Matcher_0.Match(i) {
return "Objective-C", true
return []string{"Objective-C"}
} else if h_CPlusPlus_Matcher_0.Match(i) || h_CPlusPlus_Matcher_1.Match(i) || h_CPlusPlus_Matcher_2.Match(i) || h_CPlusPlus_Matcher_3.Match(i) || h_CPlusPlus_Matcher_4.Match(i) || h_CPlusPlus_Matcher_5.Match(i) || h_CPlusPlus_Matcher_6.Match(i) {
return "C++", true
return []string{"C++"}
}
return OtherLanguage, false
return nil
},
".lsp": func(i []byte) (string, bool) {
".lsp": func(i []byte) []string {
if lsp_CommonLisp_Matcher_0.Match(i) {
return "Common Lisp", true
return []string{"Common Lisp"}
} else if lsp_NewLisp_Matcher_0.Match(i) {
return "NewLisp", true
return []string{"NewLisp"}
}
return OtherLanguage, false
return nil
},
".lisp": func(i []byte) (string, bool) {
".lisp": func(i []byte) []string {
if lisp_CommonLisp_Matcher_0.Match(i) {
return "Common Lisp", true
return []string{"Common Lisp"}
} else if lisp_NewLisp_Matcher_0.Match(i) {
return "NewLisp", true
return []string{"NewLisp"}
}
return OtherLanguage, false
return nil
},
".md": func(i []byte) (string, bool) {
".md": func(i []byte) []string {
if md_Markdown_Matcher_0.Match(i) || md_Markdown_Matcher_1.Match(i) {
return "Markdown", true
return []string{"Markdown"}
} else if md_GCCmachinedescription_Matcher_0.Match(i) {
return "GCC machine description", true
return []string{"GCC machine description"}
}
return "Markdown", true
return []string{"Markdown"}
},
".ms": func(i []byte) (string, bool) {
".ms": func(i []byte) []string {
if ms_Groff_Matcher_0.Match(i) {
return "Groff", true
return []string{"Groff"}
}
return "MAXScript", true
return []string{"MAXScript"}
},
".mod": func(i []byte) (string, bool) {
".mod": func(i []byte) []string {
if mod_XML_Matcher_0.Match(i) {
return "XML", true
return []string{"XML"}
} else if mod_ModulaDash2_Matcher_0.Match(i) || mod_ModulaDash2_Matcher_1.Match(i) {
return "Modula-2", true
return []string{"Modula-2"}
}
return "Linux Kernel Module", false
return []string{"Linux Kernel Module", "AMPL"}
},
".pro": func(i []byte) (string, bool) {
".pro": func(i []byte) []string {
if pro_Prolog_Matcher_0.Match(i) {
return "Prolog", true
return []string{"Prolog"}
} else if pro_INI_Matcher_0.Match(i) {
return "INI", true
return []string{"INI"}
} else if pro_QMake_Matcher_0.Match(i) && pro_QMake_Matcher_1.Match(i) {
return "QMake", true
return []string{"QMake"}
} else if pro_IDL_Matcher_0.Match(i) {
return "IDL", true
return []string{"IDL"}
}
return OtherLanguage, false
return nil
},
".rpy": func(i []byte) (string, bool) {
".rpy": func(i []byte) []string {
if rpy_Python_Matcher_0.Match(i) {
return "Python", true
return []string{"Python"}
}
return "Ren'Py", true
return []string{"Ren'Py"}
},
}

View File

@ -4,9 +4,9 @@ package slinguist
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: 0123456789abcdef0123456789abcdef01234567
var languagesByFilename = map[string]string{
"APKBUILD": "Alpine Abuild",
"CMakeLists.txt": "CMake",
"Cakefile": "CoffeeScript",
"mix.lock": "Elixir",
var languagesByFilename = map[string][]string{
"APKBUILD": {"Alpine Abuild"},
"CMakeLists.txt": {"CMake"},
"Cakefile": {"CoffeeScript"},
"mix.lock": {"Elixir"},
}

View File

@ -2,8 +2,8 @@ package generator
import (
"bytes"
"html/template"
"io"
"text/template"
yaml "gopkg.in/yaml.v2"
)

View File

@ -2,7 +2,7 @@ package slinguist
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: 60f864a138650dd17fafc94814be9ee2d3aaef8c
// Extracted from github/linguist commit: b6460f8ed6b249281ada099ca28bd8f1230b8892
var languagesByInterpreter = map[string][]string{
"Rscript": {"R"},

View File

@ -5,38 +5,60 @@ import (
"regexp"
)
func getLanguageByModeline(content []byte) (lang string, safe bool) {
const (
searchScope = 5
)
// GetLanguagesByModeline returns a slice of possible languages for the given content; filename is ignored.
// It has the signature required to be used as a Strategy.
func GetLanguagesByModeline(filename string, content []byte) []string {
headFoot := getHeaderAndFooter(content)
var languages []string
for _, getLang := range modelinesFunc {
lang, safe = getLang(headFoot)
if safe {
languages = getLang("", headFoot)
if len(languages) > 0 {
break
}
}
return
return languages
}
func getHeaderAndFooter(content []byte) []byte {
const (
searchScope = 5
eol = "\n"
)
if bytes.Count(content, []byte(eol)) < 2*searchScope {
if bytes.Count(content, []byte("\n")) < 2*searchScope {
return content
}
splitted := bytes.Split(content, []byte(eol))
header := splitted[:searchScope]
footer := splitted[len(splitted)-searchScope:]
headerAndFooter := append(header, footer...)
return bytes.Join(headerAndFooter, []byte(eol))
header := headScope(content, searchScope)
footer := footScope(content, searchScope)
headerAndFooter := make([]byte, 0, len(content[:header])+len(content[footer:]))
headerAndFooter = append(headerAndFooter, content[:header]...)
headerAndFooter = append(headerAndFooter, content[footer:]...)
return headerAndFooter
}
var modelinesFunc = []func(content []byte) (string, bool){
GetLanguageByEmacsModeline,
GetLanguageByVimModeline,
// headScope returns the index in content of the scope-th newline, so that
// content[:index] holds the first scope lines without the trailing newline.
//
// If content contains fewer than scope newlines, the whole content is the
// header and len(content) is returned. A non-positive scope yields 0 (an
// empty header). Both cases previously produced meaningless (possibly
// negative) indexes that would panic when used to slice content.
func headScope(content []byte, scope int) (index int) {
	if scope <= 0 {
		return 0
	}

	for i := 0; i < scope; i++ {
		eol := bytes.IndexByte(content[index:], '\n')
		if eol < 0 {
			// Ran out of lines before reaching scope: everything is header.
			return len(content)
		}
		// Advance just past the newline that was found.
		index += eol + 1
	}

	// index points one past the scope-th newline; step back onto it.
	return index - 1
}
// footScope returns the index in content just after the scope-th newline
// counted from the end, so that content[index:] holds everything following
// that newline.
//
// If content contains fewer than scope newlines, the whole content is the
// footer and 0 is returned (previously this panicked slicing content[:-1]).
// A non-positive scope yields len(content) (an empty footer).
func footScope(content []byte, scope int) (index int) {
	if scope <= 0 {
		return len(content)
	}

	index = len(content)
	for i := 0; i < scope; i++ {
		// Search only the part before the newline found in the previous step.
		index = bytes.LastIndexByte(content[:index], '\n')
		if index < 0 {
			// Ran out of lines before reaching scope: everything is footer.
			return 0
		}
	}

	return index + 1
}
var modelinesFunc = []func(filename string, content []byte) []string{
GetLanguagesByEmacsModeline,
GetLanguagesByVimModeline,
}
var (
@ -49,9 +71,20 @@ var (
// GetLanguageByEmacsModeline detects whether the content has an Emacs modeline and tries to
// resolve a language from it. It reports the first language returned by
// GetLanguagesByEmacsModeline and true; when no language is found it returns OtherLanguage and false.
func GetLanguageByEmacsModeline(content []byte) (string, bool) {
	if candidates := GetLanguagesByEmacsModeline("", content); len(candidates) > 0 {
		return candidates[0], true
	}

	return OtherLanguage, false
}
// GetLanguagesByEmacsModeline returns a slice of possible languages for the given content; filename is ignored.
// It has the signature required to be used as a Strategy.
func GetLanguagesByEmacsModeline(filename string, content []byte) []string {
matched := reEmacsModeline.FindAllSubmatch(content, -1)
if matched == nil {
return OtherLanguage, false
return nil
}
// only take the last matched line, discard previous lines
@ -64,22 +97,38 @@ func GetLanguageByEmacsModeline(content []byte) (string, bool) {
alias = string(lastLineMatched)
}
return GetLanguageByAlias(alias)
language, ok := GetLanguageByAlias(alias)
if !ok {
return nil
}
return []string{language}
}
// GetLanguageByVimModeline detects whether the content has a Vim modeline and tries to
// resolve a language from it. It reports the first language returned by
// GetLanguagesByVimModeline and true; when no language is found it returns OtherLanguage and false.
func GetLanguageByVimModeline(content []byte) (string, bool) {
	if candidates := GetLanguagesByVimModeline("", content); len(candidates) > 0 {
		return candidates[0], true
	}

	return OtherLanguage, false
}
// GetLanguagesByVimModeline returns a slice of possible languages for the given content; filename is ignored.
// It has the signature required to be used as a Strategy.
func GetLanguagesByVimModeline(filename string, content []byte) []string {
matched := reVimModeline.FindAllSubmatch(content, -1)
if matched == nil {
return OtherLanguage, false
return nil
}
// only take the last matched line, discard previous lines
lastLineMatched := matched[len(matched)-1][1]
matchedAlias := reVimLang.FindAllSubmatch(lastLineMatched, -1)
if matchedAlias == nil {
return OtherLanguage, false
return nil
}
alias := string(matchedAlias[0][1])
@ -90,11 +139,15 @@ func GetLanguageByVimModeline(content []byte) (string, bool) {
for _, match := range matchedAlias {
otherAlias := string(match[1])
if otherAlias != alias {
alias = OtherLanguage
break
return nil
}
}
}
return GetLanguageByAlias(alias)
language, ok := GetLanguageByAlias(alias)
if !ok {
return nil
}
return []string{language}
}

View File

@ -14,15 +14,11 @@ var (
pythonVersion = regexp.MustCompile(`python\d\.\d+`)
)
func getLanguageByShebang(content []byte) (lang string, safe bool) {
// GetLanguagesByShebang returns a slice of possible languages for the given content; filename is ignored.
// It has the signature required to be used as a Strategy.
func GetLanguagesByShebang(filename string, content []byte) (languages []string) {
interpreter := getInterpreter(content)
lang = OtherLanguage
if langs, ok := languagesByInterpreter[interpreter]; ok {
lang = langs[0]
safe = len(langs) == 1
}
return
return languagesByInterpreter[interpreter]
}
func getInterpreter(data []byte) (interpreter string) {

View File

@ -2,7 +2,7 @@ package slinguist
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: 60f864a138650dd17fafc94814be9ee2d3aaef8c
// Extracted from github/linguist commit: b6460f8ed6b249281ada099ca28bd8f1230b8892
var languagesType = map[string]Type{
"1C Enterprise": Programming,
@ -427,6 +427,7 @@ var languagesType = map[string]Type{
"Wavefront Material": Data,
"Wavefront Object": Data,
"Web Ontology Language": Markup,
"WebAssembly": Programming,
"WebIDL": Programming,
"World of Warcraft Addon Data": Data,
"X10": Programming,

View File

@ -4,8 +4,6 @@ import (
"bytes"
"path/filepath"
"strings"
"gopkg.in/toqueteos/substring.v1"
)
var (
@ -46,16 +44,12 @@ func IsDotFile(path string) bool {
// IsVendor returns whether or not path is a vendor path.
func IsVendor(path string) bool {
return findIndex(path, vendorMatchers) >= 0
return vendorMatchers.Match(path)
}
// IsDocumentation returns whether or not path is a documentation path.
func IsDocumentation(path string) bool {
return findIndex(path, documentationMatchers) >= 0
}
func findIndex(path string, matchers substring.StringsMatcher) int {
return matchers.MatchIndex(path)
return documentationMatchers.Match(path)
}
const sniffLen = 8000

View File

@ -2,7 +2,7 @@ package slinguist
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: 60f864a138650dd17fafc94814be9ee2d3aaef8c
// Extracted from github/linguist commit: b6460f8ed6b249281ada099ca28bd8f1230b8892
import "gopkg.in/toqueteos/substring.v1"
@ -41,20 +41,20 @@ var vendorMatchers = substring.Or(
substring.Regexp(`3rd[-_]?party/`),
substring.Regexp(`vendors?/`),
substring.Regexp(`extern(al)?/`),
substring.Regexp(`(^|/)[Vv]&#43;endor/`),
substring.Regexp(`(^|/)[Vv]+endor/`),
substring.Regexp(`^debian/`),
substring.Regexp(`run.n$`),
substring.Regexp(`bootstrap-datepicker/`),
substring.Regexp(`(^|/)jquery([^.]*)\.js$`),
substring.Regexp(`(^|/)jquery\-\d\.\d&#43;(\.\d&#43;)?\.js$`),
substring.Regexp(`(^|/)jquery\-ui(\-\d\.\d&#43;(\.\d&#43;)?)?(\.\w&#43;)?\.(js|css)$`),
substring.Regexp(`(^|/)jquery\-\d\.\d+(\.\d+)?\.js$`),
substring.Regexp(`(^|/)jquery\-ui(\-\d\.\d+(\.\d+)?)?(\.\w+)?\.(js|css)$`),
substring.Regexp(`(^|/)jquery\.(ui|effects)\.([^.]*)\.(js|css)$`),
substring.Regexp(`jquery.fn.gantt.js`),
substring.Regexp(`jquery.fancybox.(js|css)`),
substring.Regexp(`fuelux.js`),
substring.Regexp(`(^|/)jquery\.fileupload(-\w&#43;)?\.js$`),
substring.Regexp(`(^|/)slick\.\w&#43;.js$`),
substring.Regexp(`(^|/)Leaflet\.Coordinates-\d&#43;\.\d&#43;\.\d&#43;\.src\.js$`),
substring.Regexp(`(^|/)jquery\.fileupload(-\w+)?\.js$`),
substring.Regexp(`(^|/)slick\.\w+.js$`),
substring.Regexp(`(^|/)Leaflet\.Coordinates-\d+\.\d+\.\d+\.src\.js$`),
substring.Regexp(`leaflet.draw-src.js`),
substring.Regexp(`leaflet.draw.css`),
substring.Regexp(`Control.FullScreen.css`),
@ -68,7 +68,7 @@ var vendorMatchers = substring.Or(
substring.Regexp(`(^|/)controls\.js$`),
substring.Regexp(`(^|/)dragdrop\.js$`),
substring.Regexp(`(.*?)\.d\.ts$`),
substring.Regexp(`(^|/)mootools([^.]*)\d&#43;\.\d&#43;.\d&#43;([^.]*)\.js$`),
substring.Regexp(`(^|/)mootools([^.]*)\d+\.\d+.\d+([^.]*)\.js$`),
substring.Regexp(`(^|/)dojo\.js$`),
substring.Regexp(`(^|/)MochiKit\.js$`),
substring.Regexp(`(^|/)yahoo-([^.]*)\.js$`),
@ -80,16 +80,16 @@ var vendorMatchers = substring.Or(
substring.Regexp(`(^|/)fontello(.*?)\.css$`),
substring.Regexp(`(^|/)MathJax/`),
substring.Regexp(`(^|/)Chart\.js$`),
substring.Regexp(`(^|/)[Cc]ode[Mm]irror/(\d&#43;\.\d&#43;/)?(lib|mode|theme|addon|keymap|demo)`),
substring.Regexp(`(^|/)[Cc]ode[Mm]irror/(\d+\.\d+/)?(lib|mode|theme|addon|keymap|demo)`),
substring.Regexp(`(^|/)shBrush([^.]*)\.js$`),
substring.Regexp(`(^|/)shCore\.js$`),
substring.Regexp(`(^|/)shLegacy\.js$`),
substring.Regexp(`(^|/)angular([^.]*)\.js$`),
substring.Regexp(`(^|\/)d3(\.v\d&#43;)?([^.]*)\.js$`),
substring.Regexp(`(^|\/)d3(\.v\d+)?([^.]*)\.js$`),
substring.Regexp(`(^|/)react(-[^.]*)?\.js$`),
substring.Regexp(`(^|/)modernizr\-\d\.\d&#43;(\.\d&#43;)?\.js$`),
substring.Regexp(`(^|/)modernizr\.custom\.\d&#43;\.js$`),
substring.Regexp(`(^|/)knockout-(\d&#43;\.){3}(debug\.)?js$`),
substring.Regexp(`(^|/)modernizr\-\d\.\d+(\.\d+)?\.js$`),
substring.Regexp(`(^|/)modernizr\.custom\.\d+\.js$`),
substring.Regexp(`(^|/)knockout-(\d+\.){3}(debug\.)?js$`),
substring.Regexp(`(^|/)docs?/_?(build|themes?|templates?|static)/`),
substring.Regexp(`(^|/)admin_media/`),
substring.Regexp(`(^|/)env/`),
@ -117,7 +117,7 @@ var vendorMatchers = substring.Or(
substring.Regexp(`(^|/)jquery([^.]*)\.validate(\.unobtrusive)?\.js$`),
substring.Regexp(`(^|/)jquery([^.]*)\.unobtrusive\-ajax\.js$`),
substring.Regexp(`(^|/)[Mm]icrosoft([Mm]vc)?([Aa]jax|[Vv]alidation)(\.debug)?\.js$`),
substring.Regexp(`^[Pp]ackages\/.&#43;\.\d&#43;\/`),
substring.Regexp(`^[Pp]ackages\/.+\.\d+\/`),
substring.Regexp(`(^|/)extjs/.*?\.js$`),
substring.Regexp(`(^|/)extjs/.*?\.xml$`),
substring.Regexp(`(^|/)extjs/.*?\.txt$`),