mirror of
https://github.com/ralsina/tartrazine.git
synced 2024-11-10 13:32:24 +00:00
Merge pull request #28 from mcarmonaa/classifier
Added Classifier as the last step in detection strategies
This commit is contained in:
commit
d9c959522f
7
alias.go
7
alias.go
@ -2,11 +2,12 @@ package slinguist
|
|||||||
|
|
||||||
import "strings"
|
import "strings"
|
||||||
|
|
||||||
// GetLanguageByAlias returns the language related to the given alias or Otherlanguage otherwise.
|
// GetLanguageByAlias returns the language related to the given alias and ok set to true,
|
||||||
func GetLanguageByAlias(alias string) (lang string) {
|
// or OtherLanguage and ok set to false otherwise.
|
||||||
|
func GetLanguageByAlias(alias string) (lang string, ok bool) {
|
||||||
a := strings.Split(alias, `,`)[0]
|
a := strings.Split(alias, `,`)[0]
|
||||||
a = strings.ToLower(a)
|
a = strings.ToLower(a)
|
||||||
lang, ok := languagesByAlias[a]
|
lang, ok = languagesByAlias[a]
|
||||||
if !ok {
|
if !ok {
|
||||||
lang = OtherLanguage
|
lang = OtherLanguage
|
||||||
}
|
}
|
||||||
|
@ -6,21 +6,23 @@ func (s *TSuite) TestGetLanguageByAlias(c *C) {
|
|||||||
tests := []struct {
|
tests := []struct {
|
||||||
alias string
|
alias string
|
||||||
expectedLang string
|
expectedLang string
|
||||||
|
expectedOk bool
|
||||||
}{
|
}{
|
||||||
{alias: "BestLanguageEver", expectedLang: OtherLanguage},
|
{alias: "BestLanguageEver", expectedLang: OtherLanguage, expectedOk: false},
|
||||||
{alias: "aspx-vb", expectedLang: "ASP"},
|
{alias: "aspx-vb", expectedLang: "ASP", expectedOk: true},
|
||||||
{alias: "C++", expectedLang: "C++"},
|
{alias: "C++", expectedLang: "C++", expectedOk: true},
|
||||||
{alias: "c++", expectedLang: "C++"},
|
{alias: "c++", expectedLang: "C++", expectedOk: true},
|
||||||
{alias: "objc", expectedLang: "Objective-C"},
|
{alias: "objc", expectedLang: "Objective-C", expectedOk: true},
|
||||||
{alias: "golang", expectedLang: "Go"},
|
{alias: "golang", expectedLang: "Go", expectedOk: true},
|
||||||
{alias: "GOLANG", expectedLang: "Go"},
|
{alias: "GOLANG", expectedLang: "Go", expectedOk: true},
|
||||||
{alias: "bsdmake", expectedLang: "Makefile"},
|
{alias: "bsdmake", expectedLang: "Makefile", expectedOk: true},
|
||||||
{alias: "xhTmL", expectedLang: "HTML"},
|
{alias: "xhTmL", expectedLang: "HTML", expectedOk: true},
|
||||||
{alias: "python", expectedLang: "Python"},
|
{alias: "python", expectedLang: "Python", expectedOk: true},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
lang := GetLanguageByAlias(test.alias)
|
lang, ok := GetLanguageByAlias(test.alias)
|
||||||
c.Assert(lang, Equals, test.expectedLang)
|
c.Assert(lang, Equals, test.expectedLang)
|
||||||
|
c.Assert(ok, Equals, test.expectedOk)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2,7 +2,7 @@ package slinguist
|
|||||||
|
|
||||||
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
|
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
|
||||||
// THIS FILE SHOULD NOT BE EDITED BY HAND
|
// THIS FILE SHOULD NOT BE EDITED BY HAND
|
||||||
// Extracted from github/linguist commit: dae33dc2b20cddc85d1300435c3be7118a7115a9
|
// Extracted from github/linguist commit: 60f864a138650dd17fafc94814be9ee2d3aaef8c
|
||||||
|
|
||||||
// languagesByAlias keeps alias for different languages and use the name of the languages as a alias too. All the
|
// languagesByAlias keeps alias for different languages and use the name of the languages as a alias too. All the
|
||||||
// keys (alias or not) are written in lower case and the whitespaces has been replaced by underscores.
|
// keys (alias or not) are written in lower case and the whitespaces has been replaced by underscores.
|
||||||
@ -107,6 +107,7 @@ var languagesByAlias = map[string]string{
|
|||||||
"clipper": "xBase",
|
"clipper": "xBase",
|
||||||
"clips": "CLIPS",
|
"clips": "CLIPS",
|
||||||
"clojure": "Clojure",
|
"clojure": "Clojure",
|
||||||
|
"closure_templates": "Closure Templates",
|
||||||
"cmake": "CMake",
|
"cmake": "CMake",
|
||||||
"cobol": "COBOL",
|
"cobol": "COBOL",
|
||||||
"coffee": "CoffeeScript",
|
"coffee": "CoffeeScript",
|
||||||
@ -137,6 +138,7 @@ var languagesByAlias = map[string]string{
|
|||||||
"csv": "CSV",
|
"csv": "CSV",
|
||||||
"cucumber": "Gherkin",
|
"cucumber": "Gherkin",
|
||||||
"cuda": "Cuda",
|
"cuda": "Cuda",
|
||||||
|
"cweb": "CWeb",
|
||||||
"cycript": "Cycript",
|
"cycript": "Cycript",
|
||||||
"cython": "Cython",
|
"cython": "Cython",
|
||||||
"d": "D",
|
"d": "D",
|
||||||
@ -281,6 +283,7 @@ var languagesByAlias = map[string]string{
|
|||||||
"jflex": "JFlex",
|
"jflex": "JFlex",
|
||||||
"jison": "Jison",
|
"jison": "Jison",
|
||||||
"jison_lex": "Jison Lex",
|
"jison_lex": "Jison Lex",
|
||||||
|
"jolie": "Jolie",
|
||||||
"jruby": "Ruby",
|
"jruby": "Ruby",
|
||||||
"js": "JavaScript",
|
"js": "JavaScript",
|
||||||
"json": "JSON",
|
"json": "JSON",
|
||||||
@ -433,6 +436,7 @@ var languagesByAlias = map[string]string{
|
|||||||
"pascal": "Pascal",
|
"pascal": "Pascal",
|
||||||
"pasm": "Parrot Assembly",
|
"pasm": "Parrot Assembly",
|
||||||
"pawn": "PAWN",
|
"pawn": "PAWN",
|
||||||
|
"pep8": "Pep8",
|
||||||
"perl": "Perl",
|
"perl": "Perl",
|
||||||
"perl6": "Perl6",
|
"perl6": "Perl6",
|
||||||
"php": "PHP",
|
"php": "PHP",
|
||||||
@ -529,6 +533,7 @@ var languagesByAlias = map[string]string{
|
|||||||
"scss": "SCSS",
|
"scss": "SCSS",
|
||||||
"self": "Self",
|
"self": "Self",
|
||||||
"sh": "Shell",
|
"sh": "Shell",
|
||||||
|
"shaderlab": "ShaderLab",
|
||||||
"shell": "Shell",
|
"shell": "Shell",
|
||||||
"shell-script": "Shell",
|
"shell-script": "Shell",
|
||||||
"shellsession": "ShellSession",
|
"shellsession": "ShellSession",
|
||||||
@ -572,6 +577,7 @@ var languagesByAlias = map[string]string{
|
|||||||
"textile": "Textile",
|
"textile": "Textile",
|
||||||
"thrift": "Thrift",
|
"thrift": "Thrift",
|
||||||
"ti_program": "TI Program",
|
"ti_program": "TI Program",
|
||||||
|
"tl": "Type Language",
|
||||||
"tla": "TLA",
|
"tla": "TLA",
|
||||||
"toml": "TOML",
|
"toml": "TOML",
|
||||||
"ts": "TypeScript",
|
"ts": "TypeScript",
|
||||||
@ -579,6 +585,7 @@ var languagesByAlias = map[string]string{
|
|||||||
"turtle": "Turtle",
|
"turtle": "Turtle",
|
||||||
"twig": "Twig",
|
"twig": "Twig",
|
||||||
"txl": "TXL",
|
"txl": "TXL",
|
||||||
|
"type_language": "Type Language",
|
||||||
"typescript": "TypeScript",
|
"typescript": "TypeScript",
|
||||||
"udiff": "Diff",
|
"udiff": "Diff",
|
||||||
"unified_parallel_c": "Unified Parallel C",
|
"unified_parallel_c": "Unified Parallel C",
|
||||||
|
100
classifier.go
Normal file
100
classifier.go
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
package slinguist
|
||||||
|
|
||||||
|
import (
|
||||||
|
"math"
|
||||||
|
|
||||||
|
"gopkg.in/src-d/simple-linguist.v1/internal/tokenizer"
|
||||||
|
)
|
||||||
|
|
||||||
|
// GetLanguageByClassifier takes in a content and a list of candidates, and apply the classifier's Classify method to
|
||||||
|
// get the most probably language. If classifier is null then DefaultClassfier will be used.
|
||||||
|
func GetLanguageByClassifier(content []byte, candidates []string, classifier Classifier) string {
|
||||||
|
if classifier == nil {
|
||||||
|
classifier = DefaultClassifier
|
||||||
|
}
|
||||||
|
|
||||||
|
scores := classifier.Classify(content, candidates)
|
||||||
|
if len(scores) == 0 {
|
||||||
|
return OtherLanguage
|
||||||
|
}
|
||||||
|
|
||||||
|
return getLangugeHigherScore(scores)
|
||||||
|
}
|
||||||
|
|
||||||
|
// getLangugeHigherScore returns the language holding the highest score in
// scores, or the empty string when scores has no selectable entry.
//
// Go randomizes map iteration order, so ties are broken explicitly: among the
// languages that share the best score, the lexicographically smallest name
// wins. This keeps the result reproducible from one run to the next.
//
// The search is seeded with the first selectable entry rather than with a
// -math.MaxFloat64 sentinel, so a candidate whose score is -Inf can still be
// returned when nothing better exists. NaN scores cannot be ordered and are
// never selected.
func getLangugeHigherScore(scores map[string]float64) string {
	var (
		language string
		higher   float64
		found    bool
	)

	for lang, score := range scores {
		if math.IsNaN(score) {
			continue
		}

		if !found || score > higher || (score == higher && lang < language) {
			language, higher, found = lang, score, true
		}
	}

	return language
}
|
||||||
|
|
||||||
|
// Classifier is implemented by anything able to score candidate languages for
// a piece of content.
//
// Classify returns a score per language. Scores must rank the candidates so
// that the highest score belongs to the most probable language of the content.
type Classifier interface {
	Classify(content []byte, candidates []string) map[string]float64
}
|
||||||
|
|
||||||
|
// classifier holds the log-probability tables used by Classify to score
// languages.
type classifier struct {
	// languagesLogProbabilities maps a language name to the log probability
	// added to that language's token score.
	languagesLogProbabilities map[string]float64
	// tokensLogProbabilities maps a language name to its per-token log
	// probabilities (language -> token -> log probability).
	tokensLogProbabilities map[string]map[string]float64
	// tokensTotal is the divisor used to derive the fallback log probability
	// of a token that has no entry for a language.
	tokensTotal float64
}
|
||||||
|
|
||||||
|
func (c *classifier) Classify(content []byte, candidates []string) map[string]float64 {
|
||||||
|
if len(content) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var languages []string
|
||||||
|
if len(candidates) == 0 {
|
||||||
|
languages = c.knownLangs()
|
||||||
|
} else {
|
||||||
|
languages = make([]string, 0, len(candidates))
|
||||||
|
for _, candidate := range candidates {
|
||||||
|
if lang, ok := GetLanguageByAlias(candidate); ok {
|
||||||
|
languages = append(languages, lang)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tokens := tokenizer.Tokenize(content)
|
||||||
|
scores := make(map[string]float64, len(languages))
|
||||||
|
for _, language := range languages {
|
||||||
|
scores[language] = c.tokensLogProbability(tokens, language) + c.languagesLogProbabilities[language]
|
||||||
|
}
|
||||||
|
|
||||||
|
return scores
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *classifier) knownLangs() []string {
|
||||||
|
langs := make([]string, 0, len(c.languagesLogProbabilities))
|
||||||
|
for lang := range c.languagesLogProbabilities {
|
||||||
|
langs = append(langs, lang)
|
||||||
|
}
|
||||||
|
|
||||||
|
return langs
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *classifier) tokensLogProbability(tokens []string, language string) float64 {
|
||||||
|
var sum float64
|
||||||
|
for _, token := range tokens {
|
||||||
|
sum += c.tokenProbability(token, language)
|
||||||
|
}
|
||||||
|
|
||||||
|
return sum
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *classifier) tokenProbability(token, language string) float64 {
|
||||||
|
tokenProb, ok := c.tokensLogProbabilities[language][token]
|
||||||
|
if !ok {
|
||||||
|
tokenProb = math.Log(1.000000 / c.tokensTotal)
|
||||||
|
}
|
||||||
|
|
||||||
|
return tokenProb
|
||||||
|
}
|
32
classifier_test.go
Normal file
32
classifier_test.go
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
package slinguist
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io/ioutil"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
|
. "gopkg.in/check.v1"
|
||||||
|
)
|
||||||
|
|
||||||
|
func (s *TSuite) TestGetLanguageByClassifier(c *C) {
|
||||||
|
const samples = `.linguist/samples/`
|
||||||
|
test := []struct {
|
||||||
|
filename string
|
||||||
|
candidates []string
|
||||||
|
expectedLang string
|
||||||
|
}{
|
||||||
|
{filename: filepath.Join(samples, "C/blob.c"), candidates: []string{"python", "ruby", "c", "c++"}, expectedLang: "C"},
|
||||||
|
{filename: filepath.Join(samples, "C/blob.c"), candidates: nil, expectedLang: "C"},
|
||||||
|
{filename: filepath.Join(samples, "C/main.c"), candidates: nil, expectedLang: "C"},
|
||||||
|
{filename: filepath.Join(samples, "C/blob.c"), candidates: []string{"python", "ruby", "c++"}, expectedLang: "C++"},
|
||||||
|
{filename: filepath.Join(samples, "C/blob.c"), candidates: []string{"ruby"}, expectedLang: "Ruby"},
|
||||||
|
{filename: filepath.Join(samples, "Python/django-models-base.py"), candidates: []string{"python", "ruby", "c", "c++"}, expectedLang: "Python"},
|
||||||
|
{filename: filepath.Join(samples, "Python/django-models-base.py"), candidates: nil, expectedLang: "Python"},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range test {
|
||||||
|
content, err := ioutil.ReadFile(test.filename)
|
||||||
|
c.Assert(err, Equals, nil)
|
||||||
|
lang := GetLanguageByClassifier(content, test.candidates, nil)
|
||||||
|
c.Assert(lang, Equals, test.expectedLang)
|
||||||
|
}
|
||||||
|
}
|
@ -52,7 +52,11 @@ func GetLanguage(filename string, content []byte) string {
|
|||||||
return lang
|
return lang
|
||||||
}
|
}
|
||||||
|
|
||||||
lang, _ := GetLanguageByContent(filename, content)
|
if lang, safe := GetLanguageByContent(filename, content); safe {
|
||||||
|
return lang
|
||||||
|
}
|
||||||
|
|
||||||
|
lang := GetLanguageByClassifier(content, nil, nil)
|
||||||
return lang
|
return lang
|
||||||
}
|
}
|
||||||
|
|
||||||
|
77
content.go
77
content.go
@ -2,7 +2,7 @@ package slinguist
|
|||||||
|
|
||||||
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
|
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
|
||||||
// THIS FILE SHOULD NOT BE EDITED BY HAND
|
// THIS FILE SHOULD NOT BE EDITED BY HAND
|
||||||
// Extracted from github/linguist commit: dae33dc2b20cddc85d1300435c3be7118a7115a9
|
// Extracted from github/linguist commit: 60f864a138650dd17fafc94814be9ee2d3aaef8c
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
@ -117,8 +117,8 @@ var matchers = map[string]languageMatcher{
|
|||||||
return "Forth", true
|
return "Forth", true
|
||||||
} else if f_FilebenchWML_Matcher_0.Match(i) {
|
} else if f_FilebenchWML_Matcher_0.Match(i) {
|
||||||
return "Filebench WML", true
|
return "Filebench WML", true
|
||||||
} else if f_FORTRAN_Matcher_0.Match(i) {
|
} else if f_Fortran_Matcher_0.Match(i) {
|
||||||
return "FORTRAN", true
|
return "Fortran", true
|
||||||
}
|
}
|
||||||
|
|
||||||
return OtherLanguage, false
|
return OtherLanguage, false
|
||||||
@ -126,8 +126,8 @@ var matchers = map[string]languageMatcher{
|
|||||||
".for": func(i []byte) (string, bool) {
|
".for": func(i []byte) (string, bool) {
|
||||||
if for_Forth_Matcher_0.Match(i) {
|
if for_Forth_Matcher_0.Match(i) {
|
||||||
return "Forth", true
|
return "Forth", true
|
||||||
} else if for_FORTRAN_Matcher_0.Match(i) {
|
} else if for_Fortran_Matcher_0.Match(i) {
|
||||||
return "FORTRAN", true
|
return "Fortran", true
|
||||||
}
|
}
|
||||||
|
|
||||||
return OtherLanguage, false
|
return OtherLanguage, false
|
||||||
@ -184,8 +184,8 @@ var matchers = map[string]languageMatcher{
|
|||||||
return "Common Lisp", true
|
return "Common Lisp", true
|
||||||
} else if l_Lex_Matcher_0.Match(i) {
|
} else if l_Lex_Matcher_0.Match(i) {
|
||||||
return "Lex", true
|
return "Lex", true
|
||||||
} else if l_Groff_Matcher_0.Match(i) {
|
} else if l_Roff_Matcher_0.Match(i) {
|
||||||
return "Groff", true
|
return "Roff", true
|
||||||
} else if l_PicoLisp_Matcher_0.Match(i) {
|
} else if l_PicoLisp_Matcher_0.Match(i) {
|
||||||
return "PicoLisp", true
|
return "PicoLisp", true
|
||||||
}
|
}
|
||||||
@ -239,8 +239,8 @@ var matchers = map[string]languageMatcher{
|
|||||||
".md": func(i []byte) (string, bool) {
|
".md": func(i []byte) (string, bool) {
|
||||||
if md_Markdown_Matcher_0.Match(i) || md_Markdown_Matcher_1.Match(i) {
|
if md_Markdown_Matcher_0.Match(i) || md_Markdown_Matcher_1.Match(i) {
|
||||||
return "Markdown", true
|
return "Markdown", true
|
||||||
} else if md_GCCmachinedescription_Matcher_0.Match(i) {
|
} else if md_GCCMachineDescription_Matcher_0.Match(i) {
|
||||||
return "GCC machine description", true
|
return "GCC Machine Description", true
|
||||||
}
|
}
|
||||||
|
|
||||||
return "Markdown", true
|
return "Markdown", true
|
||||||
@ -264,15 +264,15 @@ var matchers = map[string]languageMatcher{
|
|||||||
return "Linux Kernel Module", false
|
return "Linux Kernel Module", false
|
||||||
},
|
},
|
||||||
".ms": func(i []byte) (string, bool) {
|
".ms": func(i []byte) (string, bool) {
|
||||||
if ms_Groff_Matcher_0.Match(i) {
|
if ms_Roff_Matcher_0.Match(i) {
|
||||||
return "Groff", true
|
return "Roff", true
|
||||||
}
|
}
|
||||||
|
|
||||||
return "MAXScript", true
|
return "MAXScript", true
|
||||||
},
|
},
|
||||||
".n": func(i []byte) (string, bool) {
|
".n": func(i []byte) (string, bool) {
|
||||||
if n_Groff_Matcher_0.Match(i) {
|
if n_Roff_Matcher_0.Match(i) {
|
||||||
return "Groff", true
|
return "Roff", true
|
||||||
} else if n_Nemerle_Matcher_0.Match(i) {
|
} else if n_Nemerle_Matcher_0.Match(i) {
|
||||||
return "Nemerle", true
|
return "Nemerle", true
|
||||||
}
|
}
|
||||||
@ -314,19 +314,10 @@ var matchers = map[string]languageMatcher{
|
|||||||
return OtherLanguage, false
|
return OtherLanguage, false
|
||||||
},
|
},
|
||||||
".pm": func(i []byte) (string, bool) {
|
".pm": func(i []byte) (string, bool) {
|
||||||
if pm_Perl_Matcher_0.Match(i) {
|
if pm_Perl6_Matcher_0.Match(i) {
|
||||||
return "Perl", true
|
|
||||||
} else if pm_Perl6_Matcher_0.Match(i) {
|
|
||||||
return "Perl6", true
|
return "Perl6", true
|
||||||
}
|
} else if pm_Perl_Matcher_0.Match(i) {
|
||||||
|
|
||||||
return OtherLanguage, false
|
|
||||||
},
|
|
||||||
".t": func(i []byte) (string, bool) {
|
|
||||||
if t_Perl_Matcher_0.Match(i) {
|
|
||||||
return "Perl", true
|
return "Perl", true
|
||||||
} else if t_Perl6_Matcher_0.Match(i) {
|
|
||||||
return "Perl6", true
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return OtherLanguage, false
|
return OtherLanguage, false
|
||||||
@ -372,8 +363,8 @@ var matchers = map[string]languageMatcher{
|
|||||||
".rno": func(i []byte) (string, bool) {
|
".rno": func(i []byte) (string, bool) {
|
||||||
if rno_RUNOFF_Matcher_0.Match(i) {
|
if rno_RUNOFF_Matcher_0.Match(i) {
|
||||||
return "RUNOFF", true
|
return "RUNOFF", true
|
||||||
} else if rno_Groff_Matcher_0.Match(i) {
|
} else if rno_Roff_Matcher_0.Match(i) {
|
||||||
return "Groff", true
|
return "Roff", true
|
||||||
}
|
}
|
||||||
|
|
||||||
return OtherLanguage, false
|
return OtherLanguage, false
|
||||||
@ -423,6 +414,17 @@ var matchers = map[string]languageMatcher{
|
|||||||
|
|
||||||
return OtherLanguage, false
|
return OtherLanguage, false
|
||||||
},
|
},
|
||||||
|
".t": func(i []byte) (string, bool) {
|
||||||
|
if t_Turing_Matcher_0.Match(i) {
|
||||||
|
return "Turing", true
|
||||||
|
} else if t_Perl6_Matcher_0.Match(i) {
|
||||||
|
return "Perl6", true
|
||||||
|
} else if t_Perl_Matcher_0.Match(i) {
|
||||||
|
return "Perl", true
|
||||||
|
}
|
||||||
|
|
||||||
|
return OtherLanguage, false
|
||||||
|
},
|
||||||
".toc": func(i []byte) (string, bool) {
|
".toc": func(i []byte) (string, bool) {
|
||||||
if toc_WorldofWarcraftAddonData_Matcher_0.Match(i) {
|
if toc_WorldofWarcraftAddonData_Matcher_0.Match(i) {
|
||||||
return "World of Warcraft Addon Data", true
|
return "World of Warcraft Addon Data", true
|
||||||
@ -481,9 +483,9 @@ var (
|
|||||||
es_Erlang_Matcher_0 = regexp.MustCompile(`(?m)^\s*(?:%%|main\s*\(.*?\)\s*->)`)
|
es_Erlang_Matcher_0 = regexp.MustCompile(`(?m)^\s*(?:%%|main\s*\(.*?\)\s*->)`)
|
||||||
f_Forth_Matcher_0 = regexp.MustCompile(`(?m)^: `)
|
f_Forth_Matcher_0 = regexp.MustCompile(`(?m)^: `)
|
||||||
f_FilebenchWML_Matcher_0 = regexp.MustCompile(`(?m)flowop`)
|
f_FilebenchWML_Matcher_0 = regexp.MustCompile(`(?m)flowop`)
|
||||||
f_FORTRAN_Matcher_0 = regexp.MustCompile(`(?mi)^([c*][^abd-z]| (subroutine|program|end|data)\s|\s*!)`)
|
f_Fortran_Matcher_0 = regexp.MustCompile(`(?mi)^([c*][^abd-z]| (subroutine|program|end|data)\s|\s*!)`)
|
||||||
for_Forth_Matcher_0 = regexp.MustCompile(`(?m)^: `)
|
for_Forth_Matcher_0 = regexp.MustCompile(`(?m)^: `)
|
||||||
for_FORTRAN_Matcher_0 = regexp.MustCompile(`(?mi)^([c*][^abd-z]| (subroutine|program|end|data)\s|\s*!)`)
|
for_Fortran_Matcher_0 = regexp.MustCompile(`(?mi)^([c*][^abd-z]| (subroutine|program|end|data)\s|\s*!)`)
|
||||||
fr_Forth_Matcher_0 = regexp.MustCompile(`(?m)^(: |also |new-device|previous )`)
|
fr_Forth_Matcher_0 = regexp.MustCompile(`(?m)^(: |also |new-device|previous )`)
|
||||||
fr_Frege_Matcher_0 = regexp.MustCompile(`(?m)^\s*(import|module|package|data|type) `)
|
fr_Frege_Matcher_0 = regexp.MustCompile(`(?m)^\s*(import|module|package|data|type) `)
|
||||||
fs_Forth_Matcher_0 = regexp.MustCompile(`(?m)^(: |new-device)`)
|
fs_Forth_Matcher_0 = regexp.MustCompile(`(?m)^(: |new-device)`)
|
||||||
@ -503,7 +505,7 @@ var (
|
|||||||
inc_POVDashRaySDL_Matcher_0 = regexp.MustCompile(`(?m)^\s*#(declare|local|macro|while)\s`)
|
inc_POVDashRaySDL_Matcher_0 = regexp.MustCompile(`(?m)^\s*#(declare|local|macro|while)\s`)
|
||||||
l_CommonLisp_Matcher_0 = regexp.MustCompile(`(?m)\(def(un|macro)\s`)
|
l_CommonLisp_Matcher_0 = regexp.MustCompile(`(?m)\(def(un|macro)\s`)
|
||||||
l_Lex_Matcher_0 = regexp.MustCompile(`(?m)^(%[%{}]xs|<.*>)`)
|
l_Lex_Matcher_0 = regexp.MustCompile(`(?m)^(%[%{}]xs|<.*>)`)
|
||||||
l_Groff_Matcher_0 = regexp.MustCompile(`(?mi)^\.[a-z][a-z](\s|$)`)
|
l_Roff_Matcher_0 = regexp.MustCompile(`(?mi)^\.[a-z][a-z](\s|$)`)
|
||||||
l_PicoLisp_Matcher_0 = regexp.MustCompile(`(?m)^\((de|class|rel|code|data|must)\s`)
|
l_PicoLisp_Matcher_0 = regexp.MustCompile(`(?m)^\((de|class|rel|code|data|must)\s`)
|
||||||
ls_LoomScript_Matcher_0 = regexp.MustCompile(`(?m)^\s*package\s*[\w\.\/\*\s]*\s*{`)
|
ls_LoomScript_Matcher_0 = regexp.MustCompile(`(?m)^\s*package\s*[\w\.\/\*\s]*\s*{`)
|
||||||
lsp_CommonLisp_Matcher_0 = regexp.MustCompile(`(?mi)^\s*\((defun|in-package|defpackage) `)
|
lsp_CommonLisp_Matcher_0 = regexp.MustCompile(`(?mi)^\s*\((defun|in-package|defpackage) `)
|
||||||
@ -519,14 +521,14 @@ var (
|
|||||||
m_Limbo_Matcher_0 = regexp.MustCompile(`(?m)^\w+\s*:\s*module\s*{`)
|
m_Limbo_Matcher_0 = regexp.MustCompile(`(?m)^\w+\s*:\s*module\s*{`)
|
||||||
md_Markdown_Matcher_0 = regexp.MustCompile(`(?mi)(^[-a-z0-9=#!\*\[|>])|<\/`)
|
md_Markdown_Matcher_0 = regexp.MustCompile(`(?mi)(^[-a-z0-9=#!\*\[|>])|<\/`)
|
||||||
md_Markdown_Matcher_1 = regexp.MustCompile(`(?m)^$`)
|
md_Markdown_Matcher_1 = regexp.MustCompile(`(?m)^$`)
|
||||||
md_GCCmachinedescription_Matcher_0 = regexp.MustCompile(`(?m)^(;;|\(define_)`)
|
md_GCCMachineDescription_Matcher_0 = regexp.MustCompile(`(?m)^(;;|\(define_)`)
|
||||||
ml_OCaml_Matcher_0 = regexp.MustCompile(`(?m)(^\s*module)|let rec |match\s+(\S+\s)+with`)
|
ml_OCaml_Matcher_0 = regexp.MustCompile(`(?m)(^\s*module)|let rec |match\s+(\S+\s)+with`)
|
||||||
ml_StandardML_Matcher_0 = regexp.MustCompile(`(?m)=> |case\s+(\S+\s)+of`)
|
ml_StandardML_Matcher_0 = regexp.MustCompile(`(?m)=> |case\s+(\S+\s)+of`)
|
||||||
mod_XML_Matcher_0 = regexp.MustCompile(`(?m)<!ENTITY `)
|
mod_XML_Matcher_0 = regexp.MustCompile(`(?m)<!ENTITY `)
|
||||||
mod_ModulaDash2_Matcher_0 = regexp.MustCompile(`(?mi)^\s*MODULE [\w\.]+;`)
|
mod_ModulaDash2_Matcher_0 = regexp.MustCompile(`(?mi)^\s*MODULE [\w\.]+;`)
|
||||||
mod_ModulaDash2_Matcher_1 = regexp.MustCompile(`(?mi)^\s*END [\w\.]+;`)
|
mod_ModulaDash2_Matcher_1 = regexp.MustCompile(`(?mi)^\s*END [\w\.]+;`)
|
||||||
ms_Groff_Matcher_0 = regexp.MustCompile(`(?mi)^[.'][a-z][a-z](\s|$)`)
|
ms_Roff_Matcher_0 = regexp.MustCompile(`(?mi)^[.'][a-z][a-z](\s|$)`)
|
||||||
n_Groff_Matcher_0 = regexp.MustCompile(`(?m)^[.']`)
|
n_Roff_Matcher_0 = regexp.MustCompile(`(?m)^[.']`)
|
||||||
n_Nemerle_Matcher_0 = regexp.MustCompile(`(?m)^(module|namespace|using)\s`)
|
n_Nemerle_Matcher_0 = regexp.MustCompile(`(?m)^(module|namespace|using)\s`)
|
||||||
ncl_Text_Matcher_0 = regexp.MustCompile(`(?m)THE_TITLE`)
|
ncl_Text_Matcher_0 = regexp.MustCompile(`(?m)THE_TITLE`)
|
||||||
nl_NL_Matcher_0 = regexp.MustCompile(`(?m)^(b|g)[0-9]+ `)
|
nl_NL_Matcher_0 = regexp.MustCompile(`(?m)^(b|g)[0-9]+ `)
|
||||||
@ -535,10 +537,8 @@ var (
|
|||||||
pl_Prolog_Matcher_0 = regexp.MustCompile(`(?m)^[^#]*:-`)
|
pl_Prolog_Matcher_0 = regexp.MustCompile(`(?m)^[^#]*:-`)
|
||||||
pl_Perl_Matcher_0 = regexp.MustCompile(`(?m)use strict|use\s+v?5\.`)
|
pl_Perl_Matcher_0 = regexp.MustCompile(`(?m)use strict|use\s+v?5\.`)
|
||||||
pl_Perl6_Matcher_0 = regexp.MustCompile(`(?m)^(use v6|(my )?class|module)`)
|
pl_Perl6_Matcher_0 = regexp.MustCompile(`(?m)^(use v6|(my )?class|module)`)
|
||||||
pm_Perl_Matcher_0 = regexp.MustCompile(`(?m)use strict|use\s+v?5\.`)
|
pm_Perl6_Matcher_0 = regexp.MustCompile(`(?m)^\s*(?:use\s+v6\s*;|(?:\bmy\s+)?class|module)\b`)
|
||||||
pm_Perl6_Matcher_0 = regexp.MustCompile(`(?m)^(use v6|(my )?class|module)`)
|
pm_Perl_Matcher_0 = regexp.MustCompile(`(?m)\buse\s+(?:strict\b|v?5\.)`)
|
||||||
t_Perl_Matcher_0 = regexp.MustCompile(`(?m)use strict|use\s+v?5\.`)
|
|
||||||
t_Perl6_Matcher_0 = regexp.MustCompile(`(?m)^(use v6|(my )?class|module)`)
|
|
||||||
pod_Pod_Matcher_0 = regexp.MustCompile(`(?m)^=\w+\b`)
|
pod_Pod_Matcher_0 = regexp.MustCompile(`(?m)^=\w+\b`)
|
||||||
pro_Prolog_Matcher_0 = regexp.MustCompile(`(?m)^[^#]+:-`)
|
pro_Prolog_Matcher_0 = regexp.MustCompile(`(?m)^[^#]+:-`)
|
||||||
pro_INI_Matcher_0 = regexp.MustCompile(`(?m)last_client=`)
|
pro_INI_Matcher_0 = regexp.MustCompile(`(?m)last_client=`)
|
||||||
@ -550,7 +550,7 @@ var (
|
|||||||
r_Rebol_Matcher_0 = regexp.MustCompile(`(?mi)\bRebol\b`)
|
r_Rebol_Matcher_0 = regexp.MustCompile(`(?mi)\bRebol\b`)
|
||||||
r_R_Matcher_0 = regexp.MustCompile(`(?m)<-|^\s*#`)
|
r_R_Matcher_0 = regexp.MustCompile(`(?m)<-|^\s*#`)
|
||||||
rno_RUNOFF_Matcher_0 = regexp.MustCompile(`(?mi)^\.!|^\.end lit(?:eral)?\b`)
|
rno_RUNOFF_Matcher_0 = regexp.MustCompile(`(?mi)^\.!|^\.end lit(?:eral)?\b`)
|
||||||
rno_Groff_Matcher_0 = regexp.MustCompile(`(?m)^\.\\" `)
|
rno_Roff_Matcher_0 = regexp.MustCompile(`(?m)^\.\\" `)
|
||||||
rpy_Python_Matcher_0 = regexp.MustCompile(`(?ms)(^(import|from|class|def)\s)`)
|
rpy_Python_Matcher_0 = regexp.MustCompile(`(?ms)(^(import|from|class|def)\s)`)
|
||||||
rs_Rust_Matcher_0 = regexp.MustCompile(`(?m)^(use |fn |mod |pub |macro_rules|impl|#!?\[)`)
|
rs_Rust_Matcher_0 = regexp.MustCompile(`(?m)^(use |fn |mod |pub |macro_rules|impl|#!?\[)`)
|
||||||
rs_RenderScript_Matcher_0 = regexp.MustCompile(`(?m)#include|#pragma\s+(rs|version)|__attribute__`)
|
rs_RenderScript_Matcher_0 = regexp.MustCompile(`(?m)#include|#pragma\s+(rs|version)|__attribute__`)
|
||||||
@ -569,6 +569,9 @@ var (
|
|||||||
sql_PLSQL_Matcher_1 = regexp.MustCompile(`(?mi)constructor\W+function`)
|
sql_PLSQL_Matcher_1 = regexp.MustCompile(`(?mi)constructor\W+function`)
|
||||||
sql_SQL_Matcher_0 = regexp.MustCompile(`(?mi)! /begin|boolean|package|exception`)
|
sql_SQL_Matcher_0 = regexp.MustCompile(`(?mi)! /begin|boolean|package|exception`)
|
||||||
srt_SubRipText_Matcher_0 = regexp.MustCompile(`(?m)^(\d{2}:\d{2}:\d{2},\d{3})\s*(-->)\s*(\d{2}:\d{2}:\d{2},\d{3})$`)
|
srt_SubRipText_Matcher_0 = regexp.MustCompile(`(?m)^(\d{2}:\d{2}:\d{2},\d{3})\s*(-->)\s*(\d{2}:\d{2}:\d{2},\d{3})$`)
|
||||||
|
t_Turing_Matcher_0 = regexp.MustCompile(`(?m)^\s*%[ \t]+|^\s*var\s+\w+\s*:=\s*\w+`)
|
||||||
|
t_Perl6_Matcher_0 = regexp.MustCompile(`(?m)^\s*(?:use\s+v6\s*;|\bmodule\b|\b(?:my\s+)?class\b)`)
|
||||||
|
t_Perl_Matcher_0 = regexp.MustCompile(`(?m)\buse\s+(?:strict\b|v?5\.)`)
|
||||||
toc_WorldofWarcraftAddonData_Matcher_0 = regexp.MustCompile(`(?m)^## |@no-lib-strip@`)
|
toc_WorldofWarcraftAddonData_Matcher_0 = regexp.MustCompile(`(?m)^## |@no-lib-strip@`)
|
||||||
toc_TeX_Matcher_0 = regexp.MustCompile(`(?m)^\\(contentsline|defcounter|beamer|boolfalse)`)
|
toc_TeX_Matcher_0 = regexp.MustCompile(`(?m)^\\(contentsline|defcounter|beamer|boolfalse)`)
|
||||||
ts_XML_Matcher_0 = regexp.MustCompile(`(?m)<TS`)
|
ts_XML_Matcher_0 = regexp.MustCompile(`(?m)<TS`)
|
||||||
|
@ -2,7 +2,7 @@ package slinguist
|
|||||||
|
|
||||||
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
|
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
|
||||||
// THIS FILE SHOULD NOT BE EDITED BY HAND
|
// THIS FILE SHOULD NOT BE EDITED BY HAND
|
||||||
// Extracted from github/linguist commit: dae33dc2b20cddc85d1300435c3be7118a7115a9
|
// Extracted from github/linguist commit: 60f864a138650dd17fafc94814be9ee2d3aaef8c
|
||||||
|
|
||||||
import "gopkg.in/toqueteos/substring.v1"
|
import "gopkg.in/toqueteos/substring.v1"
|
||||||
|
|
||||||
|
@ -2,7 +2,7 @@ package slinguist
|
|||||||
|
|
||||||
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
|
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
|
||||||
// THIS FILE SHOULD NOT BE EDITED BY HAND
|
// THIS FILE SHOULD NOT BE EDITED BY HAND
|
||||||
// Extracted from github/linguist commit: dae33dc2b20cddc85d1300435c3be7118a7115a9
|
// Extracted from github/linguist commit: 60f864a138650dd17fafc94814be9ee2d3aaef8c
|
||||||
|
|
||||||
var languagesByExtension = map[string][]string{
|
var languagesByExtension = map[string][]string{
|
||||||
".1": {"Roff"},
|
".1": {"Roff"},
|
||||||
@ -39,6 +39,8 @@ var languagesByExtension = map[string][]string{
|
|||||||
".abnf": {"ABNF"},
|
".abnf": {"ABNF"},
|
||||||
".ada": {"Ada"},
|
".ada": {"Ada"},
|
||||||
".adb": {"Ada"},
|
".adb": {"Ada"},
|
||||||
|
".adml": {"XML"},
|
||||||
|
".admx": {"XML"},
|
||||||
".ado": {"Stata"},
|
".ado": {"Stata"},
|
||||||
".adoc": {"AsciiDoc"},
|
".adoc": {"AsciiDoc"},
|
||||||
".adp": {"Tcl"},
|
".adp": {"Tcl"},
|
||||||
@ -94,6 +96,7 @@ var languagesByExtension = map[string][]string{
|
|||||||
".bats": {"Shell"},
|
".bats": {"Shell"},
|
||||||
".bb": {"BitBake", "BlitzBasic"},
|
".bb": {"BitBake", "BlitzBasic"},
|
||||||
".bbx": {"TeX"},
|
".bbx": {"TeX"},
|
||||||
|
".bdy": {"PLSQL"},
|
||||||
".befunge": {"Befunge"},
|
".befunge": {"Befunge"},
|
||||||
".bf": {"Brainfuck", "HyPhy"},
|
".bf": {"Brainfuck", "HyPhy"},
|
||||||
".bib": {"TeX"},
|
".bib": {"TeX"},
|
||||||
@ -132,6 +135,7 @@ var languagesByExtension = map[string][]string{
|
|||||||
".cfm": {"ColdFusion"},
|
".cfm": {"ColdFusion"},
|
||||||
".cfml": {"ColdFusion"},
|
".cfml": {"ColdFusion"},
|
||||||
".cgi": {"Perl", "Python", "Shell"},
|
".cgi": {"Perl", "Python", "Shell"},
|
||||||
|
".cginc": {"HLSL"},
|
||||||
".ch": {"Charity", "xBase"},
|
".ch": {"Charity", "xBase"},
|
||||||
".chem": {"Pic"},
|
".chem": {"Pic"},
|
||||||
".chpl": {"Chapel"},
|
".chpl": {"Chapel"},
|
||||||
@ -197,7 +201,7 @@ var languagesByExtension = map[string][]string{
|
|||||||
".dats": {"ATS"},
|
".dats": {"ATS"},
|
||||||
".db2": {"SQLPL"},
|
".db2": {"SQLPL"},
|
||||||
".dcl": {"Clean"},
|
".dcl": {"Clean"},
|
||||||
".ddl": {"SQL"},
|
".ddl": {"PLSQL", "SQL"},
|
||||||
".decls": {"BlitzBasic"},
|
".decls": {"BlitzBasic"},
|
||||||
".desktop": {"desktop"},
|
".desktop": {"desktop"},
|
||||||
".desktop.in": {"desktop"},
|
".desktop.in": {"desktop"},
|
||||||
@ -273,6 +277,7 @@ var languagesByExtension = map[string][]string{
|
|||||||
".fish": {"fish"},
|
".fish": {"fish"},
|
||||||
".flex": {"JFlex"},
|
".flex": {"JFlex"},
|
||||||
".flux": {"FLUX"},
|
".flux": {"FLUX"},
|
||||||
|
".fnc": {"PLSQL"},
|
||||||
".for": {"Formatted", "Forth", "Fortran"},
|
".for": {"Formatted", "Forth", "Fortran"},
|
||||||
".forth": {"Forth"},
|
".forth": {"Forth"},
|
||||||
".fp": {"GLSL"},
|
".fp": {"GLSL"},
|
||||||
@ -385,10 +390,10 @@ var languagesByExtension = map[string][]string{
|
|||||||
".ins": {"TeX"},
|
".ins": {"TeX"},
|
||||||
".intr": {"Dylan"},
|
".intr": {"Dylan"},
|
||||||
".io": {"Io"},
|
".io": {"Io"},
|
||||||
|
".iol": {"Jolie"},
|
||||||
".ipf": {"IGOR Pro"},
|
".ipf": {"IGOR Pro"},
|
||||||
".ipp": {"C++"},
|
".ipp": {"C++"},
|
||||||
".ipynb": {"Jupyter Notebook"},
|
".ipynb": {"Jupyter Notebook"},
|
||||||
".irbrc": {"Ruby"},
|
|
||||||
".irclog": {"IRC log"},
|
".irclog": {"IRC log"},
|
||||||
".iss": {"Inno Setup"},
|
".iss": {"Inno Setup"},
|
||||||
".ivy": {"XML"},
|
".ivy": {"XML"},
|
||||||
@ -494,6 +499,7 @@ var languagesByExtension = map[string][]string{
|
|||||||
".minid": {"MiniD"},
|
".minid": {"MiniD"},
|
||||||
".mir": {"Mirah"},
|
".mir": {"Mirah"},
|
||||||
".mirah": {"Mirah"},
|
".mirah": {"Mirah"},
|
||||||
|
".mjml": {"XML"},
|
||||||
".mk": {"Makefile"},
|
".mk": {"Makefile"},
|
||||||
".mkd": {"Markdown"},
|
".mkd": {"Markdown"},
|
||||||
".mkdn": {"Markdown"},
|
".mkdn": {"Markdown"},
|
||||||
@ -554,6 +560,7 @@ var languagesByExtension = map[string][]string{
|
|||||||
".no": {"Text"},
|
".no": {"Text"},
|
||||||
".nproj": {"XML"},
|
".nproj": {"XML"},
|
||||||
".nqp": {"Perl6"},
|
".nqp": {"Perl6"},
|
||||||
|
".nr": {"Roff"},
|
||||||
".nse": {"Lua"},
|
".nse": {"Lua"},
|
||||||
".nsh": {"NSIS"},
|
".nsh": {"NSIS"},
|
||||||
".nsi": {"NSIS"},
|
".nsi": {"NSIS"},
|
||||||
@ -567,6 +574,7 @@ var languagesByExtension = map[string][]string{
|
|||||||
".obj": {"Wavefront Object"},
|
".obj": {"Wavefront Object"},
|
||||||
".objdump": {"ObjDump"},
|
".objdump": {"ObjDump"},
|
||||||
".odd": {"XML"},
|
".odd": {"XML"},
|
||||||
|
".ol": {"Jolie"},
|
||||||
".omgrofl": {"Omgrofl"},
|
".omgrofl": {"Omgrofl"},
|
||||||
".ooc": {"ooc"},
|
".ooc": {"ooc"},
|
||||||
".opa": {"Opa"},
|
".opa": {"Opa"},
|
||||||
@ -602,6 +610,7 @@ var languagesByExtension = map[string][]string{
|
|||||||
".pd": {"Pure Data"},
|
".pd": {"Pure Data"},
|
||||||
".pd_lua": {"Lua"},
|
".pd_lua": {"Lua"},
|
||||||
".pde": {"Processing"},
|
".pde": {"Processing"},
|
||||||
|
".pep": {"Pep8"},
|
||||||
".perl": {"Perl"},
|
".perl": {"Perl"},
|
||||||
".ph": {"Perl"},
|
".ph": {"Perl"},
|
||||||
".php": {"Hack", "PHP"},
|
".php": {"Hack", "PHP"},
|
||||||
@ -642,7 +651,7 @@ var languagesByExtension = map[string][]string{
|
|||||||
".pov": {"POV-Ray SDL"},
|
".pov": {"POV-Ray SDL"},
|
||||||
".pp": {"Pascal", "Puppet"},
|
".pp": {"Pascal", "Puppet"},
|
||||||
".pprx": {"REXX"},
|
".pprx": {"REXX"},
|
||||||
".prc": {"SQL"},
|
".prc": {"PLSQL", "SQL"},
|
||||||
".prefab": {"Unity3D Asset"},
|
".prefab": {"Unity3D Asset"},
|
||||||
".prefs": {"INI"},
|
".prefs": {"INI"},
|
||||||
".prg": {"xBase"},
|
".prg": {"xBase"},
|
||||||
@ -671,6 +680,7 @@ var languagesByExtension = map[string][]string{
|
|||||||
".py": {"Python"},
|
".py": {"Python"},
|
||||||
".py3": {"Python"},
|
".py3": {"Python"},
|
||||||
".pyde": {"Python"},
|
".pyde": {"Python"},
|
||||||
|
".pyi": {"Python"},
|
||||||
".pyp": {"Python"},
|
".pyp": {"Python"},
|
||||||
".pyt": {"Python"},
|
".pyt": {"Python"},
|
||||||
".pytb": {"Python traceback"},
|
".pytb": {"Python traceback"},
|
||||||
@ -766,7 +776,7 @@ var languagesByExtension = map[string][]string{
|
|||||||
".sh": {"Shell"},
|
".sh": {"Shell"},
|
||||||
".sh-session": {"ShellSession"},
|
".sh-session": {"ShellSession"},
|
||||||
".sh.in": {"Shell"},
|
".sh.in": {"Shell"},
|
||||||
".shader": {"GLSL"},
|
".shader": {"GLSL", "ShaderLab"},
|
||||||
".shen": {"Shen"},
|
".shen": {"Shen"},
|
||||||
".sig": {"Standard ML"},
|
".sig": {"Standard ML"},
|
||||||
".sj": {"Objective-J"},
|
".sj": {"Objective-J"},
|
||||||
@ -780,8 +790,10 @@ var languagesByExtension = map[string][]string{
|
|||||||
".sml": {"Standard ML"},
|
".sml": {"Standard ML"},
|
||||||
".smt": {"SMT"},
|
".smt": {"SMT"},
|
||||||
".smt2": {"SMT"},
|
".smt2": {"SMT"},
|
||||||
|
".soy": {"Closure Templates"},
|
||||||
".sp": {"SourcePawn"},
|
".sp": {"SourcePawn"},
|
||||||
".sparql": {"SPARQL"},
|
".sparql": {"SPARQL"},
|
||||||
|
".spc": {"PLSQL"},
|
||||||
".spec": {"Python", "RPM Spec", "Ruby"},
|
".spec": {"Python", "RPM Spec", "Ruby"},
|
||||||
".spin": {"Propeller Spin"},
|
".spin": {"Propeller Spin"},
|
||||||
".sps": {"Scheme"},
|
".sps": {"Scheme"},
|
||||||
@ -830,12 +842,15 @@ var languagesByExtension = map[string][]string{
|
|||||||
".tcl": {"Tcl"},
|
".tcl": {"Tcl"},
|
||||||
".tcsh": {"Tcsh"},
|
".tcsh": {"Tcsh"},
|
||||||
".tea": {"Tea"},
|
".tea": {"Tea"},
|
||||||
|
".tesc": {"GLSL"},
|
||||||
|
".tese": {"GLSL"},
|
||||||
".tex": {"TeX"},
|
".tex": {"TeX"},
|
||||||
".textile": {"Textile"},
|
".textile": {"Textile"},
|
||||||
".tf": {"HCL"},
|
".tf": {"HCL"},
|
||||||
".thor": {"Ruby"},
|
".thor": {"Ruby"},
|
||||||
".thrift": {"Thrift"},
|
".thrift": {"Thrift"},
|
||||||
".thy": {"Isabelle"},
|
".thy": {"Isabelle"},
|
||||||
|
".tl": {"Type Language"},
|
||||||
".tla": {"TLA"},
|
".tla": {"TLA"},
|
||||||
".tm": {"Tcl"},
|
".tm": {"Tcl"},
|
||||||
".tmCommand": {"XML"},
|
".tmCommand": {"XML"},
|
||||||
@ -850,8 +865,11 @@ var languagesByExtension = map[string][]string{
|
|||||||
".toml": {"TOML"},
|
".toml": {"TOML"},
|
||||||
".tool": {"Shell"},
|
".tool": {"Shell"},
|
||||||
".topojson": {"JSON"},
|
".topojson": {"JSON"},
|
||||||
|
".tpb": {"PLSQL"},
|
||||||
".tpl": {"Smarty"},
|
".tpl": {"Smarty"},
|
||||||
".tpp": {"C++"},
|
".tpp": {"C++"},
|
||||||
|
".tps": {"PLSQL"},
|
||||||
|
".trg": {"PLSQL"},
|
||||||
".ts": {"TypeScript", "XML"},
|
".ts": {"TypeScript", "XML"},
|
||||||
".tst": {"GAP", "Scilab"},
|
".tst": {"GAP", "Scilab"},
|
||||||
".tsx": {"TypeScript", "XML"},
|
".tsx": {"TypeScript", "XML"},
|
||||||
@ -901,10 +919,13 @@ var languagesByExtension = map[string][]string{
|
|||||||
".vrx": {"GLSL"},
|
".vrx": {"GLSL"},
|
||||||
".vsh": {"GLSL"},
|
".vsh": {"GLSL"},
|
||||||
".vshader": {"GLSL"},
|
".vshader": {"GLSL"},
|
||||||
|
".vsixmanifest": {"XML"},
|
||||||
".vssettings": {"XML"},
|
".vssettings": {"XML"},
|
||||||
|
".vstemplate": {"XML"},
|
||||||
".vue": {"Vue"},
|
".vue": {"Vue"},
|
||||||
|
".vw": {"PLSQL"},
|
||||||
".vxml": {"XML"},
|
".vxml": {"XML"},
|
||||||
".w": {"C"},
|
".w": {"CWeb"},
|
||||||
".watchr": {"Ruby"},
|
".watchr": {"Ruby"},
|
||||||
".webidl": {"WebIDL"},
|
".webidl": {"WebIDL"},
|
||||||
".weechatlog": {"IRC log"},
|
".weechatlog": {"IRC log"},
|
||||||
@ -914,6 +935,7 @@ var languagesByExtension = map[string][]string{
|
|||||||
".wl": {"Mathematica"},
|
".wl": {"Mathematica"},
|
||||||
".wlt": {"Mathematica"},
|
".wlt": {"Mathematica"},
|
||||||
".wlua": {"Lua"},
|
".wlua": {"Lua"},
|
||||||
|
".workbook": {"Markdown"},
|
||||||
".wsdl": {"XML"},
|
".wsdl": {"XML"},
|
||||||
".wsf": {"XML"},
|
".wsf": {"XML"},
|
||||||
".wsgi": {"Python"},
|
".wsgi": {"Python"},
|
||||||
|
@ -2,7 +2,7 @@ package slinguist
|
|||||||
|
|
||||||
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
|
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
|
||||||
// THIS FILE SHOULD NOT BE EDITED BY HAND
|
// THIS FILE SHOULD NOT BE EDITED BY HAND
|
||||||
// Extracted from github/linguist commit: dae33dc2b20cddc85d1300435c3be7118a7115a9
|
// Extracted from github/linguist commit: 60f864a138650dd17fafc94814be9ee2d3aaef8c
|
||||||
|
|
||||||
var languagesByFilename = map[string]string{
|
var languagesByFilename = map[string]string{
|
||||||
".Rprofile": "R",
|
".Rprofile": "R",
|
||||||
@ -22,6 +22,7 @@ var languagesByFilename = map[string]string{
|
|||||||
".factor-rc": "Factor",
|
".factor-rc": "Factor",
|
||||||
".gclient": "Python",
|
".gclient": "Python",
|
||||||
".gnus": "Emacs Lisp",
|
".gnus": "Emacs Lisp",
|
||||||
|
".irbrc": "Ruby",
|
||||||
".jshintrc": "JSON",
|
".jshintrc": "JSON",
|
||||||
".nvimrc": "Vim script",
|
".nvimrc": "Vim script",
|
||||||
".php_cs": "PHP",
|
".php_cs": "PHP",
|
||||||
@ -85,6 +86,7 @@ var languagesByFilename = map[string]string{
|
|||||||
"README.me": "Text",
|
"README.me": "Text",
|
||||||
"README.mysql": "Text",
|
"README.mysql": "Text",
|
||||||
"ROOT": "Isabelle ROOT",
|
"ROOT": "Isabelle ROOT",
|
||||||
|
"Rakefile": "Ruby",
|
||||||
"Rexfile": "Perl6",
|
"Rexfile": "Perl6",
|
||||||
"SConscript": "Python",
|
"SConscript": "Python",
|
||||||
"SConstruct": "Python",
|
"SConstruct": "Python",
|
||||||
|
128805
frequencies.go
Normal file
128805
frequencies.go
Normal file
File diff suppressed because it is too large
Load Diff
24
internal/code-generator/assets/frequencies.go.tmpl
Normal file
24
internal/code-generator/assets/frequencies.go.tmpl
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
package slinguist
|
||||||
|
|
||||||
|
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
|
||||||
|
// THIS FILE SHOULD NOT BE EDITED BY HAND
|
||||||
|
// Extracted from github/linguist commit: {{ getCommit }}
|
||||||
|
|
||||||
|
var DefaultClassifier Classifier = &classifier{
|
||||||
|
languagesLogProbabilities: map[string]float64{
|
||||||
|
{{ $freqs := . -}}
|
||||||
|
{{range $index, $language := orderKeys .Languages -}}
|
||||||
|
"{{ $language }}": {{ languageLogProbability $language -}},
|
||||||
|
{{end -}}
|
||||||
|
},
|
||||||
|
tokensLogProbabilities: map[string]map[string]float64{
|
||||||
|
{{range $index, $language := orderMapMapKeys .Tokens -}}
|
||||||
|
"{{ $language }}": map[string]float64{
|
||||||
|
{{range $i, $token := index $freqs.Tokens $language | orderKeys -}}
|
||||||
|
{{ quote $token }}: {{ tokenLogProbability $language $token }},
|
||||||
|
{{end -}}
|
||||||
|
},
|
||||||
|
{{end -}}
|
||||||
|
},
|
||||||
|
tokensTotal: {{ toFloat64 .TokensTotal -}},
|
||||||
|
}
|
@ -21,6 +21,10 @@ func FromFile(fileToParse, outPath, tmplPath, tmplName, commit string, generate
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return formatedWrite(outPath, source)
|
||||||
|
}
|
||||||
|
|
||||||
|
func formatedWrite(outPath string, source []byte) error {
|
||||||
formatedSource, err := format.Source(source)
|
formatedSource, err := format.Source(source)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -4,15 +4,20 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/suite"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
lingustURL = "https://github.com/github/linguist.git"
|
||||||
|
commitTree = "60f864a138650dd17fafc94814be9ee2d3aaef8c"
|
||||||
commitTest = "fe8b44ab8a225b1ffa75b983b916ea22fee5b6f7"
|
commitTest = "fe8b44ab8a225b1ffa75b983b916ea22fee5b6f7"
|
||||||
|
|
||||||
// Languages test
|
// Extensions test
|
||||||
extensionsTestFile = "test_files/extensions.test.yml"
|
extensionsTestFile = "test_files/extensions.test.yml"
|
||||||
extensionsGold = "test_files/extensions.gold"
|
extensionsGold = "test_files/extensions.gold"
|
||||||
extensionsTestTmplPath = "../assets/extensions.go.tmpl"
|
extensionsTestTmplPath = "../assets/extensions.go.tmpl"
|
||||||
@ -59,9 +64,48 @@ const (
|
|||||||
aliasesGold = "test_files/aliases.gold"
|
aliasesGold = "test_files/aliases.gold"
|
||||||
aliasesTestTmplPath = "../assets/aliases.go.tmpl"
|
aliasesTestTmplPath = "../assets/aliases.go.tmpl"
|
||||||
aliasesTestTmplName = "aliases.go.tmpl"
|
aliasesTestTmplName = "aliases.go.tmpl"
|
||||||
|
|
||||||
|
// Frequencies test
|
||||||
|
frequenciesTestDir = "/samples"
|
||||||
|
frequenciesGold = "test_files/frequencies.gold"
|
||||||
|
frequenciesTestTmplPath = "../assets/frequencies.go.tmpl"
|
||||||
|
frequenciesTestTmplName = "frequencies.go.tmpl"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestFromFile(t *testing.T) {
|
type GeneratorTestSuite struct {
|
||||||
|
suite.Suite
|
||||||
|
tmpLinguist string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (g *GeneratorTestSuite) SetupSuite() {
|
||||||
|
tmpLinguist, err := ioutil.TempDir("", "linguist-")
|
||||||
|
assert.NoError(g.T(), err)
|
||||||
|
g.tmpLinguist = tmpLinguist
|
||||||
|
|
||||||
|
cmd := exec.Command("git", "clone", lingustURL, tmpLinguist)
|
||||||
|
err = cmd.Run()
|
||||||
|
assert.NoError(g.T(), err)
|
||||||
|
|
||||||
|
cwd, err := os.Getwd()
|
||||||
|
assert.NoError(g.T(), err)
|
||||||
|
|
||||||
|
err = os.Chdir(tmpLinguist)
|
||||||
|
assert.NoError(g.T(), err)
|
||||||
|
|
||||||
|
cmd = exec.Command("git", "checkout", commitTree)
|
||||||
|
err = cmd.Run()
|
||||||
|
assert.NoError(g.T(), err)
|
||||||
|
|
||||||
|
err = os.Chdir(cwd)
|
||||||
|
assert.NoError(g.T(), err)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (g *GeneratorTestSuite) TearDownSuite() {
|
||||||
|
err := os.RemoveAll(g.tmpLinguist)
|
||||||
|
assert.NoError(g.T(), err)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (g *GeneratorTestSuite) TestFromFile() {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
fileToParse string
|
fileToParse string
|
||||||
@ -145,20 +189,57 @@ func TestFromFile(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tt := range tests {
|
for _, test := range tests {
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
gold, err := ioutil.ReadFile(test.wantOut)
|
||||||
gold, err := ioutil.ReadFile(tt.wantOut)
|
assert.NoError(g.T(), err)
|
||||||
assert.NoError(t, err)
|
|
||||||
|
|
||||||
outPath, err := ioutil.TempFile("/tmp", "generator-test-")
|
outPath, err := ioutil.TempFile("/tmp", "generator-test-")
|
||||||
assert.NoError(t, err)
|
assert.NoError(g.T(), err)
|
||||||
defer os.Remove(outPath.Name())
|
defer os.Remove(outPath.Name())
|
||||||
|
|
||||||
err = FromFile(tt.fileToParse, outPath.Name(), tt.tmplPath, tt.tmplName, tt.commit, tt.generate)
|
err = FromFile(test.fileToParse, outPath.Name(), test.tmplPath, test.tmplName, test.commit, test.generate)
|
||||||
assert.NoError(t, err)
|
assert.NoError(g.T(), err)
|
||||||
out, err := ioutil.ReadFile(outPath.Name())
|
out, err := ioutil.ReadFile(outPath.Name())
|
||||||
assert.NoError(t, err)
|
assert.NoError(g.T(), err)
|
||||||
assert.EqualValues(t, gold, out, fmt.Sprintf("FromFile() = %v, want %v", string(out), string(tt.wantOut)))
|
assert.EqualValues(g.T(), gold, out, fmt.Sprintf("FromFile() = %v, want %v", string(out), string(test.wantOut)))
|
||||||
})
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (g *GeneratorTestSuite) TestFrequencies() {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
samplesDir string
|
||||||
|
tmplPath string
|
||||||
|
tmplName string
|
||||||
|
commit string
|
||||||
|
wantOut string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "Frequencies_1",
|
||||||
|
samplesDir: filepath.Join(g.tmpLinguist, frequenciesTestDir),
|
||||||
|
tmplPath: frequenciesTestTmplPath,
|
||||||
|
tmplName: frequenciesTestTmplName,
|
||||||
|
commit: commitTree,
|
||||||
|
wantOut: frequenciesGold,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range tests {
|
||||||
|
gold, err := ioutil.ReadFile(test.wantOut)
|
||||||
|
assert.NoError(g.T(), err)
|
||||||
|
|
||||||
|
outPath, err := ioutil.TempFile("/tmp", "frequencies-test-")
|
||||||
|
assert.NoError(g.T(), err)
|
||||||
|
defer os.Remove(outPath.Name())
|
||||||
|
|
||||||
|
err = Frequencies(test.samplesDir, test.tmplPath, test.tmplName, test.commit, outPath.Name())
|
||||||
|
assert.NoError(g.T(), err)
|
||||||
|
out, err := ioutil.ReadFile(outPath.Name())
|
||||||
|
assert.NoError(g.T(), err)
|
||||||
|
assert.EqualValues(g.T(), gold, out, fmt.Sprintf("Frequencies() = %v, want %v", string(out), string(test.wantOut)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGeneratorTestSuite(t *testing.T) {
|
||||||
|
suite.Run(t, new(GeneratorTestSuite))
|
||||||
|
}
|
||||||
|
198
internal/code-generator/generator/samplesfreq.go
Normal file
198
internal/code-generator/generator/samplesfreq.go
Normal file
@ -0,0 +1,198 @@
|
|||||||
|
package generator
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"io/ioutil"
|
||||||
|
"log"
|
||||||
|
"math"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"sort"
|
||||||
|
"strconv"
|
||||||
|
"text/template"
|
||||||
|
|
||||||
|
"gopkg.in/src-d/simple-linguist.v1/internal/tokenizer"
|
||||||
|
)
|
||||||
|
|
||||||
|
const samplesSubDir = "filenames"
|
||||||
|
|
||||||
|
// samplesFrequencies holds the counts gathered from the samples tree; it is
// the value handed to the frequencies template.
type samplesFrequencies struct {
	// LanguageTotal is the number of sample files counted over all languages.
	LanguageTotal int `json:"language_total,omitempty"`
	// Languages maps a language name to its number of sample files.
	Languages map[string]int `json:"languages,omitempty"`
	// TokensTotal is the number of tokens extracted over all languages.
	TokensTotal int `json:"tokens_total,omitempty"`
	// Tokens maps a language name to the occurrences of each of its tokens.
	Tokens map[string]map[string]int `json:"tokens,omitempty"`
	// LanguageTokens maps a language name to its total number of tokens.
	LanguageTokens map[string]int `json:"language_tokens,omitempty"`
}
|
||||||
|
|
||||||
|
// Frequencies reads directories in samplesDir, retrieves information about frequencies of languages and tokens, and write
|
||||||
|
// the file outPath using frequenciesTmplName as a template.
|
||||||
|
func Frequencies(samplesDir, frequenciesTmplPath, frequenciesTmplName, commit, outPath string) error {
|
||||||
|
freqs, err := getFrequencies(samplesDir)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
buf := &bytes.Buffer{}
|
||||||
|
if err := executeFrequenciesTemplate(buf, freqs, frequenciesTmplPath, frequenciesTmplName, commit); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return formatedWrite(outPath, buf.Bytes())
|
||||||
|
}
|
||||||
|
|
||||||
|
func getFrequencies(samplesDir string) (*samplesFrequencies, error) {
|
||||||
|
entries, err := ioutil.ReadDir(samplesDir)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var languageTotal int
|
||||||
|
var languages = make(map[string]int)
|
||||||
|
var tokensTotal int
|
||||||
|
var tokens = make(map[string]map[string]int)
|
||||||
|
var languageTokens = make(map[string]int)
|
||||||
|
|
||||||
|
for _, entry := range entries {
|
||||||
|
if !entry.IsDir() {
|
||||||
|
log.Println(err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
samples, err := getSamples(samplesDir, entry)
|
||||||
|
if err != nil {
|
||||||
|
log.Println(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(samples) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
samplesTokens, err := getTokens(samples)
|
||||||
|
if err != nil {
|
||||||
|
log.Println(err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
lang := entry.Name()
|
||||||
|
languageTotal += len(samples)
|
||||||
|
languages[lang] = len(samples)
|
||||||
|
tokensTotal += len(samplesTokens)
|
||||||
|
languageTokens[lang] = len(samplesTokens)
|
||||||
|
tokens[lang] = make(map[string]int)
|
||||||
|
for _, token := range samplesTokens {
|
||||||
|
tokens[lang][token]++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return &samplesFrequencies{
|
||||||
|
TokensTotal: tokensTotal,
|
||||||
|
LanguageTotal: languageTotal,
|
||||||
|
Tokens: tokens,
|
||||||
|
LanguageTokens: languageTokens,
|
||||||
|
Languages: languages,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func getSamples(samplesDir string, langDir os.FileInfo) ([]string, error) {
|
||||||
|
samples := []string{}
|
||||||
|
path := filepath.Join(samplesDir, langDir.Name())
|
||||||
|
entries, err := ioutil.ReadDir(path)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, entry := range entries {
|
||||||
|
if entry.Mode().IsRegular() {
|
||||||
|
samples = append(samples, filepath.Join(path, entry.Name()))
|
||||||
|
}
|
||||||
|
|
||||||
|
if entry.IsDir() && entry.Name() == samplesSubDir {
|
||||||
|
subSamples, err := getSubSamples(samplesDir, langDir.Name(), entry)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
samples = append(samples, subSamples...)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return samples, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// getSubSamples returns the paths of the regular files found directly inside
// samplesDir/langDir/<subLangDir name>. Subdirectories and other non-regular
// entries are ignored.
func getSubSamples(samplesDir, langDir string, subLangDir os.FileInfo) ([]string, error) {
	dir := filepath.Join(samplesDir, langDir, subLangDir.Name())
	infos, err := ioutil.ReadDir(dir)
	if err != nil {
		return nil, err
	}

	files := make([]string, 0, len(infos))
	for _, info := range infos {
		if !info.Mode().IsRegular() {
			continue
		}

		files = append(files, filepath.Join(dir, info.Name()))
	}

	return files, nil
}
|
||||||
|
|
||||||
|
func getTokens(samples []string) ([]string, error) {
|
||||||
|
tokens := make([]string, 0, 20)
|
||||||
|
var anyError error
|
||||||
|
for _, sample := range samples {
|
||||||
|
content, err := ioutil.ReadFile(sample)
|
||||||
|
if err != nil {
|
||||||
|
anyError = err
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
t := tokenizer.Tokenize(content)
|
||||||
|
tokens = append(tokens, t...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return tokens, anyError
|
||||||
|
}
|
||||||
|
|
||||||
|
func executeFrequenciesTemplate(out io.Writer, freqs *samplesFrequencies, frequenciesTmplPath, frequenciesTmpl, commit string) error {
|
||||||
|
fmap := template.FuncMap{
|
||||||
|
"getCommit": func() string { return commit },
|
||||||
|
"toFloat64": func(num int) string { return fmt.Sprintf("%f", float64(num)) },
|
||||||
|
"orderKeys": func(m map[string]int) []string {
|
||||||
|
keys := make([]string, 0, len(m))
|
||||||
|
for key := range m {
|
||||||
|
keys = append(keys, key)
|
||||||
|
}
|
||||||
|
|
||||||
|
sort.Strings(keys)
|
||||||
|
return keys
|
||||||
|
},
|
||||||
|
"languageLogProbability": func(language string) string {
|
||||||
|
num := math.Log(float64(freqs.Languages[language]) / float64(freqs.LanguageTotal))
|
||||||
|
return fmt.Sprintf("%f", num)
|
||||||
|
},
|
||||||
|
"orderMapMapKeys": func(mm map[string]map[string]int) []string {
|
||||||
|
keys := make([]string, 0, len(mm))
|
||||||
|
for key := range mm {
|
||||||
|
keys = append(keys, key)
|
||||||
|
}
|
||||||
|
|
||||||
|
sort.Strings(keys)
|
||||||
|
return keys
|
||||||
|
},
|
||||||
|
"tokenLogProbability": func(language, token string) string {
|
||||||
|
num := math.Log(float64(freqs.Tokens[language][token]) / float64(freqs.LanguageTokens[language]))
|
||||||
|
return fmt.Sprintf("%f", num)
|
||||||
|
},
|
||||||
|
"quote": strconv.Quote,
|
||||||
|
}
|
||||||
|
|
||||||
|
t := template.Must(template.New(frequenciesTmpl).Funcs(fmap).ParseFiles(frequenciesTmplPath))
|
||||||
|
if err := t.Execute(out, freqs); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
128805
internal/code-generator/generator/test_files/frequencies.gold
Normal file
128805
internal/code-generator/generator/test_files/frequencies.gold
Normal file
File diff suppressed because it is too large
Load Diff
@ -54,7 +54,13 @@ const (
|
|||||||
aliasesTmplPath = "internal/code-generator/assets/aliases.go.tmpl"
|
aliasesTmplPath = "internal/code-generator/assets/aliases.go.tmpl"
|
||||||
aliasesTmpl = "aliases.go.tmpl"
|
aliasesTmpl = "aliases.go.tmpl"
|
||||||
|
|
||||||
commitPath = ".git/refs/heads/master"
|
// frequencies.go generation
|
||||||
|
samplesDir = ".linguist/samples"
|
||||||
|
frequenciesFile = "frequencies.go"
|
||||||
|
frequenciesTmplPath = "internal/code-generator/assets/frequencies.go.tmpl"
|
||||||
|
frequenciesTmpl = "frequencies.go.tmpl"
|
||||||
|
|
||||||
|
commitPath = ".linguist/.git/refs/heads/master"
|
||||||
)
|
)
|
||||||
|
|
||||||
type generatorArgs struct {
|
type generatorArgs struct {
|
||||||
@ -88,6 +94,10 @@ func main() {
|
|||||||
log.Println(err)
|
log.Println(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err := generator.Frequencies(samplesDir, frequenciesTmplPath, frequenciesTmpl, commit, frequenciesFile); err != nil {
|
||||||
|
log.Println(err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func getCommit(path string) (string, error) {
|
func getCommit(path string) (string, error) {
|
||||||
|
169
internal/tokenizer/tokenize.go
Normal file
169
internal/tokenizer/tokenize.go
Normal file
@ -0,0 +1,169 @@
|
|||||||
|
package tokenizer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"regexp"
|
||||||
|
)
|
||||||
|
|
||||||
|
func Tokenize(content []byte) []string {
|
||||||
|
tokens := make([][]byte, 0, 50)
|
||||||
|
for _, extract := range extractTokens {
|
||||||
|
var extractedTokens [][]byte
|
||||||
|
content, extractedTokens = extract(content)
|
||||||
|
tokens = append(tokens, extractedTokens...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return toString(tokens)
|
||||||
|
}
|
||||||
|
|
||||||
|
// toString converts every token to a string, keeping the order. The result is
// never nil, even when tokens is empty.
func toString(tokens [][]byte) []string {
	converted := make([]string, len(tokens))
	for i := range tokens {
		converted[i] = string(tokens[i])
	}

	return converted
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
extractTokens = []func(content []byte) (replacedContent []byte, tokens [][]byte){
|
||||||
|
// The order must be this
|
||||||
|
extractAndReplaceShebang,
|
||||||
|
extractAndReplaceSGML,
|
||||||
|
skipCommentsAndLiterals,
|
||||||
|
extractAndReplacePunctuation,
|
||||||
|
extractAndReplaceRegular,
|
||||||
|
extractAndReplaceOperator,
|
||||||
|
extractRemainders,
|
||||||
|
}
|
||||||
|
|
||||||
|
reLiteralStringQuotes = regexp.MustCompile(`(?sU)(".*"|'.*')`)
|
||||||
|
reSingleLineComment = regexp.MustCompile(`(?m)(//|--|#|%|")(.*$)`)
|
||||||
|
reMultilineComment = regexp.MustCompile(`(?sU)(/\*.*\*/|<!--.*-->|\{-.*-\}|\(\*.*\*\)|""".*"""|'''.*''')`)
|
||||||
|
reLiteralNumber = regexp.MustCompile(`(0x[0-9A-Fa-f]([0-9A-Fa-f]|\.)*|\d(\d|\.)*)([uU][lL]{0,2}|([eE][-+]\d*)?[fFlL]*)`)
|
||||||
|
reShebang = regexp.MustCompile(`(?m)^#!(?:/\w+)*/(?:(\w+)|\w+(?:\s*\w+=\w+\s*)*\s*(\w+))(?:\s*-\w+\s*)*$`)
|
||||||
|
rePunctuation = regexp.MustCompile(`;|\{|\}|\(|\)|\[|\]`)
|
||||||
|
reSGML = regexp.MustCompile(`(?sU)(<\/?[^\s<>=\d"']+)(?:\s.*\/?>|>)`)
|
||||||
|
reSGMLComment = regexp.MustCompile(`(?sU)(<!--.*-->)`)
|
||||||
|
reSGMLAttributes = regexp.MustCompile(`\s+(\w+=)|\s+([^\s>]+)`)
|
||||||
|
reSGMLLoneAttribute = regexp.MustCompile(`(\w+)`)
|
||||||
|
reRegularToken = regexp.MustCompile(`[\w\.@#\/\*]+`)
|
||||||
|
reOperators = regexp.MustCompile(`<<?|\+|\-|\*|\/|%|&&?|\|\|?`)
|
||||||
|
|
||||||
|
regexToSkip = []*regexp.Regexp{
|
||||||
|
// The order must be this
|
||||||
|
reLiteralStringQuotes,
|
||||||
|
reMultilineComment,
|
||||||
|
reSingleLineComment,
|
||||||
|
reLiteralNumber,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
func extractAndReplaceShebang(content []byte) ([]byte, [][]byte) {
|
||||||
|
var shebangTokens [][]byte
|
||||||
|
matches := reShebang.FindAllSubmatch(content, -1)
|
||||||
|
if matches != nil {
|
||||||
|
shebangTokens = make([][]byte, 0, 2)
|
||||||
|
for _, match := range matches {
|
||||||
|
shebangToken := getShebangToken(match)
|
||||||
|
shebangTokens = append(shebangTokens, shebangToken)
|
||||||
|
}
|
||||||
|
|
||||||
|
reShebang.ReplaceAll(content, []byte(` `))
|
||||||
|
}
|
||||||
|
|
||||||
|
return content, shebangTokens
|
||||||
|
}
|
||||||
|
|
||||||
|
// getShebangToken builds the token for one shebang match: the prefix
// "SHEBANG#!" followed by the first non-empty capture group of matchedShebang
// (index 0, the whole match, is not considered). With no non-empty group the
// token is the bare prefix.
func getShebangToken(matchedShebang [][]byte) []byte {
	const prefix = `SHEBANG#!`

	result := []byte(prefix)
	for idx, group := range matchedShebang {
		if idx == 0 || len(group) == 0 {
			continue
		}

		result = append(result, group...)
		break
	}

	return result
}
|
||||||
|
|
||||||
|
// commonExtracAndReplace returns every match of re in content as a token,
// together with a copy of content in which each match is replaced by a single
// space.
func commonExtracAndReplace(content []byte, re *regexp.Regexp) ([]byte, [][]byte) {
	found := re.FindAll(content, -1)
	replaced := re.ReplaceAll(content, []byte(` `))

	return replaced, found
}
|
||||||
|
|
||||||
|
func extractAndReplacePunctuation(content []byte) ([]byte, [][]byte) {
|
||||||
|
return commonExtracAndReplace(content, rePunctuation)
|
||||||
|
}
|
||||||
|
|
||||||
|
func extractAndReplaceRegular(content []byte) ([]byte, [][]byte) {
|
||||||
|
return commonExtracAndReplace(content, reRegularToken)
|
||||||
|
}
|
||||||
|
|
||||||
|
func extractAndReplaceOperator(content []byte) ([]byte, [][]byte) {
|
||||||
|
return commonExtracAndReplace(content, reOperators)
|
||||||
|
}
|
||||||
|
|
||||||
|
func extractAndReplaceSGML(content []byte) ([]byte, [][]byte) {
|
||||||
|
var SGMLTokens [][]byte
|
||||||
|
matches := reSGML.FindAllSubmatch(content, -1)
|
||||||
|
if matches != nil {
|
||||||
|
SGMLTokens = make([][]byte, 0, 2)
|
||||||
|
for _, match := range matches {
|
||||||
|
if reSGMLComment.Match(match[0]) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
token := append(match[1], '>')
|
||||||
|
SGMLTokens = append(SGMLTokens, token)
|
||||||
|
attributes := getSGMLAttributes(match[0])
|
||||||
|
SGMLTokens = append(SGMLTokens, attributes...)
|
||||||
|
}
|
||||||
|
|
||||||
|
content = reSGML.ReplaceAll(content, []byte(` `))
|
||||||
|
}
|
||||||
|
|
||||||
|
return content, SGMLTokens
|
||||||
|
}
|
||||||
|
|
||||||
|
func getSGMLAttributes(SGMLTag []byte) [][]byte {
|
||||||
|
var attributes [][]byte
|
||||||
|
matches := reSGMLAttributes.FindAllSubmatch(SGMLTag, -1)
|
||||||
|
if matches != nil {
|
||||||
|
attributes = make([][]byte, 0, 5)
|
||||||
|
for _, match := range matches {
|
||||||
|
if len(match[1]) != 0 {
|
||||||
|
attributes = append(attributes, match[1])
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(match[2]) != 0 {
|
||||||
|
loneAttributes := reSGMLLoneAttribute.FindAll(match[2], -1)
|
||||||
|
attributes = append(attributes, loneAttributes...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return attributes
|
||||||
|
}
|
||||||
|
|
||||||
|
func skipCommentsAndLiterals(content []byte) ([]byte, [][]byte) {
|
||||||
|
for _, skip := range regexToSkip {
|
||||||
|
content = skip.ReplaceAll(content, []byte(` `))
|
||||||
|
}
|
||||||
|
|
||||||
|
return content, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractRemainders turns whatever is left in content into tokens: content is
// split on whitespace and every resulting field is split again into its
// individual UTF-8 sequences, each of which becomes one token. content itself
// is returned unchanged.
func extractRemainders(content []byte) ([]byte, [][]byte) {
	fields := bytes.Fields(content)
	pieces := make([][]byte, 0, len(fields)*3)
	for i := range fields {
		pieces = append(pieces, bytes.Split(fields[i], nil)...)
	}

	return content, pieces
}
|
107
internal/tokenizer/tokenize_test.go
Normal file
107
internal/tokenizer/tokenize_test.go
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
package tokenizer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
testContent = `#!/usr/bin/ruby
|
||||||
|
|
||||||
|
#!/usr/bin/env node
|
||||||
|
|
||||||
|
aaa
|
||||||
|
|
||||||
|
#!/usr/bin/env A=B foo=bar awk -f
|
||||||
|
|
||||||
|
#!python
|
||||||
|
|
||||||
|
func Tokenize(content []byte) []string {
|
||||||
|
splitted := bytes.Fields(content)
|
||||||
|
tokens := /* make([]string, 0, len(splitted))
|
||||||
|
no comment -- comment
|
||||||
|
for _, tokenByte := range splitted {
|
||||||
|
token64 := base64.StdEncoding.EncodeToString(tokenByte)
|
||||||
|
tokens = append(tokens, token64)
|
||||||
|
notcatchasanumber3.5
|
||||||
|
}*/
|
||||||
|
othercode
|
||||||
|
/* testing multiple
|
||||||
|
|
||||||
|
multiline comments*/
|
||||||
|
|
||||||
|
<!-- com
|
||||||
|
ment -->
|
||||||
|
<!-- comment 2-->
|
||||||
|
ppp no comment # comment
|
||||||
|
|
||||||
|
"literal1"
|
||||||
|
|
||||||
|
abb (tokenByte, 0xAF02) | ,3.2L
|
||||||
|
|
||||||
|
'literal2' notcatchasanumber3.5
|
||||||
|
|
||||||
|
5 += number * anotherNumber
|
||||||
|
if isTrue && isToo {
|
||||||
|
0b00001000 >> 1
|
||||||
|
}
|
||||||
|
|
||||||
|
return tokens
|
||||||
|
|
||||||
|
oneBool = 3 <= 2
|
||||||
|
varBool = 3<=2>
|
||||||
|
|
||||||
|
PyErr_SetString(PyExc_RuntimeError, "Relative import is not supported for Python <=2.4.");
|
||||||
|
|
||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||||
|
<head>
|
||||||
|
<title id="hola" class="">This is a XHTML sample file</title>
|
||||||
|
<style type="text/css"><![CDATA[
|
||||||
|
#example {
|
||||||
|
background-color: yellow;
|
||||||
|
}
|
||||||
|
]]></style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div id="example">
|
||||||
|
Just a simple <strong>XHTML</strong> test page.
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>`
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
tokensFromTestContent = []string{"SHEBANG#!ruby", "SHEBANG#!node", "SHEBANG#!awk", "<!DOCTYPE>", "PUBLIC", "W3C", "DTD", "XHTML", "1", "0",
|
||||||
|
"Strict", "EN", "http", "www", "w3", "org", "TR", "xhtml1", "DTD", "xhtml1", "strict", "dtd", "<html>", "<head>", "<title>", "class=",
|
||||||
|
"</title>", "<style>", "<![CDATA[>", "example", "background", "color", "yellow", "</style>", "</head>", "<body>", "<div>", "<strong>",
|
||||||
|
"</strong>", "</div>", "</body>", "</html>", "(", "[", "]", ")", "[", "]", "{", "(", ")", "(", ")", "{", "}", "(", ")", ";", ";", "}",
|
||||||
|
"]", "]", "aaa", "func", "Tokenize", "content", "byte", "string", "splitted", "bytes.Fields", "content", "tokens", "othercode", "ppp",
|
||||||
|
"no", "comment", "abb", "tokenByte", "notcatchasanumber", "number", "*", "anotherNumber", "if", "isTrue", "isToo", "b", "return",
|
||||||
|
"tokens", "oneBool", "varBool", "PyErr_SetString", "PyExc_RuntimeError", "html", "PUBLIC", "xmlns", "id", "class", "This", "is", "a",
|
||||||
|
"XHTML", "sample", "file", "type", "background", "color", "yellow", "id", "Just", "a", "simple", "XHTML", "test", "page.", "|", "+",
|
||||||
|
"&&", "<", "<", "-", ":", "=", ":", "=", ",", ",", "=", ">", ">", "=", "=", "=", "=", ">", ",", ">", "=", ">", "=", "=", ">", "=", ">",
|
||||||
|
":", ">", "=", ">"}
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestTokenize(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
content []byte
|
||||||
|
expected []string
|
||||||
|
}{
|
||||||
|
{name: "content", content: []byte(testContent), expected: tokensFromTestContent},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range tests {
|
||||||
|
t.Run(test.name, func(t *testing.T) {
|
||||||
|
tokens := Tokenize(test.content)
|
||||||
|
assert.Equal(t, len(test.expected), len(tokens), fmt.Sprintf("token' slice length = %v, want %v", len(test.expected), len(tokens)))
|
||||||
|
for i, expectedToken := range test.expected {
|
||||||
|
assert.Equal(t, expectedToken, tokens[i], fmt.Sprintf("token = %v, want %v", tokens[i], expectedToken))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
@ -2,7 +2,7 @@ package slinguist
|
|||||||
|
|
||||||
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
|
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
|
||||||
// THIS FILE SHOULD NOT BE EDITED BY HAND
|
// THIS FILE SHOULD NOT BE EDITED BY HAND
|
||||||
// Extracted from github/linguist commit: dae33dc2b20cddc85d1300435c3be7118a7115a9
|
// Extracted from github/linguist commit: 60f864a138650dd17fafc94814be9ee2d3aaef8c
|
||||||
|
|
||||||
var languagesByInterpreter = map[string][]string{
|
var languagesByInterpreter = map[string][]string{
|
||||||
"Rscript": {"R"},
|
"Rscript": {"R"},
|
||||||
@ -36,6 +36,7 @@ var languagesByInterpreter = map[string][]string{
|
|||||||
"io": {"Io"},
|
"io": {"Io"},
|
||||||
"ioke": {"Ioke"},
|
"ioke": {"Ioke"},
|
||||||
"jconsole": {"J"},
|
"jconsole": {"J"},
|
||||||
|
"jolie": {"Jolie"},
|
||||||
"jruby": {"Ruby"},
|
"jruby": {"Ruby"},
|
||||||
"julia": {"Julia"},
|
"julia": {"Julia"},
|
||||||
"lisp": {"Common Lisp"},
|
"lisp": {"Common Lisp"},
|
||||||
|
29
modeline.go
29
modeline.go
@ -10,10 +10,9 @@ import (
|
|||||||
func GetLanguageByModeline(content []byte) (lang string, safe bool) {
|
func GetLanguageByModeline(content []byte) (lang string, safe bool) {
|
||||||
headFoot := getHeaderAndFooter(content)
|
headFoot := getHeaderAndFooter(content)
|
||||||
for _, getLang := range modelinesFunc {
|
for _, getLang := range modelinesFunc {
|
||||||
lang = getLang(headFoot)
|
lang, safe = getLang(headFoot)
|
||||||
safe = lang != OtherLanguage
|
|
||||||
if safe {
|
if safe {
|
||||||
return
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -23,7 +22,7 @@ func GetLanguageByModeline(content []byte) (lang string, safe bool) {
|
|||||||
func getHeaderAndFooter(content []byte) []byte {
|
func getHeaderAndFooter(content []byte) []byte {
|
||||||
const (
|
const (
|
||||||
searchScope = 5
|
searchScope = 5
|
||||||
eol = `\n`
|
eol = "\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
if bytes.Count(content, []byte(eol)) < 2*searchScope {
|
if bytes.Count(content, []byte(eol)) < 2*searchScope {
|
||||||
@ -37,7 +36,7 @@ func getHeaderAndFooter(content []byte) []byte {
|
|||||||
return bytes.Join(headerAndFooter, []byte(eol))
|
return bytes.Join(headerAndFooter, []byte(eol))
|
||||||
}
|
}
|
||||||
|
|
||||||
var modelinesFunc = []func(content []byte) string{
|
var modelinesFunc = []func(content []byte) (string, bool){
|
||||||
GetLanguageByEmacsModeline,
|
GetLanguageByEmacsModeline,
|
||||||
GetLanguageByVimModeline,
|
GetLanguageByVimModeline,
|
||||||
}
|
}
|
||||||
@ -50,11 +49,11 @@ var (
|
|||||||
)
|
)
|
||||||
|
|
||||||
// GetLanguageByEmacsModeline detecs if the content has a emacs modeline and try to get a
|
// GetLanguageByEmacsModeline detecs if the content has a emacs modeline and try to get a
|
||||||
// language basing on alias. If couldn't retrieve a valid language, it returns OtherLanguage.
|
// language basing on alias. If couldn't retrieve a valid language, it returns OtherLanguage and false.
|
||||||
func GetLanguageByEmacsModeline(content []byte) (lang string) {
|
func GetLanguageByEmacsModeline(content []byte) (string, bool) {
|
||||||
matched := reEmacsModeline.FindAllSubmatch(content, -1)
|
matched := reEmacsModeline.FindAllSubmatch(content, -1)
|
||||||
if matched == nil {
|
if matched == nil {
|
||||||
return OtherLanguage
|
return OtherLanguage, false
|
||||||
}
|
}
|
||||||
|
|
||||||
// only take the last matched line, discard previous lines
|
// only take the last matched line, discard previous lines
|
||||||
@ -67,23 +66,22 @@ func GetLanguageByEmacsModeline(content []byte) (lang string) {
|
|||||||
alias = string(lastLineMatched)
|
alias = string(lastLineMatched)
|
||||||
}
|
}
|
||||||
|
|
||||||
lang = GetLanguageByAlias(alias)
|
return GetLanguageByAlias(alias)
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetLanguageByVimModeline detecs if the content has a vim modeline and try to get a
|
// GetLanguageByVimModeline detecs if the content has a vim modeline and try to get a
|
||||||
// language basing on alias. If couldn't retrieve a valid language, it returns OtherLanguage.
|
// language basing on alias. If couldn't retrieve a valid language, it returns OtherLanguage and false.
|
||||||
func GetLanguageByVimModeline(content []byte) (lang string) {
|
func GetLanguageByVimModeline(content []byte) (string, bool) {
|
||||||
matched := reVimModeline.FindAllSubmatch(content, -1)
|
matched := reVimModeline.FindAllSubmatch(content, -1)
|
||||||
if matched == nil {
|
if matched == nil {
|
||||||
return OtherLanguage
|
return OtherLanguage, false
|
||||||
}
|
}
|
||||||
|
|
||||||
// only take the last matched line, discard previous lines
|
// only take the last matched line, discard previous lines
|
||||||
lastLineMatched := matched[len(matched)-1][1]
|
lastLineMatched := matched[len(matched)-1][1]
|
||||||
matchedAlias := reVimLang.FindAllSubmatch(lastLineMatched, -1)
|
matchedAlias := reVimLang.FindAllSubmatch(lastLineMatched, -1)
|
||||||
if matchedAlias == nil {
|
if matchedAlias == nil {
|
||||||
return OtherLanguage
|
return OtherLanguage, false
|
||||||
}
|
}
|
||||||
|
|
||||||
alias := string(matchedAlias[0][1])
|
alias := string(matchedAlias[0][1])
|
||||||
@ -100,6 +98,5 @@ func GetLanguageByVimModeline(content []byte) (lang string) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
lang = GetLanguageByAlias(alias)
|
return GetLanguageByAlias(alias)
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
@ -9,6 +9,7 @@ import (
|
|||||||
|
|
||||||
const (
|
const (
|
||||||
modelinesDir = ".linguist/test/fixtures/Data/Modelines"
|
modelinesDir = ".linguist/test/fixtures/Data/Modelines"
|
||||||
|
samplesDir = ".linguist/samples"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (s *TSuite) TestGetLanguageByModeline(c *C) {
|
func (s *TSuite) TestGetLanguageByModeline(c *C) {
|
||||||
@ -18,42 +19,43 @@ func (s *TSuite) TestGetLanguageByModeline(c *C) {
|
|||||||
expectedSafe bool
|
expectedSafe bool
|
||||||
}{
|
}{
|
||||||
// Emacs
|
// Emacs
|
||||||
{filename: "example_smalltalk.md", expectedLang: "Smalltalk", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "example_smalltalk.md"), expectedLang: "Smalltalk", expectedSafe: true},
|
||||||
{filename: "fundamentalEmacs.c", expectedLang: "Text", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "fundamentalEmacs.c"), expectedLang: "Text", expectedSafe: true},
|
||||||
{filename: "iamphp.inc", expectedLang: "PHP", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "iamphp.inc"), expectedLang: "PHP", expectedSafe: true},
|
||||||
{filename: "seeplusplusEmacs1", expectedLang: "C++", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "seeplusplusEmacs1"), expectedLang: "C++", expectedSafe: true},
|
||||||
{filename: "seeplusplusEmacs2", expectedLang: "C++", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "seeplusplusEmacs2"), expectedLang: "C++", expectedSafe: true},
|
||||||
{filename: "seeplusplusEmacs3", expectedLang: "C++", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "seeplusplusEmacs3"), expectedLang: "C++", expectedSafe: true},
|
||||||
{filename: "seeplusplusEmacs4", expectedLang: "C++", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "seeplusplusEmacs4"), expectedLang: "C++", expectedSafe: true},
|
||||||
{filename: "seeplusplusEmacs5", expectedLang: "C++", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "seeplusplusEmacs5"), expectedLang: "C++", expectedSafe: true},
|
||||||
{filename: "seeplusplusEmacs6", expectedLang: "C++", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "seeplusplusEmacs6"), expectedLang: "C++", expectedSafe: true},
|
||||||
{filename: "seeplusplusEmacs7", expectedLang: "C++", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "seeplusplusEmacs7"), expectedLang: "C++", expectedSafe: true},
|
||||||
{filename: "seeplusplusEmacs9", expectedLang: "C++", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "seeplusplusEmacs9"), expectedLang: "C++", expectedSafe: true},
|
||||||
{filename: "seeplusplusEmacs10", expectedLang: "C++", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "seeplusplusEmacs10"), expectedLang: "C++", expectedSafe: true},
|
||||||
{filename: "seeplusplusEmacs11", expectedLang: "C++", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "seeplusplusEmacs11"), expectedLang: "C++", expectedSafe: true},
|
||||||
{filename: "seeplusplusEmacs12", expectedLang: "C++", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "seeplusplusEmacs12"), expectedLang: "C++", expectedSafe: true},
|
||||||
|
|
||||||
// Vim
|
// Vim
|
||||||
{filename: "seeplusplus", expectedLang: "C++", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "seeplusplus"), expectedLang: "C++", expectedSafe: true},
|
||||||
{filename: "iamjs.pl", expectedLang: "JavaScript", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "iamjs.pl"), expectedLang: "JavaScript", expectedSafe: true},
|
||||||
{filename: "iamjs2.pl", expectedLang: "JavaScript", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "iamjs2.pl"), expectedLang: "JavaScript", expectedSafe: true},
|
||||||
{filename: "not_perl.pl", expectedLang: "Prolog", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "not_perl.pl"), expectedLang: "Prolog", expectedSafe: true},
|
||||||
{filename: "ruby", expectedLang: "Ruby", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "ruby"), expectedLang: "Ruby", expectedSafe: true},
|
||||||
{filename: "ruby2", expectedLang: "Ruby", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "ruby2"), expectedLang: "Ruby", expectedSafe: true},
|
||||||
{filename: "ruby3", expectedLang: "Ruby", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "ruby3"), expectedLang: "Ruby", expectedSafe: true},
|
||||||
{filename: "ruby4", expectedLang: "Ruby", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "ruby4"), expectedLang: "Ruby", expectedSafe: true},
|
||||||
{filename: "ruby5", expectedLang: "Ruby", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "ruby5"), expectedLang: "Ruby", expectedSafe: true},
|
||||||
{filename: "ruby6", expectedLang: "Ruby", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "ruby6"), expectedLang: "Ruby", expectedSafe: true},
|
||||||
{filename: "ruby7", expectedLang: "Ruby", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "ruby7"), expectedLang: "Ruby", expectedSafe: true},
|
||||||
{filename: "ruby8", expectedLang: "Ruby", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "ruby8"), expectedLang: "Ruby", expectedSafe: true},
|
||||||
{filename: "ruby9", expectedLang: "Ruby", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "ruby9"), expectedLang: "Ruby", expectedSafe: true},
|
||||||
{filename: "ruby10", expectedLang: "Ruby", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "ruby10"), expectedLang: "Ruby", expectedSafe: true},
|
||||||
{filename: "ruby11", expectedLang: "Ruby", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "ruby11"), expectedLang: "Ruby", expectedSafe: true},
|
||||||
{filename: "ruby12", expectedLang: "Ruby", expectedSafe: true},
|
{filename: filepath.Join(modelinesDir, "ruby12"), expectedLang: "Ruby", expectedSafe: true},
|
||||||
|
{filename: filepath.Join(samplesDir, "C/main.c"), expectedLang: OtherLanguage, expectedSafe: false},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, test := range linguistTests {
|
for _, test := range linguistTests {
|
||||||
content, err := ioutil.ReadFile(filepath.Join(modelinesDir, test.filename))
|
content, err := ioutil.ReadFile(test.filename)
|
||||||
c.Assert(err, Equals, nil)
|
c.Assert(err, Equals, nil)
|
||||||
|
|
||||||
lang, safe := GetLanguageByModeline(content)
|
lang, safe := GetLanguageByModeline(content)
|
||||||
@ -62,8 +64,9 @@ func (s *TSuite) TestGetLanguageByModeline(c *C) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
wrongVim = `# vim: set syntax=ruby ft =python filetype=perl :`
|
wrongVim = `# vim: set syntax=ruby ft =python filetype=perl :`
|
||||||
rightVim = `/* vim: set syntax=python ft =python filetype=python */`
|
rightVim = `/* vim: set syntax=python ft =python filetype=python */`
|
||||||
|
noLangVim = `/* vim: set shiftwidth=4 softtabstop=0 cindent cinoptions={1s: */`
|
||||||
)
|
)
|
||||||
|
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
@ -73,6 +76,7 @@ func (s *TSuite) TestGetLanguageByModeline(c *C) {
|
|||||||
}{
|
}{
|
||||||
{content: []byte(wrongVim), expectedLang: OtherLanguage, expectedSafe: false},
|
{content: []byte(wrongVim), expectedLang: OtherLanguage, expectedSafe: false},
|
||||||
{content: []byte(rightVim), expectedLang: "Python", expectedSafe: true},
|
{content: []byte(rightVim), expectedLang: "Python", expectedSafe: true},
|
||||||
|
{content: []byte(noLangVim), expectedLang: OtherLanguage, expectedSafe: false},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
|
@ -2,7 +2,7 @@ package slinguist
|
|||||||
|
|
||||||
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
|
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
|
||||||
// THIS FILE SHOULD NOT BE EDITED BY HAND
|
// THIS FILE SHOULD NOT BE EDITED BY HAND
|
||||||
// Extracted from github/linguist commit: dae33dc2b20cddc85d1300435c3be7118a7115a9
|
// Extracted from github/linguist commit: 60f864a138650dd17fafc94814be9ee2d3aaef8c
|
||||||
|
|
||||||
var languagesType = map[string]Type{
|
var languagesType = map[string]Type{
|
||||||
"1C Enterprise": Programming,
|
"1C Enterprise": Programming,
|
||||||
@ -59,6 +59,7 @@ var languagesType = map[string]Type{
|
|||||||
"CSON": Data,
|
"CSON": Data,
|
||||||
"CSS": Markup,
|
"CSS": Markup,
|
||||||
"CSV": Data,
|
"CSV": Data,
|
||||||
|
"CWeb": Programming,
|
||||||
"Cap'n Proto": Programming,
|
"Cap'n Proto": Programming,
|
||||||
"CartoCSS": Programming,
|
"CartoCSS": Programming,
|
||||||
"Ceylon": Programming,
|
"Ceylon": Programming,
|
||||||
@ -70,6 +71,7 @@ var languagesType = map[string]Type{
|
|||||||
"Clean": Programming,
|
"Clean": Programming,
|
||||||
"Click": Programming,
|
"Click": Programming,
|
||||||
"Clojure": Programming,
|
"Clojure": Programming,
|
||||||
|
"Closure Templates": Markup,
|
||||||
"CoffeeScript": Programming,
|
"CoffeeScript": Programming,
|
||||||
"ColdFusion": Programming,
|
"ColdFusion": Programming,
|
||||||
"ColdFusion CFC": Programming,
|
"ColdFusion CFC": Programming,
|
||||||
@ -193,6 +195,7 @@ var languagesType = map[string]Type{
|
|||||||
"JavaScript": Programming,
|
"JavaScript": Programming,
|
||||||
"Jison": Programming,
|
"Jison": Programming,
|
||||||
"Jison Lex": Programming,
|
"Jison Lex": Programming,
|
||||||
|
"Jolie": Programming,
|
||||||
"Julia": Programming,
|
"Julia": Programming,
|
||||||
"Jupyter Notebook": Markup,
|
"Jupyter Notebook": Markup,
|
||||||
"KRL": Programming,
|
"KRL": Programming,
|
||||||
@ -297,6 +300,7 @@ var languagesType = map[string]Type{
|
|||||||
"Parrot Assembly": Programming,
|
"Parrot Assembly": Programming,
|
||||||
"Parrot Internal Representation": Programming,
|
"Parrot Internal Representation": Programming,
|
||||||
"Pascal": Programming,
|
"Pascal": Programming,
|
||||||
|
"Pep8": Programming,
|
||||||
"Perl": Programming,
|
"Perl": Programming,
|
||||||
"Perl6": Programming,
|
"Perl6": Programming,
|
||||||
"Pic": Markup,
|
"Pic": Markup,
|
||||||
@ -368,6 +372,7 @@ var languagesType = map[string]Type{
|
|||||||
"Scheme": Programming,
|
"Scheme": Programming,
|
||||||
"Scilab": Programming,
|
"Scilab": Programming,
|
||||||
"Self": Programming,
|
"Self": Programming,
|
||||||
|
"ShaderLab": Programming,
|
||||||
"Shell": Programming,
|
"Shell": Programming,
|
||||||
"ShellSession": Programming,
|
"ShellSession": Programming,
|
||||||
"Shen": Programming,
|
"Shen": Programming,
|
||||||
@ -403,6 +408,7 @@ var languagesType = map[string]Type{
|
|||||||
"Turing": Programming,
|
"Turing": Programming,
|
||||||
"Turtle": Data,
|
"Turtle": Data,
|
||||||
"Twig": Markup,
|
"Twig": Markup,
|
||||||
|
"Type Language": Data,
|
||||||
"TypeScript": Programming,
|
"TypeScript": Programming,
|
||||||
"Unified Parallel C": Programming,
|
"Unified Parallel C": Programming,
|
||||||
"Unity3D Asset": Data,
|
"Unity3D Asset": Data,
|
||||||
|
@ -2,7 +2,7 @@ package slinguist
|
|||||||
|
|
||||||
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
|
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
|
||||||
// THIS FILE SHOULD NOT BE EDITED BY HAND
|
// THIS FILE SHOULD NOT BE EDITED BY HAND
|
||||||
// Extracted from github/linguist commit: dae33dc2b20cddc85d1300435c3be7118a7115a9
|
// Extracted from github/linguist commit: 60f864a138650dd17fafc94814be9ee2d3aaef8c
|
||||||
|
|
||||||
import "gopkg.in/toqueteos/substring.v1"
|
import "gopkg.in/toqueteos/substring.v1"
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user