Merge pull request #34 from mcarmonaa/improvement-detection

Improved detection and changed strategies' signature
This commit is contained in:
Santiago M. Mola 2017-06-15 13:17:22 +02:00 committed by GitHub
commit 91cee517c5
52 changed files with 280263 additions and 231165 deletions

View File

@ -2,15 +2,15 @@ package enry
import ( import (
"math" "math"
"sort"
"gopkg.in/src-d/enry.v1/internal/tokenizer" "gopkg.in/src-d/enry.v1/internal/tokenizer"
) )
// Classifier is the interface that contains the method Classify which is in charge to assign scores to the possibles candidates. // Classifier is the interface in charge to detect the possible languages of the given content based on a set of
// The scores must order the candidates so as the highest score be the most probably language of the content. The candidates is // candidates. Candidates is a map which can be used to assign weights to languages dynamically.
// a map which can be used to assign weights to languages dynamically.
type Classifier interface { type Classifier interface {
Classify(content []byte, candidates map[string]float64) map[string]float64 Classify(content []byte, candidates map[string]float64) (languages []string)
} }
type classifier struct { type classifier struct {
@ -19,7 +19,13 @@ type classifier struct {
tokensTotal float64 tokensTotal float64
} }
func (c *classifier) Classify(content []byte, candidates map[string]float64) map[string]float64 { type scoredLanguage struct {
language string
score float64
}
// Classify returns a sorted slice of possible languages sorted by decreasing language's probability
func (c *classifier) Classify(content []byte, candidates map[string]float64) []string {
if len(content) == 0 { if len(content) == 0 {
return nil return nil
} }
@ -31,18 +37,35 @@ func (c *classifier) Classify(content []byte, candidates map[string]float64) map
languages = make(map[string]float64, len(candidates)) languages = make(map[string]float64, len(candidates))
for candidate, weight := range candidates { for candidate, weight := range candidates {
if lang, ok := GetLanguageByAlias(candidate); ok { if lang, ok := GetLanguageByAlias(candidate); ok {
languages[lang] = weight candidate = lang
} }
languages[candidate] = weight
} }
} }
tokens := tokenizer.Tokenize(content) tokens := tokenizer.Tokenize(content)
scores := make(map[string]float64, len(languages)) scoredLangs := make([]*scoredLanguage, 0, len(languages))
for language := range languages { for language := range languages {
scores[language] = c.tokensLogProbability(tokens, language) + c.languagesLogProbabilities[language] scoredLang := &scoredLanguage{
language: language,
score: c.tokensLogProbability(tokens, language) + c.languagesLogProbabilities[language],
}
scoredLangs = append(scoredLangs, scoredLang)
} }
return scores return sortLanguagesByScore(scoredLangs)
}
func sortLanguagesByScore(scoredLangs []*scoredLanguage) []string {
sort.SliceStable(scoredLangs, func(i, j int) bool { return scoredLangs[j].score < scoredLangs[i].score })
sortedLanguages := make([]string, 0, len(scoredLangs))
for _, scoredLang := range scoredLangs {
sortedLanguages = append(sortedLanguages, scoredLang.language)
}
return sortedLanguages
} }
func (c *classifier) knownLangs() map[string]float64 { func (c *classifier) knownLangs() map[string]float64 {

422
common.go
View File

@ -1,96 +1,371 @@
package enry package enry
import ( import (
"math" "bufio"
"bytes"
"path/filepath" "path/filepath"
"regexp"
"strings" "strings"
) )
// OtherLanguage is used as a zero value when a function can not return a specific language. // OtherLanguage is used as a zero value when a function can not return a specific language.
const OtherLanguage = "Other" const OtherLanguage = ""
// Strategy type fix the signature for the functions that can be used as a strategy. // Strategy type fix the signature for the functions that can be used as a strategy.
type Strategy func(filename string, content []byte) (languages []string) type Strategy func(filename string, content []byte, candidates []string) (languages []string)
var strategies = []Strategy{ // DefaultStrategies is the strategies' sequence GetLanguage uses to detect languages.
var DefaultStrategies = []Strategy{
GetLanguagesByModeline, GetLanguagesByModeline,
GetLanguagesByFilename, GetLanguagesByFilename,
GetLanguagesByShebang, GetLanguagesByShebang,
GetLanguagesByExtension, GetLanguagesByExtension,
GetLanguagesByContent, GetLanguagesByContent,
GetLanguagesByClassifier,
} }
// GetLanguage applies a sequence of strategies based on the given filename and content // GetLanguage applies a sequence of strategies based on the given filename and content
// to find out the most probably language to return. // to find out the most probably language to return.
func GetLanguage(filename string, content []byte) string { func GetLanguage(filename string, content []byte) (language string) {
candidates := map[string]float64{} languages := GetLanguages(filename, content)
for _, strategy := range strategies { return firstLanguage(languages)
languages := strategy(filename, content) }
// firstLanguage returns the first element of languages, or OtherLanguage when
// the slice is empty.
func firstLanguage(languages []string) string {
	if len(languages) > 0 {
		return languages[0]
	}

	return OtherLanguage
}
// GetLanguageByModeline returns detected language. If there are more than one possibles languages
// it returns the first language by alphabetically order and safe to false.
func GetLanguageByModeline(content []byte) (language string, safe bool) {
return getLanguageByStrategy(GetLanguagesByModeline, "", content, nil)
}
// GetLanguageByEmacsModeline returns detected language. If there are more than one possibles languages
// it returns the first language by alphabetically order and safe to false.
func GetLanguageByEmacsModeline(content []byte) (language string, safe bool) {
return getLanguageByStrategy(GetLanguagesByEmacsModeline, "", content, nil)
}
// GetLanguageByVimModeline returns detected language. If there are more than one possibles languages
// it returns the first language by alphabetically order and safe to false.
func GetLanguageByVimModeline(content []byte) (language string, safe bool) {
return getLanguageByStrategy(GetLanguagesByVimModeline, "", content, nil)
}
// GetLanguageByFilename returns detected language. If there are more than one possibles languages
// it returns the first language by alphabetically order and safe to false.
func GetLanguageByFilename(filename string) (language string, safe bool) {
return getLanguageByStrategy(GetLanguagesByFilename, filename, nil, nil)
}
// GetLanguageByShebang returns detected language. If there are more than one possibles languages
// it returns the first language by alphabetically order and safe to false.
func GetLanguageByShebang(content []byte) (language string, safe bool) {
return getLanguageByStrategy(GetLanguagesByShebang, "", content, nil)
}
// GetLanguageByExtension returns detected language. If there are more than one possibles languages
// it returns the first language by alphabetically order and safe to false.
func GetLanguageByExtension(filename string) (language string, safe bool) {
return getLanguageByStrategy(GetLanguagesByExtension, filename, nil, nil)
}
// GetLanguageByContent returns detected language. If there are more than one possibles languages
// it returns the first language by alphabetically order and safe to false.
func GetLanguageByContent(content []byte) (language string, safe bool) {
return getLanguageByStrategy(GetLanguagesByContent, "", content, nil)
}
// GetLanguageByClassifier returns the most probably language detected for the given content. It uses
// DefaultClassifier, if no candidates are provided it returns OtherLanguage.
func GetLanguageByClassifier(content []byte, candidates []string) (language string, safe bool) {
return getLanguageByStrategy(GetLanguagesByClassifier, "", content, candidates)
}
// getLanguageByStrategy runs strategy with the given arguments and reduces its result
// to the first language plus a flag telling whether it was the only one.
func getLanguageByStrategy(strategy Strategy, filename string, content []byte, candidates []string) (string, bool) {
	return getFirstLanguageAndSafe(strategy(filename, content, candidates))
}
// getFirstLanguageAndSafe returns the first entry of languages (OtherLanguage when
// the slice is empty) and reports safe as true only when languages holds exactly
// one entry.
func getFirstLanguageAndSafe(languages []string) (language string, safe bool) {
	return firstLanguage(languages), len(languages) == 1
}
// GetLanguageBySpecificClassifier returns the first language that
// GetLanguagesBySpecificClassifier reports for content, using the given classifier and
// candidates, or OtherLanguage when it reports none. safe is true only when exactly
// one language was reported.
func GetLanguageBySpecificClassifier(content []byte, candidates []string, classifier Classifier) (language string, safe bool) {
	return getFirstLanguageAndSafe(GetLanguagesBySpecificClassifier(content, candidates, classifier))
}
// GetLanguages applies a sequence of strategies based on the given filename and content
// to find out the most probably languages to return.
func GetLanguages(filename string, content []byte) []string {
var languages []string
candidates := []string{}
for _, strategy := range DefaultStrategies {
languages = strategy(filename, content, candidates)
if len(languages) == 1 { if len(languages) == 1 {
return languages[0] return languages
} }
if len(languages) > 0 { if len(languages) > 0 {
for _, language := range languages { candidates = append(candidates, languages...)
candidates[language]++ }
}
return languages
}
// GetLanguagesByModeline returns the languages named by an editor modeline in content.
// Only the portion of content selected by getHeaderAndFooter is inspected. The detectors
// in modelinesFunc are tried in order and the first non-empty result wins. filename is
// not used; candidates is handed to each detector unchanged. It complies with the
// Strategy signature.
func GetLanguagesByModeline(filename string, content []byte, candidates []string) []string {
	scope := getHeaderAndFooter(content)

	var found []string
	for _, detect := range modelinesFunc {
		if found = detect("", scope, candidates); len(found) != 0 {
			return found
		}
	}

	return found
}
// modelinesFunc lists the modeline detectors in the order GetLanguagesByModeline
// tries them: Emacs modelines first, then Vim modelines.
var modelinesFunc = []Strategy{
GetLanguagesByEmacsModeline,
GetLanguagesByVimModeline,
}
// getHeaderAndFooter returns the part of content that is searched for editor
// modelines: the first searchScope lines followed by the last searchScope lines.
// Content holding fewer than 2*searchScope newlines is returned unchanged.
//
// The header keeps its terminating newline so that the last header line and the
// first footer line remain separate lines. Without it the two lines are fused and a
// modeline opening the footer no longer starts a line, so line-anchored modeline
// patterns miss it.
func getHeaderAndFooter(content []byte) []byte {
	const searchScope = 5

	if bytes.Count(content, []byte("\n")) < 2*searchScope {
		return content
	}

	// headScope yields the index of the newline closing the header; +1 keeps it.
	header := headScope(content, searchScope) + 1
	footer := footScope(content, searchScope)

	// Build a fresh slice so the caller's content is never modified.
	headerAndFooter := make([]byte, 0, header+len(content)-footer)
	headerAndFooter = append(headerAndFooter, content[:header]...)
	headerAndFooter = append(headerAndFooter, content[footer:]...)

	return headerAndFooter
}

// headScope returns the index in content of the newline that terminates the
// scope-th line. content must contain at least scope newlines.
func headScope(content []byte, scope int) (index int) {
	for i := 0; i < scope; i++ {
		eol := bytes.IndexByte(content, '\n')
		content = content[eol+1:]
		index += eol
	}

	// index sums the line lengths without their newlines; add back the scope-1
	// newlines that precede the last one found.
	return index + scope - 1
}

// footScope returns the index in content of the first byte after the scope-th
// newline counted from the end. content must contain at least scope newlines.
func footScope(content []byte, scope int) (index int) {
	for i := 0; i < scope; i++ {
		index = bytes.LastIndexByte(content, '\n')
		content = content[:index]
	}

	return index + 1
}
// Patterns used to locate editor modelines and the language alias they declare.
var (
// reEmacsModeline matches a line containing "-*- ... -*-" and captures the text
// between the two markers, with surrounding whitespace trimmed.
reEmacsModeline = regexp.MustCompile(`.*-\*-\s*(.+?)\s*-\*-.*(?m:$)`)
// reEmacsLang captures the value that follows "mode:" (case-insensitive), up to
// the next whitespace or ';'.
reEmacsLang = regexp.MustCompile(`.*(?i:mode)\s*:\s*([^\s;]+)\s*;*.*`)
// reVimModeline matches a "vi", "vim" (optionally with a version such as "vim>700")
// or "ex" marker followed by ':' and captures the rest of that line.
reVimModeline = regexp.MustCompile(`(?:(?m:\s|^)vi(?:m[<=>]?\d+|m)?|[\t\x20]*ex)\s*[:]\s*(.*)(?m:$)`)
// reVimLang captures the word assigned to filetype, ft or syntax (case-insensitive).
reVimLang = regexp.MustCompile(`(?i:filetype|ft|syntax)\s*=(\w+)(?:\s|:|$)`)
)
// GetLanguagesByEmacsModeline returns the language declared by an Emacs modeline
// ("-*- ... -*-") found in content, as a one-element slice, or nil when there is no
// modeline or its alias is not known to GetLanguageByAlias. filename and candidates
// are not used. It complies with the Strategy signature.
func GetLanguagesByEmacsModeline(filename string, content []byte, candidates []string) []string {
	modelines := reEmacsModeline.FindAllSubmatch(content, -1)
	if len(modelines) == 0 {
		return nil
	}

	// Only the last modeline found counts; earlier ones are discarded.
	modeline := modelines[len(modelines)-1][1]

	// Prefer an explicit "mode: <alias>" entry; otherwise the whole modeline body
	// is taken as the alias.
	alias := string(modeline)
	if mode := reEmacsLang.FindSubmatch(modeline); mode != nil {
		alias = string(mode[1])
	}

	if language, ok := GetLanguageByAlias(alias); ok {
		return []string{language}
	}

	return nil
}
// GetLanguagesByVimModeline returns a slice of possible languages for the given content, filename and candidates
// will be ignored. It is comply with the signature to be a Strategy type.
func GetLanguagesByVimModeline(filename string, content []byte, candidates []string) []string {
matched := reVimModeline.FindAllSubmatch(content, -1)
if matched == nil {
return nil
}
// only take the last matched line, discard previous lines
lastLineMatched := matched[len(matched)-1][1]
matchedAlias := reVimLang.FindAllSubmatch(lastLineMatched, -1)
if matchedAlias == nil {
return nil
}
alias := string(matchedAlias[0][1])
if len(matchedAlias) > 1 {
// cases:
// matchedAlias = [["syntax=ruby " "ruby"] ["ft=python " "python"] ["filetype=perl " "perl"]] returns OtherLanguage;
// matchedAlias = [["syntax=python " "python"] ["ft=python " "python"] ["filetype=python " "python"]] returns "Python";
for _, match := range matchedAlias {
otherAlias := string(match[1])
if otherAlias != alias {
return nil
} }
} }
} }
if len(candidates) == 0 { language, ok := GetLanguageByAlias(alias)
return OtherLanguage if !ok {
return nil
} }
lang := GetLanguageByClassifier(content, candidates, nil) return []string{language}
return lang
} }
// GetLanguageByModeline returns the language of the given content looking for the modeline, // GetLanguagesByFilename returns a slice of possible languages for the given filename, content and candidates
// and safe to indicate the sureness of returned language. // will be ignored. It is comply with the signature to be a Strategy type.
func GetLanguageByModeline(content []byte) (lang string, safe bool) { func GetLanguagesByFilename(filename string, content []byte, candidates []string) []string {
return getLangAndSafe("", content, GetLanguagesByModeline) return languagesByFilename[filepath.Base(filename)]
} }
// GetLanguageByFilename returns a language based on the given filename, and safe to indicate // GetLanguagesByShebang returns a slice of possible languages for the given content, filename and candidates
// the sureness of returned language. // will be ignored. It is comply with the signature to be a Strategy type.
func GetLanguageByFilename(filename string) (lang string, safe bool) { func GetLanguagesByShebang(filename string, content []byte, candidates []string) (languages []string) {
return getLangAndSafe(filename, nil, GetLanguagesByFilename) interpreter := getInterpreter(content)
return languagesByInterpreter[interpreter]
} }
// GetLanguagesByFilename returns a slice of possible languages for the given filename, content will be ignored. var (
// It accomplish the signature to be a Strategy type. shebangExecHack = regexp.MustCompile(`exec (\w+).+\$0.+\$@`)
func GetLanguagesByFilename(filename string, content []byte) []string { pythonVersion = regexp.MustCompile(`python\d\.\d+`)
return languagesByFilename[filename] )
func getInterpreter(data []byte) (interpreter string) {
line := getFirstLine(data)
if !hasShebang(line) {
return ""
}
// skip shebang
line = bytes.TrimSpace(line[2:])
splitted := bytes.Fields(line)
if bytes.Contains(splitted[0], []byte("env")) {
if len(splitted) > 1 {
interpreter = string(splitted[1])
}
} else {
splittedPath := bytes.Split(splitted[0], []byte{'/'})
interpreter = string(splittedPath[len(splittedPath)-1])
}
if interpreter == "sh" {
interpreter = lookForMultilineExec(data)
}
if pythonVersion.MatchString(interpreter) {
interpreter = interpreter[:strings.Index(interpreter, `.`)]
}
return
} }
// GetLanguageByShebang returns the language of the given content looking for the shebang line, func getFirstLine(data []byte) []byte {
// and safe to indicate the sureness of returned language. buf := bufio.NewScanner(bytes.NewReader(data))
func GetLanguageByShebang(content []byte) (lang string, safe bool) { buf.Scan()
return getLangAndSafe("", content, GetLanguagesByShebang) line := buf.Bytes()
if err := buf.Err(); err != nil {
return nil
}
return line
} }
// GetLanguageByExtension returns a language based on the given filename, and safe to indicate func hasShebang(line []byte) bool {
// the sureness of returned language. const shebang = `#!`
func GetLanguageByExtension(filename string) (lang string, safe bool) { prefix := []byte(shebang)
return getLangAndSafe(filename, nil, GetLanguagesByExtension) return bytes.HasPrefix(line, prefix)
} }
// GetLanguagesByExtension returns a slice of possible languages for the given filename, content will be ignored. func lookForMultilineExec(data []byte) string {
// It accomplish the signature to be a Strategy type. const magicNumOfLines = 5
func GetLanguagesByExtension(filename string, content []byte) []string { interpreter := "sh"
ext := strings.ToLower(filepath.Ext(filename))
return languagesByExtension[ext] buf := bufio.NewScanner(bytes.NewReader(data))
for i := 0; i < magicNumOfLines && buf.Scan(); i++ {
line := buf.Bytes()
if shebangExecHack.Match(line) {
interpreter = shebangExecHack.FindStringSubmatch(string(line))[1]
break
}
}
if err := buf.Err(); err != nil {
return interpreter
}
return interpreter
} }
// GetLanguageByContent returns a language based on the filename and heuristics applies to the content, // GetLanguagesByExtension returns a slice of possible languages for the given filename, content and candidates
// and safe to indicate the sureness of returned language. // will be ignored. It is comply with the signature to be a Strategy type.
func GetLanguageByContent(filename string, content []byte) (lang string, safe bool) { func GetLanguagesByExtension(filename string, content []byte, candidates []string) []string {
return getLangAndSafe(filename, content, GetLanguagesByContent) if !strings.Contains(filename, ".") {
return nil
}
filename = strings.ToLower(filename)
dots := getDotIndexes(filename)
for _, dot := range dots {
ext := filename[dot:]
languages, ok := languagesByExtension[ext]
if ok {
return languages
}
}
return nil
} }
// GetLanguagesByContent returns a slice of possible languages for the given content, filename will be ignored. func getDotIndexes(filename string) []int {
// It accomplish the signature to be a Strategy type. dots := make([]int, 0, 2)
func GetLanguagesByContent(filename string, content []byte) []string { for i, letter := range filename {
if letter == rune('.') {
dots = append(dots, i)
}
}
return dots
}
// GetLanguagesByContent returns a slice of possible languages for the given content, filename and candidates
// will be ignored. It is comply with the signature to be a Strategy type.
func GetLanguagesByContent(filename string, content []byte, candidates []string) []string {
ext := strings.ToLower(filepath.Ext(filename)) ext := strings.ToLower(filepath.Ext(filename))
fnMatcher, ok := contentMatchers[ext] fnMatcher, ok := contentMatchers[ext]
if !ok { if !ok {
@ -100,51 +375,24 @@ func GetLanguagesByContent(filename string, content []byte) []string {
return fnMatcher(content) return fnMatcher(content)
} }
func getLangAndSafe(filename string, content []byte, getLanguageByStrategy Strategy) (lang string, safe bool) { // GetLanguagesByClassifier uses DefaultClassifier as a Classifier and returns a sorted slice of possible languages ordered by
languages := getLanguageByStrategy(filename, content) // decreasing language's probability. If there are not candidates it returns nil. It is comply with the signature to be a Strategy type.
if len(languages) == 0 { func GetLanguagesByClassifier(filename string, content []byte, candidates []string) (languages []string) {
lang = OtherLanguage if len(candidates) == 0 {
return return nil
} }
lang = languages[0] return GetLanguagesBySpecificClassifier(content, candidates, DefaultClassifier)
safe = len(languages) == 1
return
} }
// GetLanguageByClassifier takes in a content and a list of candidates, and apply the classifier's Classify method to // GetLanguagesBySpecificClassifier returns a slice of possible languages. It takes in a Classifier to be used.
// get the most probably language. If classifier is null then DefaultClassfier will be used. If there aren't candidates func GetLanguagesBySpecificClassifier(content []byte, candidates []string, classifier Classifier) (languages []string) {
// OtherLanguage is returned. mapCandidates := make(map[string]float64)
func GetLanguageByClassifier(content []byte, candidates map[string]float64, classifier Classifier) string { for _, candidate := range candidates {
scores := GetLanguagesByClassifier(content, candidates, classifier) mapCandidates[candidate]++
if len(scores) == 0 {
return OtherLanguage
} }
return getLangugeHigherScore(scores) return classifier.Classify(content, mapCandidates)
}
func getLangugeHigherScore(scores map[string]float64) string {
var language string
higher := -math.MaxFloat64
for lang, score := range scores {
if higher < score {
language = lang
higher = score
}
}
return language
}
// GetLanguagesByClassifier returns a map of possible languages as keys and a score as value based on content and candidates. The values can be ordered
// with the highest value as the most probably language. If classifier is null then DefaultClassfier will be used.
func GetLanguagesByClassifier(content []byte, candidates map[string]float64, classifier Classifier) map[string]float64 {
if classifier == nil {
classifier = DefaultClassifier
}
return classifier.Classify(content, candidates)
} }
// GetLanguageExtensions returns the different extensions being used by the language. // GetLanguageExtensions returns the different extensions being used by the language.
@ -164,7 +412,7 @@ const (
Prose Prose
) )
// GetLanguageType returns the given language's type. // GetLanguageType returns the type of the given language.
func GetLanguageType(language string) (langType Type) { func GetLanguageType(language string) (langType Type) {
langType, ok := languagesType[language] langType, ok := languagesType[language]
if !ok { if !ok {

View File

@ -6,7 +6,6 @@ import (
"os" "os"
"path/filepath" "path/filepath"
"testing" "testing"
"text/tabwriter"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite" "github.com/stretchr/testify/suite"
@ -26,6 +25,7 @@ func (s *SimpleLinguistTestSuite) TestGetLanguage() {
filename string filename string
content []byte content []byte
expected string expected string
safe bool
}{ }{
{name: "TestGetLanguage_1", filename: "foo.py", content: []byte{}, expected: "Python"}, {name: "TestGetLanguage_1", filename: "foo.py", content: []byte{}, expected: "Python"},
{name: "TestGetLanguage_2", filename: "foo.m", content: []byte(":- module"), expected: "Mercury"}, {name: "TestGetLanguage_2", filename: "foo.m", content: []byte(":- module"), expected: "Mercury"},
@ -38,65 +38,64 @@ func (s *SimpleLinguistTestSuite) TestGetLanguage() {
} }
} }
func (s *SimpleLinguistTestSuite) TestGetLanguageByModelineLinguist() { func (s *SimpleLinguistTestSuite) TestGetLanguagesByModelineLinguist() {
const ( const (
modelinesDir = ".linguist/test/fixtures/Data/Modelines" modelinesDir = ".linguist/test/fixtures/Data/Modelines"
samplesDir = ".linguist/samples" samplesDir = ".linguist/samples"
) )
tests := []struct { tests := []struct {
name string name string
filename string filename string
expectedLang string candidates []string
expectedSafe bool expected []string
}{ }{
// Emacs // Emacs
{name: "TestGetLanguageByModelineLinguist_1", filename: filepath.Join(modelinesDir, "example_smalltalk.md"), expectedLang: "Smalltalk", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_1", filename: filepath.Join(modelinesDir, "example_smalltalk.md"), expected: []string{"Smalltalk"}},
{name: "TestGetLanguageByModelineLinguist_2", filename: filepath.Join(modelinesDir, "fundamentalEmacs.c"), expectedLang: "Text", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_2", filename: filepath.Join(modelinesDir, "fundamentalEmacs.c"), expected: []string{"Text"}},
{name: "TestGetLanguageByModelineLinguist_3", filename: filepath.Join(modelinesDir, "iamphp.inc"), expectedLang: "PHP", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_3", filename: filepath.Join(modelinesDir, "iamphp.inc"), expected: []string{"PHP"}},
{name: "TestGetLanguageByModelineLinguist_4", filename: filepath.Join(modelinesDir, "seeplusplusEmacs1"), expectedLang: "C++", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_4", filename: filepath.Join(modelinesDir, "seeplusplusEmacs1"), expected: []string{"C++"}},
{name: "TestGetLanguageByModelineLinguist_5", filename: filepath.Join(modelinesDir, "seeplusplusEmacs2"), expectedLang: "C++", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_5", filename: filepath.Join(modelinesDir, "seeplusplusEmacs2"), expected: []string{"C++"}},
{name: "TestGetLanguageByModelineLinguist_6", filename: filepath.Join(modelinesDir, "seeplusplusEmacs3"), expectedLang: "C++", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_6", filename: filepath.Join(modelinesDir, "seeplusplusEmacs3"), expected: []string{"C++"}},
{name: "TestGetLanguageByModelineLinguist_7", filename: filepath.Join(modelinesDir, "seeplusplusEmacs4"), expectedLang: "C++", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_7", filename: filepath.Join(modelinesDir, "seeplusplusEmacs4"), expected: []string{"C++"}},
{name: "TestGetLanguageByModelineLinguist_8", filename: filepath.Join(modelinesDir, "seeplusplusEmacs5"), expectedLang: "C++", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_8", filename: filepath.Join(modelinesDir, "seeplusplusEmacs5"), expected: []string{"C++"}},
{name: "TestGetLanguageByModelineLinguist_9", filename: filepath.Join(modelinesDir, "seeplusplusEmacs6"), expectedLang: "C++", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_9", filename: filepath.Join(modelinesDir, "seeplusplusEmacs6"), expected: []string{"C++"}},
{name: "TestGetLanguageByModelineLinguist_10", filename: filepath.Join(modelinesDir, "seeplusplusEmacs7"), expectedLang: "C++", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_10", filename: filepath.Join(modelinesDir, "seeplusplusEmacs7"), expected: []string{"C++"}},
{name: "TestGetLanguageByModelineLinguist_11", filename: filepath.Join(modelinesDir, "seeplusplusEmacs9"), expectedLang: "C++", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_11", filename: filepath.Join(modelinesDir, "seeplusplusEmacs9"), expected: []string{"C++"}},
{name: "TestGetLanguageByModelineLinguist_12", filename: filepath.Join(modelinesDir, "seeplusplusEmacs10"), expectedLang: "C++", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_12", filename: filepath.Join(modelinesDir, "seeplusplusEmacs10"), expected: []string{"C++"}},
{name: "TestGetLanguageByModelineLinguist_13", filename: filepath.Join(modelinesDir, "seeplusplusEmacs11"), expectedLang: "C++", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_13", filename: filepath.Join(modelinesDir, "seeplusplusEmacs11"), expected: []string{"C++"}},
{name: "TestGetLanguageByModelineLinguist_14", filename: filepath.Join(modelinesDir, "seeplusplusEmacs12"), expectedLang: "C++", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_14", filename: filepath.Join(modelinesDir, "seeplusplusEmacs12"), expected: []string{"C++"}},
// Vim // Vim
{name: "TestGetLanguageByModelineLinguist_15", filename: filepath.Join(modelinesDir, "seeplusplus"), expectedLang: "C++", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_15", filename: filepath.Join(modelinesDir, "seeplusplus"), expected: []string{"C++"}},
{name: "TestGetLanguageByModelineLinguist_16", filename: filepath.Join(modelinesDir, "iamjs.pl"), expectedLang: "JavaScript", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_16", filename: filepath.Join(modelinesDir, "iamjs.pl"), expected: []string{"JavaScript"}},
{name: "TestGetLanguageByModelineLinguist_17", filename: filepath.Join(modelinesDir, "iamjs2.pl"), expectedLang: "JavaScript", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_17", filename: filepath.Join(modelinesDir, "iamjs2.pl"), expected: []string{"JavaScript"}},
{name: "TestGetLanguageByModelineLinguist_18", filename: filepath.Join(modelinesDir, "not_perl.pl"), expectedLang: "Prolog", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_18", filename: filepath.Join(modelinesDir, "not_perl.pl"), expected: []string{"Prolog"}},
{name: "TestGetLanguageByModelineLinguist_19", filename: filepath.Join(modelinesDir, "ruby"), expectedLang: "Ruby", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_19", filename: filepath.Join(modelinesDir, "ruby"), expected: []string{"Ruby"}},
{name: "TestGetLanguageByModelineLinguist_20", filename: filepath.Join(modelinesDir, "ruby2"), expectedLang: "Ruby", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_20", filename: filepath.Join(modelinesDir, "ruby2"), expected: []string{"Ruby"}},
{name: "TestGetLanguageByModelineLinguist_21", filename: filepath.Join(modelinesDir, "ruby3"), expectedLang: "Ruby", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_21", filename: filepath.Join(modelinesDir, "ruby3"), expected: []string{"Ruby"}},
{name: "TestGetLanguageByModelineLinguist_22", filename: filepath.Join(modelinesDir, "ruby4"), expectedLang: "Ruby", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_22", filename: filepath.Join(modelinesDir, "ruby4"), expected: []string{"Ruby"}},
{name: "TestGetLanguageByModelineLinguist_23", filename: filepath.Join(modelinesDir, "ruby5"), expectedLang: "Ruby", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_23", filename: filepath.Join(modelinesDir, "ruby5"), expected: []string{"Ruby"}},
{name: "TestGetLanguageByModelineLinguist_24", filename: filepath.Join(modelinesDir, "ruby6"), expectedLang: "Ruby", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_24", filename: filepath.Join(modelinesDir, "ruby6"), expected: []string{"Ruby"}},
{name: "TestGetLanguageByModelineLinguist_25", filename: filepath.Join(modelinesDir, "ruby7"), expectedLang: "Ruby", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_25", filename: filepath.Join(modelinesDir, "ruby7"), expected: []string{"Ruby"}},
{name: "TestGetLanguageByModelineLinguist_26", filename: filepath.Join(modelinesDir, "ruby8"), expectedLang: "Ruby", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_26", filename: filepath.Join(modelinesDir, "ruby8"), expected: []string{"Ruby"}},
{name: "TestGetLanguageByModelineLinguist_27", filename: filepath.Join(modelinesDir, "ruby9"), expectedLang: "Ruby", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_27", filename: filepath.Join(modelinesDir, "ruby9"), expected: []string{"Ruby"}},
{name: "TestGetLanguageByModelineLinguist_28", filename: filepath.Join(modelinesDir, "ruby10"), expectedLang: "Ruby", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_28", filename: filepath.Join(modelinesDir, "ruby10"), expected: []string{"Ruby"}},
{name: "TestGetLanguageByModelineLinguist_29", filename: filepath.Join(modelinesDir, "ruby11"), expectedLang: "Ruby", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_29", filename: filepath.Join(modelinesDir, "ruby11"), expected: []string{"Ruby"}},
{name: "TestGetLanguageByModelineLinguist_30", filename: filepath.Join(modelinesDir, "ruby12"), expectedLang: "Ruby", expectedSafe: true}, {name: "TestGetLanguagesByModelineLinguist_30", filename: filepath.Join(modelinesDir, "ruby12"), expected: []string{"Ruby"}},
{name: "TestGetLanguageByModelineLinguist_31", filename: filepath.Join(samplesDir, "C/main.c"), expectedLang: OtherLanguage, expectedSafe: false}, {name: "TestGetLanguagesByModelineLinguist_31", filename: filepath.Join(samplesDir, "C/main.c"), expected: nil},
} }
for _, test := range tests { for _, test := range tests {
content, err := ioutil.ReadFile(test.filename) content, err := ioutil.ReadFile(test.filename)
assert.NoError(s.T(), err) assert.NoError(s.T(), err)
lang, safe := GetLanguageByModeline(content) languages := GetLanguagesByModeline(test.filename, content, test.candidates)
assert.Equal(s.T(), test.expectedLang, lang, fmt.Sprintf("%v: lang = %v, expected: %v", test.name, lang, test.expectedLang)) assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected))
assert.Equal(s.T(), test.expectedSafe, safe, fmt.Sprintf("%v: safe = %v, expected: %v", test.name, safe, test.expectedSafe))
} }
} }
func (s *SimpleLinguistTestSuite) TestGetLanguageByModeline() { func (s *SimpleLinguistTestSuite) TestGetLanguagesByModeline() {
const ( const (
wrongVim = `# vim: set syntax=ruby ft =python filetype=perl :` wrongVim = `# vim: set syntax=ruby ft =python filetype=perl :`
rightVim = `/* vim: set syntax=python ft =python filetype=python */` rightVim = `/* vim: set syntax=python ft =python filetype=python */`
@ -104,48 +103,48 @@ func (s *SimpleLinguistTestSuite) TestGetLanguageByModeline() {
) )
tests := []struct { tests := []struct {
name string name string
content []byte filename string
expectedLang string content []byte
expectedSafe bool candidates []string
expected []string
}{ }{
{name: "TestGetLanguageByModeline_1", content: []byte(wrongVim), expectedLang: OtherLanguage, expectedSafe: false}, {name: "TestGetLanguagesByModeline_1", content: []byte(wrongVim), expected: nil},
{name: "TestGetLanguageByModeline_2", content: []byte(rightVim), expectedLang: "Python", expectedSafe: true}, {name: "TestGetLanguagesByModeline_2", content: []byte(rightVim), expected: []string{"Python"}},
{name: "TestGetLanguageByModeline_3", content: []byte(noLangVim), expectedLang: OtherLanguage, expectedSafe: false}, {name: "TestGetLanguagesByModeline_3", content: []byte(noLangVim), expected: nil},
} }
for _, test := range tests { for _, test := range tests {
lang, safe := GetLanguageByModeline(test.content) languages := GetLanguagesByModeline(test.filename, test.content, test.candidates)
assert.Equal(s.T(), test.expectedLang, lang, fmt.Sprintf("%v: lang = %v, expected: %v", test.name, lang, test.expectedLang)) assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected))
assert.Equal(s.T(), test.expectedSafe, safe, fmt.Sprintf("%v: safe = %v, expected: %v", test.name, safe, test.expectedSafe))
} }
} }
// Side-by-side diff rows: the left column is the removed TestGetLanguageByFilename, the right column is its
// replacement TestGetLanguagesByFilename.
// The new version is table-driven over filenames and compares the slice returned by GetLanguagesByFilename
// with expected; a nil expected means no language should be reported for that filename.
func (s *SimpleLinguistTestSuite) TestGetLanguageByFilename() { func (s *SimpleLinguistTestSuite) TestGetLanguagesByFilename() {
tests := []struct { tests := []struct {
name string name string
filename string filename string
expectedLang string content []byte
expectedSafe bool candidates []string
expected []string
}{ }{
{name: "TestGetLanguageByFilename_1", filename: "unknown.interpreter", expectedLang: OtherLanguage, expectedSafe: false}, {name: "TestGetLanguagesByFilename_1", filename: "unknown.interpreter", expected: nil},
{name: "TestGetLanguageByFilename_2", filename: ".bashrc", expectedLang: "Shell", expectedSafe: true}, {name: "TestGetLanguagesByFilename_2", filename: ".bashrc", expected: []string{"Shell"}},
{name: "TestGetLanguageByFilename_3", filename: "Dockerfile", expectedLang: "Dockerfile", expectedSafe: true}, {name: "TestGetLanguagesByFilename_3", filename: "Dockerfile", expected: []string{"Dockerfile"}},
{name: "TestGetLanguageByFilename_4", filename: "Makefile.frag", expectedLang: "Makefile", expectedSafe: true}, {name: "TestGetLanguagesByFilename_4", filename: "Makefile.frag", expected: []string{"Makefile"}},
{name: "TestGetLanguageByFilename_5", filename: "makefile", expectedLang: "Makefile", expectedSafe: true}, {name: "TestGetLanguagesByFilename_5", filename: "makefile", expected: []string{"Makefile"}},
{name: "TestGetLanguageByFilename_6", filename: "Vagrantfile", expectedLang: "Ruby", expectedSafe: true}, {name: "TestGetLanguagesByFilename_6", filename: "Vagrantfile", expected: []string{"Ruby"}},
{name: "TestGetLanguageByFilename_7", filename: "_vimrc", expectedLang: "Vim script", expectedSafe: true}, {name: "TestGetLanguagesByFilename_7", filename: "_vimrc", expected: []string{"Vim script"}},
{name: "TestGetLanguageByFilename_8", filename: "pom.xml", expectedLang: "Maven POM", expectedSafe: true}, {name: "TestGetLanguagesByFilename_8", filename: "pom.xml", expected: []string{"Maven POM"}},
} }
for _, test := range tests { for _, test := range tests {
// The rows above never set content or candidates, so the new call receives their zero values.
lang, safe := GetLanguageByFilename(test.filename) languages := GetLanguagesByFilename(test.filename, test.content, test.candidates)
assert.Equal(s.T(), test.expectedLang, lang, fmt.Sprintf("%v: lang = %v, expected: %v", test.name, lang, test.expectedLang)) assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected))
assert.Equal(s.T(), test.expectedSafe, safe, fmt.Sprintf("%v: safe = %v, expected: %v", test.name, safe, test.expectedSafe))
} }
} }
func (s *SimpleLinguistTestSuite) TestGetLanguageByShebang() { func (s *SimpleLinguistTestSuite) TestGetLanguagesByShebang() {
const ( const (
multilineExecHack = `#!/bin/sh multilineExecHack = `#!/bin/sh
# Next line is comment in Tcl, but not in sh... \ # Next line is comment in Tcl, but not in sh... \
@ -162,125 +161,112 @@ println("The shell script says ",vm.arglist.concat(" "));`
) )
tests := []struct { tests := []struct {
name string name string
content []byte filename string
expectedLang string content []byte
expectedSafe bool candidates []string
expected []string
}{ }{
{name: "TestGetLanguageByShebang_1", content: []byte(`#!/unknown/interpreter`), expectedLang: OtherLanguage, expectedSafe: false}, {name: "TestGetLanguagesByShebang_1", content: []byte(`#!/unknown/interpreter`), expected: nil},
{name: "TestGetLanguageByShebang_2", content: []byte(`no shebang`), expectedLang: OtherLanguage, expectedSafe: false}, {name: "TestGetLanguagesByShebang_2", content: []byte(`no shebang`), expected: nil},
{name: "TestGetLanguageByShebang_3", content: []byte(`#!/usr/bin/env`), expectedLang: OtherLanguage, expectedSafe: false}, {name: "TestGetLanguagesByShebang_3", content: []byte(`#!/usr/bin/env`), expected: nil},
{name: "TestGetLanguageByShebang_4", content: []byte(`#!/usr/bin/python -tt`), expectedLang: "Python", expectedSafe: true}, {name: "TestGetLanguagesByShebang_4", content: []byte(`#!/usr/bin/python -tt`), expected: []string{"Python"}},
{name: "TestGetLanguageByShebang_5", content: []byte(`#!/usr/bin/env python2.6`), expectedLang: "Python", expectedSafe: true}, {name: "TestGetLanguagesByShebang_5", content: []byte(`#!/usr/bin/env python2.6`), expected: []string{"Python"}},
{name: "TestGetLanguageByShebang_6", content: []byte(`#!/usr/bin/env perl`), expectedLang: "Perl", expectedSafe: true}, {name: "TestGetLanguagesByShebang_6", content: []byte(`#!/usr/bin/env perl`), expected: []string{"Perl"}},
{name: "TestGetLanguageByShebang_7", content: []byte(`#! /bin/sh`), expectedLang: "Shell", expectedSafe: true}, {name: "TestGetLanguagesByShebang_7", content: []byte(`#! /bin/sh`), expected: []string{"Shell"}},
{name: "TestGetLanguageByShebang_8", content: []byte(`#!bash`), expectedLang: "Shell", expectedSafe: true}, {name: "TestGetLanguagesByShebang_8", content: []byte(`#!bash`), expected: []string{"Shell"}},
{name: "TestGetLanguageByShebang_9", content: []byte(multilineExecHack), expectedLang: "Tcl", expectedSafe: true}, {name: "TestGetLanguagesByShebang_9", content: []byte(multilineExecHack), expected: []string{"Tcl"}},
{name: "TestGetLanguageByShebang_10", content: []byte(multilineNoExecHack), expectedLang: "Shell", expectedSafe: true}, {name: "TestGetLanguagesByShebang_10", content: []byte(multilineNoExecHack), expected: []string{"Shell"}},
} }
for _, test := range tests { for _, test := range tests {
lang, safe := GetLanguageByShebang(test.content) languages := GetLanguagesByShebang(test.filename, test.content, test.candidates)
assert.Equal(s.T(), test.expectedLang, lang, fmt.Sprintf("%v: lang = %v, expected: %v", test.name, lang, test.expectedLang)) assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected))
assert.Equal(s.T(), test.expectedSafe, safe, fmt.Sprintf("%v: safe = %v, expected: %v", test.name, safe, test.expectedSafe))
} }
} }
// Side-by-side diff rows: the left column is the removed TestGetLanguageByExtension, the right column is its
// replacement TestGetLanguagesByExtension.
// The new version compares the full slice returned by GetLanguagesByExtension with expected. For "foo.go.php"
// the old test expected "Hack" flagged as not safe; the new one expects both "Hack" and "PHP".
func (s *SimpleLinguistTestSuite) TestGetLanguageByExtension() { func (s *SimpleLinguistTestSuite) TestGetLanguagesByExtension() {
tests := []struct { tests := []struct {
name string name string
filename string filename string
expectedLang string content []byte
expectedSafe bool candidates []string
expected []string
}{ }{
{name: "TestGetLanguageByExtension_1", filename: "foo.foo", expectedLang: OtherLanguage, expectedSafe: false}, {name: "TestGetLanguagesByExtension_1", filename: "foo.foo", expected: nil},
{name: "TestGetLanguageByExtension_2", filename: "foo.go", expectedLang: "Go", expectedSafe: true}, {name: "TestGetLanguagesByExtension_2", filename: "foo.go", expected: []string{"Go"}},
{name: "TestGetLanguageByExtension_3", filename: "foo.go.php", expectedLang: "Hack", expectedSafe: false}, {name: "TestGetLanguagesByExtension_3", filename: "foo.go.php", expected: []string{"Hack", "PHP"}},
} }
for _, test := range tests { for _, test := range tests {
// The rows above never set content or candidates, so the new call receives their zero values.
lang, safe := GetLanguageByExtension(test.filename) languages := GetLanguagesByExtension(test.filename, test.content, test.candidates)
assert.Equal(s.T(), test.expectedLang, lang, fmt.Sprintf("%v: lang = %v, expected: %v", test.name, lang, test.expectedLang)) assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected))
assert.Equal(s.T(), test.expectedSafe, safe, fmt.Sprintf("%v: safe = %v, expected: %v", test.name, safe, test.expectedSafe))
} }
} }
// Side-by-side diff rows. Left column: the removed TestGetLanguageByContentLinguistCorpus (a tabwriter report
// over .linguist/samples that made no assertions), followed by the start of the old TestGetLanguageByClassifier.
// Right column: the new TestGetLanguagesByClassifier, which passes candidates as a []string, takes the first
// returned language (or OtherLanguage when the result is empty) and compares it with expected.
func (s *SimpleLinguistTestSuite) TestGetLanguageByContentLinguistCorpus() { func (s *SimpleLinguistTestSuite) TestGetLanguagesByClassifier() {
var total, failed, ok, other, unsafe int
w := new(tabwriter.Writer)
w.Init(os.Stdout, 0, 8, 0, '\t', 0)
filepath.Walk(".linguist/samples", func(path string, f os.FileInfo, err error) error {
if f.IsDir() {
if f.Name() == "filenames" {
return filepath.SkipDir
}
return nil
}
// The expected language is the name of the directory that directly contains the sample.
expected := filepath.Base(filepath.Dir(path))
filename := filepath.Base(path)
extension := filepath.Ext(path)
content, _ := ioutil.ReadFile(path)
// Samples without an extension are left out of the report.
if extension == "" {
return nil
}
total++
obtained, safe := GetLanguageByContent(filename, content)
if obtained == OtherLanguage {
other++
}
var status string
if expected == obtained {
status = "ok"
ok++
} else {
status = "failed"
failed++
if !safe {
unsafe++
}
}
fmt.Fprintf(w, "%s\t%s\t%s\t%v\t%s\n", filename, expected, obtained, safe, status)
return nil
})
fmt.Fprintln(w)
w.Flush()
fmt.Printf("total files: %d, ok: %d, failed: %d, unsafe: %d, other: %d\n", total, ok, failed, unsafe, other)
}
func (s *SimpleLinguistTestSuite) TestGetLanguageByClassifier() {
const samples = `.linguist/samples/` const samples = `.linguist/samples/`
test := []struct { test := []struct {
name string name string
filename string filename string
candidates map[string]float64 candidates []string
expected string expected string
}{ }{
// In the new column, rows 2 and 3 (nil / empty candidates) now expect OtherLanguage instead of "C", and the
// old row 7 has no counterpart.
{name: "TestGetLanguageByClassifier_1", filename: filepath.Join(samples, "C/blob.c"), candidates: map[string]float64{"python": 1.00, "ruby": 1.00, "c": 1.00, "c++": 1.00}, expected: "C"}, {name: "TestGetLanguagesByClassifier_1", filename: filepath.Join(samples, "C/blob.c"), candidates: []string{"python", "ruby", "c", "c++"}, expected: "C"},
{name: "TestGetLanguageByClassifier_2", filename: filepath.Join(samples, "C/blob.c"), candidates: nil, expected: "C"}, {name: "TestGetLanguagesByClassifier_2", filename: filepath.Join(samples, "C/blob.c"), candidates: nil, expected: OtherLanguage},
{name: "TestGetLanguageByClassifier_3", filename: filepath.Join(samples, "C/main.c"), candidates: nil, expected: "C"}, {name: "TestGetLanguagesByClassifier_3", filename: filepath.Join(samples, "C/main.c"), candidates: []string{}, expected: OtherLanguage},
{name: "TestGetLanguageByClassifier_4", filename: filepath.Join(samples, "C/blob.c"), candidates: map[string]float64{"python": 1.00, "ruby": 1.00, "c++": 1.00}, expected: "C++"}, {name: "TestGetLanguagesByClassifier_4", filename: filepath.Join(samples, "C/blob.c"), candidates: []string{"python", "ruby", "c++"}, expected: "C++"},
{name: "TestGetLanguageByClassifier_5", filename: filepath.Join(samples, "C/blob.c"), candidates: map[string]float64{"ruby": 1.00}, expected: "Ruby"}, {name: "TestGetLanguagesByClassifier_5", filename: filepath.Join(samples, "C/blob.c"), candidates: []string{"ruby"}, expected: "Ruby"},
{name: "TestGetLanguageByClassifier_6", filename: filepath.Join(samples, "Python/django-models-base.py"), candidates: map[string]float64{"python": 1.00, "ruby": 1.00, "c": 1.00, "c++": 1.00}, expected: "Python"}, {name: "TestGetLanguagesByClassifier_6", filename: filepath.Join(samples, "Python/django-models-base.py"), candidates: []string{"python", "ruby", "c", "c++"}, expected: "Python"},
{name: "TestGetLanguageByClassifier_7", filename: filepath.Join(samples, "Python/django-models-base.py"), candidates: nil, expected: "Python"},
} }
for _, test := range test { for _, test := range test {
content, err := ioutil.ReadFile(test.filename) content, err := ioutil.ReadFile(test.filename)
assert.NoError(s.T(), err) assert.NoError(s.T(), err)
lang := GetLanguageByClassifier(content, test.candidates, nil) languages := GetLanguagesByClassifier(test.filename, content, test.candidates)
assert.Equal(s.T(), test.expected, lang, fmt.Sprintf("%v: lang = %v, expected: %v", test.name, lang, test.expected)) var language string
// An empty result is reported as OtherLanguage so it can be compared against a single expected string.
if len(languages) == 0 {
language = OtherLanguage
} else {
language = languages[0]
}
assert.Equal(s.T(), test.expected, language, fmt.Sprintf("%v: language = %v, expected: %v", test.name, language, test.expected))
}
}
// TestGetLanguagesBySpecificClassifier reads each sample file, calls GetLanguagesBySpecificClassifier with the
// row's candidates and classifier, and compares the first returned language (or OtherLanguage when nothing is
// returned) with expected. Every row uses DefaultClassifier.
func (s *SimpleLinguistTestSuite) TestGetLanguagesBySpecificClassifier() {
const samples = `.linguist/samples/`
test := []struct {
name string
filename string
candidates []string
classifier Classifier
expected string
}{
// NOTE(review): the name fields say "ByClassifier" rather than "BySpecificClassifier", and the last two rows
// both use the "_6" suffix. This looks like copy-paste; the names only appear in failure messages.
{name: "TestGetLanguagesByClassifier_1", filename: filepath.Join(samples, "C/blob.c"), candidates: []string{"python", "ruby", "c", "c++"}, classifier: DefaultClassifier, expected: "C"},
{name: "TestGetLanguagesByClassifier_2", filename: filepath.Join(samples, "C/blob.c"), candidates: nil, classifier: DefaultClassifier, expected: "C"},
{name: "TestGetLanguagesByClassifier_3", filename: filepath.Join(samples, "C/main.c"), candidates: []string{}, classifier: DefaultClassifier, expected: "C"},
{name: "TestGetLanguagesByClassifier_4", filename: filepath.Join(samples, "C/blob.c"), candidates: []string{"python", "ruby", "c++"}, classifier: DefaultClassifier, expected: "C++"},
{name: "TestGetLanguagesByClassifier_5", filename: filepath.Join(samples, "C/blob.c"), candidates: []string{"ruby"}, classifier: DefaultClassifier, expected: "Ruby"},
{name: "TestGetLanguagesByClassifier_6", filename: filepath.Join(samples, "Python/django-models-base.py"), candidates: []string{"python", "ruby", "c", "c++"}, classifier: DefaultClassifier, expected: "Python"},
{name: "TestGetLanguagesByClassifier_6", filename: os.DevNull, candidates: nil, classifier: DefaultClassifier, expected: OtherLanguage},
}
for _, test := range test {
content, err := ioutil.ReadFile(test.filename)
assert.NoError(s.T(), err)
languages := GetLanguagesBySpecificClassifier(content, test.candidates, test.classifier)
var language string
// An empty result is reported as OtherLanguage so it can be compared against a single expected string.
if len(languages) == 0 {
language = OtherLanguage
} else {
language = languages[0]
}
assert.Equal(s.T(), test.expected, language, fmt.Sprintf("%v: language = %v, expected: %v", test.name, language, test.expected))
// The two rows below are unchanged diff context: each carries the closing brace of the old
// TestGetLanguageByClassifier (left) and of this function (right).
} }
} }
@ -349,3 +335,56 @@ func (s *SimpleLinguistTestSuite) TestGetLanguageByAlias() {
assert.Equal(s.T(), test.expectedOk, ok, fmt.Sprintf("%v: ok = %v, expected: %v", test.name, ok, test.expectedOk)) assert.Equal(s.T(), test.expectedOk, ok, fmt.Sprintf("%v: ok = %v, expected: %v", test.name, ok, test.expectedOk))
} }
} }
// TestLinguistCorpus walks every file under .linguist/samples, runs GetLanguage on it and asserts that the
// detected language equals the name of the samples directory the file belongs to.
//
// The expected language is the name of the most recently visited directory that is not called "filenames", so
// files under <language>/filenames are still attributed to <language>. An OtherLanguage result is compared as
// "Other". Files listed in cornerCases are only reported, never asserted. A summary line with the counters is
// printed at the end.
//
// Any error reported by the walk (for example a missing samples directory) or by reading a sample aborts the
// walk and fails the test instead of being silently ignored.
func (s *SimpleLinguistTestSuite) TestLinguistCorpus() {
	const (
		samplesDir   = ".linguist/samples"
		filenamesDir = "filenames"
	)

	// Known mismatches: reported on stdout but excluded from the assertion.
	var cornerCases = map[string]bool{
		"hello.ms": true,
	}

	var total, failed, ok, other int
	var expected string
	walkErr := filepath.Walk(samplesDir, func(path string, f os.FileInfo, err error) error {
		// Walk hands over a nil FileInfo together with a non-nil err when it cannot lstat path,
		// so err has to be checked before f is touched.
		if err != nil {
			return err
		}

		if f.IsDir() {
			if f.Name() != filenamesDir {
				expected = f.Name()
			}
			return nil
		}

		filename := filepath.Base(path)
		content, err := ioutil.ReadFile(path)
		if err != nil {
			return err
		}

		total++
		obtained := GetLanguage(filename, content)
		if obtained == OtherLanguage {
			obtained = "Other"
			other++
		}

		var status string
		if expected == obtained {
			status = "ok"
			ok++
		} else {
			status = "failed"
			failed++
		}

		// Separate name so the lookup result does not shadow the ok counter.
		if _, isCornerCase := cornerCases[filename]; isCornerCase {
			fmt.Printf("\t\t[considered corner case] %s\texpected: %s\tobtained: %s\tstatus: %s\n", filename, expected, obtained, status)
		} else {
			assert.Equal(s.T(), expected, obtained, fmt.Sprintf("%s\texpected: %s\tobtained: %s\tstatus: %s\n", filename, expected, obtained, status))
		}

		return nil
	})
	assert.NoError(s.T(), walkErr)

	fmt.Printf("\t\ttotal files: %d, ok: %d, failed: %d, other: %d\n", total, ok, failed, other)
}

View File

@ -507,7 +507,7 @@ var (
m_Matlab_Matcher_0 = regexp.MustCompile(`(?m)^\s*%`) m_Matlab_Matcher_0 = regexp.MustCompile(`(?m)^\s*%`)
m_Limbo_Matcher_0 = regexp.MustCompile(`(?m)^\w+\s*:\s*module\s*{`) m_Limbo_Matcher_0 = regexp.MustCompile(`(?m)^\w+\s*:\s*module\s*{`)
md_Markdown_Matcher_0 = regexp.MustCompile(`(?mi)(^[-a-z0-9=#!\*\[|>])|<\/`) md_Markdown_Matcher_0 = regexp.MustCompile(`(?mi)(^[-a-z0-9=#!\*\[|>])|<\/`)
md_Markdown_Matcher_1 = regexp.MustCompile(`(?m)^$`) md_Markdown_Matcher_1 = regexp.MustCompile(`^$`)
md_GCCMachineDescription_Matcher_0 = regexp.MustCompile(`(?m)^(;;|\(define_)`) md_GCCMachineDescription_Matcher_0 = regexp.MustCompile(`(?m)^(;;|\(define_)`)
ml_OCaml_Matcher_0 = regexp.MustCompile(`(?m)(^\s*module)|let rec |match\s+(\S+\s)+with`) ml_OCaml_Matcher_0 = regexp.MustCompile(`(?m)(^\s*module)|let rec |match\s+(\S+\s)+with`)
ml_StandardML_Matcher_0 = regexp.MustCompile(`(?m)=> |case\s+(\S+\s)+of`) ml_StandardML_Matcher_0 = regexp.MustCompile(`(?m)=> |case\s+(\S+\s)+of`)

View File

@ -28,9 +28,6 @@ var languagesByExtension = map[string][]string{
".8xp": {"TI Program"}, ".8xp": {"TI Program"},
".8xp.txt": {"TI Program"}, ".8xp.txt": {"TI Program"},
".9": {"Roff"}, ".9": {"Roff"},
".E": {"E"},
".JSON-tmLanguage": {"JSON"},
".ML": {"Standard ML"},
"._coffee": {"CoffeeScript"}, "._coffee": {"CoffeeScript"},
"._js": {"JavaScript"}, "._js": {"JavaScript"},
"._ls": {"LiveScript"}, "._ls": {"LiveScript"},
@ -228,7 +225,7 @@ var languagesByExtension = map[string][]string{
".dyalog": {"APL"}, ".dyalog": {"APL"},
".dyl": {"Dylan"}, ".dyl": {"Dylan"},
".dylan": {"Dylan"}, ".dylan": {"Dylan"},
".e": {"Eiffel"}, ".e": {"E", "Eiffel"},
".eam.fs": {"Formatted"}, ".eam.fs": {"Formatted"},
".ebnf": {"EBNF"}, ".ebnf": {"EBNF"},
".ebuild": {"Gentoo Ebuild"}, ".ebuild": {"Gentoo Ebuild"},
@ -415,6 +412,7 @@ var languagesByExtension = map[string][]string{
".jsfl": {"JavaScript"}, ".jsfl": {"JavaScript"},
".jsm": {"JavaScript"}, ".jsm": {"JavaScript"},
".json": {"JSON"}, ".json": {"JSON"},
".json-tmlanguage": {"JSON"},
".json5": {"JSON5"}, ".json5": {"JSON5"},
".jsonld": {"JSONLD"}, ".jsonld": {"JSONLD"},
".jsp": {"Java Server Pages"}, ".jsp": {"Java Server Pages"},
@ -508,7 +506,7 @@ var languagesByExtension = map[string][]string{
".mkii": {"TeX"}, ".mkii": {"TeX"},
".mkiv": {"TeX"}, ".mkiv": {"TeX"},
".mkvi": {"TeX"}, ".mkvi": {"TeX"},
".ml": {"OCaml"}, ".ml": {"OCaml", "Standard ML"},
".ml4": {"OCaml"}, ".ml4": {"OCaml"},
".mli": {"OCaml"}, ".mli": {"OCaml"},
".mll": {"OCaml"}, ".mll": {"OCaml"},
@ -807,11 +805,11 @@ var languagesByExtension = map[string][]string{
".ss": {"Scheme"}, ".ss": {"Scheme"},
".ssjs": {"JavaScript"}, ".ssjs": {"JavaScript"},
".st": {"HTML", "Smalltalk"}, ".st": {"HTML", "Smalltalk"},
".stTheme": {"XML"},
".stan": {"Stan"}, ".stan": {"Stan"},
".sthlp": {"Stata"}, ".sthlp": {"Stata"},
".ston": {"STON"}, ".ston": {"STON"},
".storyboard": {"XML"}, ".storyboard": {"XML"},
".sttheme": {"XML"},
".sty": {"TeX"}, ".sty": {"TeX"},
".styl": {"Stylus"}, ".styl": {"Stylus"},
".sublime-build": {"Sublime Text Config"}, ".sublime-build": {"Sublime Text Config"},
@ -853,13 +851,13 @@ var languagesByExtension = map[string][]string{
".tl": {"Type Language"}, ".tl": {"Type Language"},
".tla": {"TLA"}, ".tla": {"TLA"},
".tm": {"Tcl"}, ".tm": {"Tcl"},
".tmCommand": {"XML"},
".tmLanguage": {"XML"},
".tmPreferences": {"XML"},
".tmSnippet": {"XML"},
".tmTheme": {"XML"},
".tmac": {"Roff"}, ".tmac": {"Roff"},
".tmcommand": {"XML"},
".tml": {"XML"}, ".tml": {"XML"},
".tmlanguage": {"XML"},
".tmpreferences": {"XML"},
".tmsnippet": {"XML"},
".tmtheme": {"XML"},
".tmux": {"Shell"}, ".tmux": {"Shell"},
".toc": {"TeX", "World of Warcraft Addon Data"}, ".toc": {"TeX", "World of Warcraft Addon Data"},
".toml": {"TOML"}, ".toml": {"TOML"},
@ -1099,7 +1097,7 @@ var extensionsByLanguage = map[string][]string{
"Dockerfile": {".dockerfile"}, "Dockerfile": {".dockerfile"},
"Dogescript": {".djs"}, "Dogescript": {".djs"},
"Dylan": {".dylan", ".dyl", ".intr", ".lid"}, "Dylan": {".dylan", ".dyl", ".intr", ".lid"},
"E": {".E"}, "E": {".e"},
"EBNF": {".ebnf"}, "EBNF": {".ebnf"},
"ECL": {".ecl", ".eclxml"}, "ECL": {".ecl", ".eclxml"},
"ECLiPSe": {".ecl"}, "ECLiPSe": {".ecl"},
@ -1182,7 +1180,7 @@ var extensionsByLanguage = map[string][]string{
"Isabelle": {".thy"}, "Isabelle": {".thy"},
"J": {".ijs"}, "J": {".ijs"},
"JFlex": {".flex", ".jflex"}, "JFlex": {".flex", ".jflex"},
"JSON": {".json", ".geojson", ".JSON-tmLanguage", ".topojson"}, "JSON": {".json", ".geojson", ".json-tmlanguage", ".topojson"},
"JSON5": {".json5"}, "JSON5": {".json5"},
"JSONLD": {".jsonld"}, "JSONLD": {".jsonld"},
"JSONiq": {".jq"}, "JSONiq": {".jq"},
@ -1379,7 +1377,7 @@ var extensionsByLanguage = map[string][]string{
"Spline Font Database": {".sfd"}, "Spline Font Database": {".sfd"},
"Squirrel": {".nut"}, "Squirrel": {".nut"},
"Stan": {".stan"}, "Stan": {".stan"},
"Standard ML": {".ML", ".fun", ".sig", ".sml"}, "Standard ML": {".ml", ".fun", ".sig", ".sml"},
"Stata": {".do", ".ado", ".doh", ".ihlp", ".mata", ".matah", ".sthlp"}, "Stata": {".do", ".ado", ".doh", ".ihlp", ".mata", ".matah", ".sthlp"},
"Stylus": {".styl"}, "Stylus": {".styl"},
"SubRip Text": {".srt"}, "SubRip Text": {".srt"},
@ -1426,7 +1424,7 @@ var extensionsByLanguage = map[string][]string{
"World of Warcraft Addon Data": {".toc"}, "World of Warcraft Addon Data": {".toc"},
"X10": {".x10"}, "X10": {".x10"},
"XC": {".xc"}, "XC": {".xc"},
"XML": {".xml", ".adml", ".admx", ".ant", ".axml", ".builds", ".ccxml", ".clixml", ".cproject", ".csl", ".csproj", ".ct", ".dita", ".ditamap", ".ditaval", ".dll.config", ".dotsettings", ".filters", ".fsproj", ".fxml", ".glade", ".gml", ".grxml", ".iml", ".ivy", ".jelly", ".jsproj", ".kml", ".launch", ".mdpolicy", ".mjml", ".mm", ".mod", ".mxml", ".nproj", ".nuspec", ".odd", ".osm", ".pkgproj", ".plist", ".pluginspec", ".props", ".ps1xml", ".psc1", ".pt", ".rdf", ".resx", ".rss", ".sch", ".scxml", ".sfproj", ".srdf", ".storyboard", ".stTheme", ".sublime-snippet", ".targets", ".tmCommand", ".tml", ".tmLanguage", ".tmPreferences", ".tmSnippet", ".tmTheme", ".ts", ".tsx", ".ui", ".urdf", ".ux", ".vbproj", ".vcxproj", ".vsixmanifest", ".vssettings", ".vstemplate", ".vxml", ".wixproj", ".wsdl", ".wsf", ".wxi", ".wxl", ".wxs", ".x3d", ".xacro", ".xaml", ".xib", ".xlf", ".xliff", ".xmi", ".xml.dist", ".xproj", ".xsd", ".xul", ".zcml"}, "XML": {".xml", ".adml", ".admx", ".ant", ".axml", ".builds", ".ccxml", ".clixml", ".cproject", ".csl", ".csproj", ".ct", ".dita", ".ditamap", ".ditaval", ".dll.config", ".dotsettings", ".filters", ".fsproj", ".fxml", ".glade", ".gml", ".grxml", ".iml", ".ivy", ".jelly", ".jsproj", ".kml", ".launch", ".mdpolicy", ".mjml", ".mm", ".mod", ".mxml", ".nproj", ".nuspec", ".odd", ".osm", ".pkgproj", ".plist", ".pluginspec", ".props", ".ps1xml", ".psc1", ".pt", ".rdf", ".resx", ".rss", ".sch", ".scxml", ".sfproj", ".srdf", ".storyboard", ".sttheme", ".sublime-snippet", ".targets", ".tmcommand", ".tml", ".tmlanguage", ".tmpreferences", ".tmsnippet", ".tmtheme", ".ts", ".tsx", ".ui", ".urdf", ".ux", ".vbproj", ".vcxproj", ".vsixmanifest", ".vssettings", ".vstemplate", ".vxml", ".wixproj", ".wsdl", ".wsf", ".wxi", ".wxl", ".wxs", ".x3d", ".xacro", ".xaml", ".xib", ".xlf", ".xliff", ".xmi", ".xml.dist", ".xproj", ".xsd", ".xul", ".zcml"},
"XPages": {".xsp-config", ".xsp.metadata"}, "XPages": {".xsp-config", ".xsp.metadata"},
"XProc": {".xpl", ".xproc"}, "XProc": {".xpl", ".xproc"},
"XQuery": {".xquery", ".xq", ".xql", ".xqm", ".xqy"}, "XQuery": {".xquery", ".xq", ".xql", ".xqm", ".xqy"},

View File

@ -16,22 +16,39 @@ var languagesByFilename = map[string][]string{
".bashrc": {"Shell"}, ".bashrc": {"Shell"},
".clang-format": {"YAML"}, ".clang-format": {"YAML"},
".classpath": {"XML"}, ".classpath": {"XML"},
".cproject": {"XML"},
".cshrc": {"Shell"},
".editorconfig": {"INI"},
".emacs": {"Emacs Lisp"}, ".emacs": {"Emacs Lisp"},
".emacs.desktop": {"Emacs Lisp"}, ".emacs.desktop": {"Emacs Lisp"},
".factor-boot-rc": {"Factor"}, ".factor-boot-rc": {"Factor"},
".factor-rc": {"Factor"}, ".factor-rc": {"Factor"},
".gclient": {"Python"}, ".gclient": {"Python"},
".gemrc": {"YAML"},
".gitconfig": {"INI"},
".gn": {"GN"},
".gnus": {"Emacs Lisp"}, ".gnus": {"Emacs Lisp"},
".gvimrc": {"Vim script"},
".htaccess": {"ApacheConf"},
".irbrc": {"Ruby"}, ".irbrc": {"Ruby"},
".jshintrc": {"JSON"}, ".jshintrc": {"JSON"},
".login": {"Shell"},
".nvimrc": {"Vim script"}, ".nvimrc": {"Vim script"},
".php": {"PHP"},
".php_cs": {"PHP"}, ".php_cs": {"PHP"},
".php_cs.dist": {"PHP"}, ".php_cs.dist": {"PHP"},
".profile": {"Shell"},
".project": {"XML"}, ".project": {"XML"},
".pryrc": {"Ruby"}, ".pryrc": {"Ruby"},
".spacemacs": {"Emacs Lisp"}, ".spacemacs": {"Emacs Lisp"},
".vimrc": {"Vim script"}, ".vimrc": {"Vim script"},
".viper": {"Emacs Lisp"}, ".viper": {"Emacs Lisp"},
".zlogin": {"Shell"},
".zlogout": {"Shell"},
".zprofile": {"Shell"},
".zshenv": {"Shell"},
".zshrc": {"Shell"},
"9fs": {"Shell"},
"APKBUILD": {"Alpine Abuild"}, "APKBUILD": {"Alpine Abuild"},
"App.config": {"XML"}, "App.config": {"XML"},
"Appraisals": {"Ruby"}, "Appraisals": {"Ruby"},
@ -43,8 +60,10 @@ var languagesByFilename = map[string][]string{
"Buildfile": {"Ruby"}, "Buildfile": {"Ruby"},
"CMakeLists.txt": {"CMake"}, "CMakeLists.txt": {"CMake"},
"COPYING": {"Text"}, "COPYING": {"Text"},
"COPYING.regex": {"Text"},
"COPYRIGHT.regex": {"Text"}, "COPYRIGHT.regex": {"Text"},
"Cakefile": {"CoffeeScript"}, "Cakefile": {"CoffeeScript"},
"Capfile": {"Ruby"},
"Cask": {"Emacs Lisp"}, "Cask": {"Emacs Lisp"},
"Dangerfile": {"Ruby"}, "Dangerfile": {"Ruby"},
"Deliverfile": {"Ruby"}, "Deliverfile": {"Ruby"},
@ -105,21 +124,31 @@ var languagesByFilename = map[string][]string{
"_emacs": {"Emacs Lisp"}, "_emacs": {"Emacs Lisp"},
"_vimrc": {"Vim script"}, "_vimrc": {"Vim script"},
"abbrev_defs": {"Emacs Lisp"}, "abbrev_defs": {"Emacs Lisp"},
"ack": {"Perl"},
"ant.xml": {"Ant Build System"}, "ant.xml": {"Ant Build System"},
"apache2.conf": {"ApacheConf"},
"bash_logout": {"Shell"},
"bash_profile": {"Shell"},
"bashrc": {"Shell"},
"build.xml": {"Ant Build System"}, "build.xml": {"Ant Build System"},
"buildfile": {"Ruby"}, "buildfile": {"Ruby"},
"click.me": {"Text"}, "click.me": {"Text"},
"composer.lock": {"JSON"}, "composer.lock": {"JSON"},
"configure.ac": {"M4Sugar"}, "configure.ac": {"M4Sugar"},
"cshrc": {"Shell"},
"delete.me": {"Text"}, "delete.me": {"Text"},
"descrip.mmk": {"Module Management System"}, "descrip.mmk": {"Module Management System"},
"descrip.mms": {"Module Management System"}, "descrip.mms": {"Module Management System"},
"expr-dist": {"R"},
"gradlew": {"Shell"}, "gradlew": {"Shell"},
"gvimrc": {"Vim script"}, "gvimrc": {"Vim script"},
"httpd.conf": {"ApacheConf"},
"keep.me": {"Text"}, "keep.me": {"Text"},
"ld.script": {"Linker Script"}, "ld.script": {"Linker Script"},
"login": {"Shell"},
"makefile": {"Makefile"}, "makefile": {"Makefile"},
"makefile.sco": {"Makefile"}, "makefile.sco": {"Makefile"},
"man": {"Shell"},
"mcmod.info": {"JSON"}, "mcmod.info": {"JSON"},
"meson.build": {"Meson"}, "meson.build": {"Meson"},
"meson_options.txt": {"Meson"}, "meson_options.txt": {"Meson"},
@ -129,15 +158,25 @@ var languagesByFilename = map[string][]string{
"mmt": {"Roff"}, "mmt": {"Roff"},
"nginx.conf": {"Nginx"}, "nginx.conf": {"Nginx"},
"nvimrc": {"Vim script"}, "nvimrc": {"Vim script"},
"owh": {"Tcl"},
"packages.config": {"XML"}, "packages.config": {"XML"},
"pom.xml": {"Maven POM"}, "pom.xml": {"Maven POM"},
"profile": {"Shell"},
"read.me": {"Text"}, "read.me": {"Text"},
"readme.1st": {"Text"},
"rebar.config": {"Erlang"}, "rebar.config": {"Erlang"},
"rebar.config.lock": {"Erlang"}, "rebar.config.lock": {"Erlang"},
"rebar.lock": {"Erlang"}, "rebar.lock": {"Erlang"},
"riemann.config": {"Clojure"}, "riemann.config": {"Clojure"},
"script": {"C"},
"starfield": {"Tcl"},
"test.me": {"Text"}, "test.me": {"Text"},
"vimrc": {"Vim script"}, "vimrc": {"Vim script"},
"wscript": {"Python"}, "wscript": {"Python"},
"xcompose": {"XCompose"}, "xcompose": {"XCompose"},
"zlogin": {"Shell"},
"zlogout": {"Shell"},
"zprofile": {"Shell"},
"zshenv": {"Shell"},
"zshrc": {"Shell"},
} }

File diff suppressed because it is too large Load Diff

View File

@ -3,28 +3,34 @@ package generator
import ( import (
"bytes" "bytes"
"io" "io"
"io/ioutil"
"strings" "strings"
"text/template" "text/template"
yaml "gopkg.in/yaml.v2" yaml "gopkg.in/yaml.v2"
) )
// Aliases reads from buf and builds source file from aliasesTmplPath. // Aliases reads from fileToParse and builds source file from tmplPath. It complies with the signature of type File.
// Side-by-side diff rows: left is the old Aliases (takes the YAML bytes, returns the rendered bytes), right is
// the new one (reads fileToParse itself and writes the result to outPath). samplesDir is not referenced in the
// body shown here.
func Aliases(data []byte, aliasesTmplPath, aliasesTmplName, commit string) ([]byte, error) { func Aliases(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error {
data, err := ioutil.ReadFile(fileToParse)
if err != nil {
return err
}
languages := make(map[string]*languageInfo) languages := make(map[string]*languageInfo)
if err := yaml.Unmarshal(data, &languages); err != nil { if err := yaml.Unmarshal(data, &languages); err != nil {
return nil, err return err
} }
// Languages are visited in the order produced by getAlphabeticalOrderedKeys when the alias map is built.
orderedLangList := getAlphabeticalOrderedKeys(languages) orderedLangList := getAlphabeticalOrderedKeys(languages)
languagesByAlias := buildAliasLanguageMap(languages, orderedLangList) languagesByAlias := buildAliasLanguageMap(languages, orderedLangList)
buf := &bytes.Buffer{} buf := &bytes.Buffer{}
if err := executeAliasesTemplate(buf, languagesByAlias, aliasesTmplPath, aliasesTmplName, commit); err != nil { if err := executeAliasesTemplate(buf, languagesByAlias, tmplPath, tmplName, commit); err != nil {
return nil, err return err
} }
// Old version returned the rendered bytes; new version passes them to formatedWrite together with outPath.
return buf.Bytes(), nil return formatedWrite(outPath, buf.Bytes())
} }
func buildAliasLanguageMap(languages map[string]*languageInfo, orderedLangList []string) map[string]string { func buildAliasLanguageMap(languages map[string]*languageInfo, orderedLangList []string) map[string]string {

View File

@ -3,32 +3,38 @@ package generator
import ( import (
"bytes" "bytes"
"io" "io"
"io/ioutil"
"text/template" "text/template"
yaml "gopkg.in/yaml.v2" yaml "gopkg.in/yaml.v2"
) )
// Documentation reads from buf and builds source file from documentationTmplPath. // Documentation reads from fileToParse and builds source file from tmplPath. It complies with the signature of type File.
// Side-by-side diff rows: left is the old Documentation (takes the YAML bytes, returns the rendered bytes),
// right is the new one (reads fileToParse itself and writes the result to outPath). samplesDir is not
// referenced in the body shown here.
func Documentation(data []byte, documentationTmplPath, documentationTmplName, commit string) ([]byte, error) { func Documentation(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error {
data, err := ioutil.ReadFile(fileToParse)
if err != nil {
return err
}
// The YAML input is decoded as a flat list of strings.
var regexpList []string var regexpList []string
if err := yaml.Unmarshal(data, &regexpList); err != nil { if err := yaml.Unmarshal(data, &regexpList); err != nil {
return nil, err return err
} }
buf := &bytes.Buffer{} buf := &bytes.Buffer{}
if err := executeDocumentationTemplate(buf, regexpList, documentationTmplPath, documentationTmplName, commit); err != nil { if err := executeDocumentationTemplate(buf, regexpList, tmplPath, tmplName, commit); err != nil {
return nil, err return err
} }
// Old version returned the rendered bytes; new version passes them to formatedWrite together with outPath.
return buf.Bytes(), nil return formatedWrite(outPath, buf.Bytes())
} }
func executeDocumentationTemplate(out io.Writer, regexpList []string, documentationTmplPath, documentationTmpl, commit string) error { func executeDocumentationTemplate(out io.Writer, regexpList []string, tmplPath, tmplName, commit string) error {
fmap := template.FuncMap{ fmap := template.FuncMap{
"getCommit": func() string { return commit }, "getCommit": func() string { return commit },
} }
t := template.Must(template.New(documentationTmpl).Funcs(fmap).ParseFiles(documentationTmplPath)) t := template.Must(template.New(tmplName).Funcs(fmap).ParseFiles(tmplPath))
if err := t.Execute(out, regexpList); err != nil { if err := t.Execute(out, regexpList); err != nil {
return err return err
} }

View File

@ -3,6 +3,7 @@ package generator
import ( import (
"bytes" "bytes"
"io" "io"
"io/ioutil"
"strings" "strings"
"text/template" "text/template"
@ -14,24 +15,45 @@ type extensionsInfo struct {
ExtensionsByLanguage map[string][]string ExtensionsByLanguage map[string][]string
} }
// Extensions reads from buf and builds source file from extensionsTmplPath. // Extensions reads from fileToParse and builds a source file from tmplPath. It complies with the File type signature.
func Extensions(data []byte, extensionsTmplPath, extensionsTmplName, commit string) ([]byte, error) { func Extensions(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error {
languages := make(map[string]*languageInfo) data, err := ioutil.ReadFile(fileToParse)
if err := yaml.Unmarshal(data, &languages); err != nil { if err != nil {
return nil, err return err
} }
languages := make(map[string]*languageInfo)
if err := yaml.Unmarshal(data, &languages); err != nil {
return err
}
extensionsToLower(languages)
extInfo := &extensionsInfo{} extInfo := &extensionsInfo{}
orderedKeyList := getAlphabeticalOrderedKeys(languages) orderedKeyList := getAlphabeticalOrderedKeys(languages)
extInfo.LanguagesByExtension = buildExtensionLanguageMap(languages, orderedKeyList) extInfo.LanguagesByExtension = buildExtensionLanguageMap(languages, orderedKeyList)
extInfo.ExtensionsByLanguage = buildLanguageExtensionsMap(languages) extInfo.ExtensionsByLanguage = buildLanguageExtensionsMap(languages)
buf := &bytes.Buffer{} buf := &bytes.Buffer{}
if err := executeExtensionsTemplate(buf, extInfo, extensionsTmplPath, extensionsTmplName, commit); err != nil { if err := executeExtensionsTemplate(buf, extInfo, tmplPath, tmplName, commit); err != nil {
return nil, err return err
} }
return buf.Bytes(), nil return formatedWrite(outPath, buf.Bytes())
}
// extensionsToLower replaces, in place, the Extensions list of every entry in
// languages with the lower-cased copy produced by stringSliceToLower.
func extensionsToLower(languages map[string]*languageInfo) {
	for name := range languages {
		entry := languages[name]
		entry.Extensions = stringSliceToLower(entry.Extensions)
	}
}
// stringSliceToLower returns a new slice holding the lower-cased form of each
// element of slice, in the same order. The input slice is left untouched and
// the result is never nil, even for a nil or empty input.
func stringSliceToLower(slice []string) []string {
	lowered := make([]string, len(slice))
	for i := range slice {
		lowered[i] = strings.ToLower(slice[i])
	}
	return lowered
}
func buildExtensionLanguageMap(languages map[string]*languageInfo, orderedKeyList []string) map[string][]string { func buildExtensionLanguageMap(languages map[string]*languageInfo, orderedKeyList []string) map[string][]string {
@ -57,13 +79,13 @@ func buildLanguageExtensionsMap(languages map[string]*languageInfo) map[string][
return langExtensionMap return langExtensionMap
} }
func executeExtensionsTemplate(out io.Writer, extInfo *extensionsInfo, extensionsTmplPath, extensionsTmpl, commit string) error { func executeExtensionsTemplate(out io.Writer, extInfo *extensionsInfo, tmplPath, tmplName, commit string) error {
fmap := template.FuncMap{ fmap := template.FuncMap{
"getCommit": func() string { return commit }, "getCommit": func() string { return commit },
"formatStringSlice": func(slice []string) string { return `"` + strings.Join(slice, `","`) + `"` }, "formatStringSlice": func(slice []string) string { return `"` + strings.Join(slice, `","`) + `"` },
} }
t := template.Must(template.New(extensionsTmpl).Funcs(fmap).ParseFiles(extensionsTmplPath)) t := template.Must(template.New(tmplName).Funcs(fmap).ParseFiles(tmplPath))
if err := t.Execute(out, extInfo); err != nil { if err := t.Execute(out, extInfo); err != nil {
return err return err
} }

View File

@ -3,27 +3,79 @@ package generator
import ( import (
"bytes" "bytes"
"io" "io"
"io/ioutil"
"os"
"path/filepath"
"strings" "strings"
"text/template" "text/template"
yaml "gopkg.in/yaml.v2" yaml "gopkg.in/yaml.v2"
) )
// Filenames reads from buf and builds source file from filenamesTmplPath. // Filenames reads from fileToParse and builds a source file from tmplPath. It complies with the File type signature.
func Filenames(data []byte, filenamesTmplPath, filenamesTmplName, commit string) ([]byte, error) { func Filenames(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error {
data, err := ioutil.ReadFile(fileToParse)
if err != nil {
return err
}
languages := make(map[string]*languageInfo) languages := make(map[string]*languageInfo)
if err := yaml.Unmarshal(data, &languages); err != nil { if err := yaml.Unmarshal(data, &languages); err != nil {
return nil, err return err
}
if err := walkSamplesFilenames(samplesDir, languages); err != nil {
return err
} }
languagesByFilename := buildFilenameLanguageMap(languages) languagesByFilename := buildFilenameLanguageMap(languages)
buf := &bytes.Buffer{} buf := &bytes.Buffer{}
if err := executeFilenamesTemplate(buf, languagesByFilename, filenamesTmplPath, filenamesTmplName, commit); err != nil { if err := executeFilenamesTemplate(buf, languagesByFilename, tmplPath, tmplName, commit); err != nil {
return nil, err return err
} }
return buf.Bytes(), nil return formatedWrite(outPath, buf.Bytes())
}
func walkSamplesFilenames(samplesDir string, languages map[string]*languageInfo) error {
const filenamesDir = "filenames"
var language string
err := filepath.Walk(samplesDir, func(path string, f os.FileInfo, err error) error {
if err != nil {
return err
}
if f.IsDir() {
if f.Name() != filenamesDir {
language = f.Name()
}
return nil
}
parentDir := filepath.Base(filepath.Dir(path))
if parentDir != filenamesDir {
return nil
}
info, ok := languages[language]
if !ok {
info = &languageInfo{Filenames: []string{}}
}
for _, filename := range info.Filenames {
if filename == f.Name() {
return nil
}
}
info.Filenames = append(info.Filenames, f.Name())
return nil
})
return err
} }
func buildFilenameLanguageMap(languages map[string]*languageInfo) map[string][]string { func buildFilenameLanguageMap(languages map[string]*languageInfo) map[string][]string {
@ -37,13 +89,13 @@ func buildFilenameLanguageMap(languages map[string]*languageInfo) map[string][]s
return filenameLangMap return filenameLangMap
} }
func executeFilenamesTemplate(out io.Writer, languagesByFilename map[string][]string, filenamesTmplPath, filenamesTmpl, commit string) error { func executeFilenamesTemplate(out io.Writer, languagesByFilename map[string][]string, tmplPath, tmplName, commit string) error {
fmap := template.FuncMap{ fmap := template.FuncMap{
"getCommit": func() string { return commit }, "getCommit": func() string { return commit },
"formatStringSlice": func(slice []string) string { return `"` + strings.Join(slice, `","`) + `"` }, "formatStringSlice": func(slice []string) string { return `"` + strings.Join(slice, `","`) + `"` },
} }
t := template.Must(template.New(filenamesTmpl).Funcs(fmap).ParseFiles(filenamesTmplPath)) t := template.Must(template.New(tmplName).Funcs(fmap).ParseFiles(tmplPath))
if err := t.Execute(out, languagesByFilename); err != nil { if err := t.Execute(out, languagesByFilename); err != nil {
return err return err
} }

View File

@ -5,24 +5,8 @@ import (
"io/ioutil" "io/ioutil"
) )
// Func is the function's type that generate source file from a data to be parsed and a template. // File is the function type that generates a source file from a file to be parsed, linguist's samples dir and a template.
type Func func(dataToParse []byte, templatePath string, template string, commit string) ([]byte, error) type File func(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error
// FromFile read data to parse from a file named fileToParse and write the generated source code to a file named outPath. The generated
// source code is formated with gofmt and tagged with commit.
func FromFile(fileToParse, outPath, tmplPath, tmplName, commit string, generate Func) error {
buf, err := ioutil.ReadFile(fileToParse)
if err != nil {
return err
}
source, err := generate(buf, tmplPath, tmplName, commit)
if err != nil {
return err
}
return formatedWrite(outPath, source)
}
func formatedWrite(outPath string, source []byte) error { func formatedWrite(outPath string, source []byte) error {
formatedSource, err := format.Source(source) formatedSource, err := format.Source(source)

View File

@ -13,60 +13,55 @@ import (
) )
const ( const (
lingustURL = "https://github.com/github/linguist.git" lingustURL = "https://github.com/github/linguist.git"
commitTree = "60f864a138650dd17fafc94814be9ee2d3aaef8c" commit = "b6460f8ed6b249281ada099ca28bd8f1230b8892"
commitTest = "0123456789abcdef0123456789abcdef01234567" samplesDir = "samples"
languagesFile = "lib/linguist/languages.yml"
// Extensions test // Extensions test
extensionsTestFile = "test_files/extensions.test.yml" extensionGold = "test_files/extension.gold"
extensionsGold = "test_files/extensions.gold" extensionTestTmplPath = "../assets/extension.go.tmpl"
extensionsTestTmplPath = "../assets/extensions.go.tmpl" extensionTestTmplName = "extension.go.tmpl"
extensionsTestTmplName = "extensions.go.tmpl"
// Heuristics test // Heuristics test
heuristicsTestFile = "test_files/heuristics.test.rb" heuristicsTestFile = "lib/linguist/heuristics.rb"
contentGold = "test_files/content.gold" contentGold = "test_files/content.gold"
contentTestTmplPath = "../assets/content.go.tmpl" contentTestTmplPath = "../assets/content.go.tmpl"
contentTestTmplName = "content.go.tmpl" contentTestTmplName = "content.go.tmpl"
// Vendor test // Vendor test
vendorTestFile = "test_files/vendor.test.yml" vendorTestFile = "lib/linguist/vendor.yml"
vendorGold = "test_files/vendor.gold" vendorGold = "test_files/vendor.gold"
vendorTestTmplPath = "../assets/vendor.go.tmpl" vendorTestTmplPath = "../assets/vendor.go.tmpl"
vendorTestTmplName = "vendor.go.tmpl" vendorTestTmplName = "vendor.go.tmpl"
// Documentation test // Documentation test
documentationTestFile = "test_files/documentation.test.yml" documentationTestFile = "lib/linguist/documentation.yml"
documentationGold = "test_files/documentation.gold" documentationGold = "test_files/documentation.gold"
documentationTestTmplPath = "../assets/documentation.go.tmpl" documentationTestTmplPath = "../assets/documentation.go.tmpl"
documentationTestTmplName = "documentation.go.tmpl" documentationTestTmplName = "documentation.go.tmpl"
// Types test // Types test
typesTestFile = "test_files/types.test.yml" typeGold = "test_files/type.gold"
typesGold = "test_files/types.gold" typeTestTmplPath = "../assets/type.go.tmpl"
typesTestTmplPath = "../assets/types.go.tmpl" typeTestTmplName = "type.go.tmpl"
typesTestTmplName = "types.go.tmpl"
// Interpreters test // Interpreters test
interpretersTestFile = "test_files/interpreters.test.yml" interpreterGold = "test_files/interpreter.gold"
interpretersGold = "test_files/interpreters.gold" interpreterTestTmplPath = "../assets/interpreter.go.tmpl"
interpretersTestTmplPath = "../assets/interpreters.go.tmpl" interpreterTestTmplName = "interpreter.go.tmpl"
interpretersTestTmplName = "interpreters.go.tmpl"
// Filenames test // Filenames test
filenamesTestFile = "test_files/filenames.test.yml" filenameGold = "test_files/filename.gold"
filenamesGold = "test_files/filenames.gold" filenameTestTmplPath = "../assets/filename.go.tmpl"
filenamesTestTmplPath = "../assets/filenames.go.tmpl" filenameTestTmplName = "filename.go.tmpl"
filenamesTestTmplName = "filenames.go.tmpl"
// Aliases test // Aliases test
aliasesTestFile = "test_files/aliases.test.yml" aliasGold = "test_files/alias.gold"
aliasesGold = "test_files/aliases.gold" aliasTestTmplPath = "../assets/alias.go.tmpl"
aliasesTestTmplPath = "../assets/aliases.go.tmpl" aliasTestTmplName = "alias.go.tmpl"
aliasesTestTmplName = "aliases.go.tmpl"
// Frequencies test // Frequencies test
frequenciesTestDir = "/samples"
frequenciesGold = "test_files/frequencies.gold" frequenciesGold = "test_files/frequencies.gold"
frequenciesTestTmplPath = "../assets/frequencies.go.tmpl" frequenciesTestTmplPath = "../assets/frequencies.go.tmpl"
frequenciesTestTmplName = "frequencies.go.tmpl" frequenciesTestTmplName = "frequencies.go.tmpl"
@ -96,7 +91,7 @@ func (g *GeneratorTestSuite) SetupSuite() {
err = os.Chdir(tmpLinguist) err = os.Chdir(tmpLinguist)
assert.NoError(g.T(), err) assert.NoError(g.T(), err)
cmd = exec.Command("git", "checkout", commitTree) cmd = exec.Command("git", "checkout", commit)
err = cmd.Run() err = cmd.Run()
assert.NoError(g.T(), err) assert.NoError(g.T(), err)
@ -109,87 +104,105 @@ func (g *GeneratorTestSuite) TearDownSuite() {
assert.NoError(g.T(), err) assert.NoError(g.T(), err)
} }
func (g *GeneratorTestSuite) TestFromFile() { func (g *GeneratorTestSuite) TestGenerationFiles() {
tests := []struct { tests := []struct {
name string name string
fileToParse string fileToParse string
samplesDir string
tmplPath string tmplPath string
tmplName string tmplName string
commit string commit string
generate Func generate File
wantOut string wantOut string
}{ }{
{ {
name: "TestFromFile_Extensions", name: "Extensions()",
fileToParse: extensionsTestFile, fileToParse: filepath.Join(g.tmpLinguist, languagesFile),
tmplPath: extensionsTestTmplPath, samplesDir: "",
tmplName: extensionsTestTmplName, tmplPath: extensionTestTmplPath,
commit: commitTest, tmplName: extensionTestTmplName,
commit: commit,
generate: Extensions, generate: Extensions,
wantOut: extensionsGold, wantOut: extensionGold,
}, },
{ {
name: "TestFromFile_Heuristics", name: "Heuristics()",
fileToParse: heuristicsTestFile, fileToParse: filepath.Join(g.tmpLinguist, heuristicsTestFile),
samplesDir: "",
tmplPath: contentTestTmplPath, tmplPath: contentTestTmplPath,
tmplName: contentTestTmplName, tmplName: contentTestTmplName,
commit: commitTest, commit: commit,
generate: Heuristics, generate: Heuristics,
wantOut: contentGold, wantOut: contentGold,
}, },
{ {
name: "TestFromFile_Vendor", name: "Vendor()",
fileToParse: vendorTestFile, fileToParse: filepath.Join(g.tmpLinguist, vendorTestFile),
samplesDir: "",
tmplPath: vendorTestTmplPath, tmplPath: vendorTestTmplPath,
tmplName: vendorTestTmplName, tmplName: vendorTestTmplName,
commit: commitTest, commit: commit,
generate: Vendor, generate: Vendor,
wantOut: vendorGold, wantOut: vendorGold,
}, },
{ {
name: "TestFromFile_Documentation", name: "Documentation()",
fileToParse: documentationTestFile, fileToParse: filepath.Join(g.tmpLinguist, documentationTestFile),
samplesDir: "",
tmplPath: documentationTestTmplPath, tmplPath: documentationTestTmplPath,
tmplName: documentationTestTmplName, tmplName: documentationTestTmplName,
commit: commitTest, commit: commit,
generate: Documentation, generate: Documentation,
wantOut: documentationGold, wantOut: documentationGold,
}, },
{ {
name: "TestFromFile_Types", name: "Types()",
fileToParse: typesTestFile, fileToParse: filepath.Join(g.tmpLinguist, languagesFile),
tmplPath: typesTestTmplPath, samplesDir: "",
tmplName: typesTestTmplName, tmplPath: typeTestTmplPath,
commit: commitTest, tmplName: typeTestTmplName,
commit: commit,
generate: Types, generate: Types,
wantOut: typesGold, wantOut: typeGold,
}, },
{ {
name: "TestFromFile_Interpreters", name: "Interpreters()",
fileToParse: interpretersTestFile, fileToParse: filepath.Join(g.tmpLinguist, languagesFile),
tmplPath: interpretersTestTmplPath, samplesDir: "",
tmplName: interpretersTestTmplName, tmplPath: interpreterTestTmplPath,
commit: commitTest, tmplName: interpreterTestTmplName,
commit: commit,
generate: Interpreters, generate: Interpreters,
wantOut: interpretersGold, wantOut: interpreterGold,
}, },
{ {
name: "TestFromFile_Filenames", name: "Filenames()",
fileToParse: filenamesTestFile, fileToParse: filepath.Join(g.tmpLinguist, languagesFile),
tmplPath: filenamesTestTmplPath, samplesDir: filepath.Join(g.tmpLinguist, samplesDir),
tmplName: filenamesTestTmplName, tmplPath: filenameTestTmplPath,
commit: commitTest, tmplName: filenameTestTmplName,
commit: commit,
generate: Filenames, generate: Filenames,
wantOut: filenamesGold, wantOut: filenameGold,
}, },
{ {
name: "TestFromFile_Aliases", name: "Aliases()",
fileToParse: aliasesTestFile, fileToParse: filepath.Join(g.tmpLinguist, languagesFile),
tmplPath: aliasesTestTmplPath, samplesDir: "",
tmplName: aliasesTestTmplName, tmplPath: aliasTestTmplPath,
commit: commitTest, tmplName: aliasTestTmplName,
commit: commit,
generate: Aliases, generate: Aliases,
wantOut: aliasesGold, wantOut: aliasGold,
},
{
name: "Frequencies()",
samplesDir: filepath.Join(g.tmpLinguist, samplesDir),
tmplPath: frequenciesTestTmplPath,
tmplName: frequenciesTestTmplName,
commit: commit,
generate: Frequencies,
wantOut: frequenciesGold,
}, },
} }
@ -201,45 +214,10 @@ func (g *GeneratorTestSuite) TestFromFile() {
assert.NoError(g.T(), err) assert.NoError(g.T(), err)
defer os.Remove(outPath.Name()) defer os.Remove(outPath.Name())
err = FromFile(test.fileToParse, outPath.Name(), test.tmplPath, test.tmplName, test.commit, test.generate) err = test.generate(test.fileToParse, test.samplesDir, outPath.Name(), test.tmplPath, test.tmplName, test.commit)
assert.NoError(g.T(), err) assert.NoError(g.T(), err)
out, err := ioutil.ReadFile(outPath.Name()) out, err := ioutil.ReadFile(outPath.Name())
assert.NoError(g.T(), err) assert.NoError(g.T(), err)
assert.EqualValues(g.T(), gold, out, fmt.Sprintf("%v: %v, expected: %v", test.name, string(out), string(test.wantOut))) assert.EqualValues(g.T(), gold, out, fmt.Sprintf("%v: %v, expected: %v", test.name, string(out), string(gold)))
}
}
func (g *GeneratorTestSuite) TestFrequencies() {
tests := []struct {
name string
samplesDir string
tmplPath string
tmplName string
commit string
wantOut string
}{
{
name: "Frequencies_1",
samplesDir: filepath.Join(g.tmpLinguist, frequenciesTestDir),
tmplPath: frequenciesTestTmplPath,
tmplName: frequenciesTestTmplName,
commit: commitTree,
wantOut: frequenciesGold,
},
}
for _, test := range tests {
gold, err := ioutil.ReadFile(test.wantOut)
assert.NoError(g.T(), err)
outPath, err := ioutil.TempFile("/tmp", "frequencies-test-")
assert.NoError(g.T(), err)
defer os.Remove(outPath.Name())
err = Frequencies(test.samplesDir, test.tmplPath, test.tmplName, test.commit, outPath.Name())
assert.NoError(g.T(), err)
out, err := ioutil.ReadFile(outPath.Name())
assert.NoError(g.T(), err)
assert.EqualValues(g.T(), gold, out, fmt.Sprintf("%v: %v, expected: %v", test.name, string(out), string(test.wantOut)))
} }
} }

View File

@ -5,30 +5,37 @@ import (
"bytes" "bytes"
"fmt" "fmt"
"io" "io"
"io/ioutil"
"regexp" "regexp"
"strconv" "strconv"
"strings" "strings"
"text/template" "text/template"
) )
// Heuristics reads from buf and builds source file from contentTmplPath. // Heuristics reads from fileToParse and builds a source file from tmplPath. It complies with the File type signature.
func Heuristics(heuristics []byte, contentTmplPath, contentTmplName, commit string) ([]byte, error) { func Heuristics(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error {
disambiguators, err := getDisambiguators(heuristics) data, err := ioutil.ReadFile(fileToParse)
if err != nil { if err != nil {
return nil, err return err
}
disambiguators, err := getDisambiguators(data)
if err != nil {
return err
} }
buf := &bytes.Buffer{} buf := &bytes.Buffer{}
if err := executeContentTemplate(buf, disambiguators, contentTmplPath, contentTmplName, commit); err != nil { if err := executeContentTemplate(buf, disambiguators, tmplPath, tmplName, commit); err != nil {
return nil, err return err
} }
return buf.Bytes(), nil return formatedWrite(outPath, buf.Bytes())
// fmt.Println(string(buf.Bytes()))
// return nil, nil
} }
const unknownLanguage = "OtherLanguage" const (
unknownLanguage = "OtherLanguage"
emptyFile = "^$"
)
var ( var (
disambLine = regexp.MustCompile(`^(\s*)disambiguate`) disambLine = regexp.MustCompile(`^(\s*)disambiguate`)
@ -297,7 +304,7 @@ func getHeuristics(line string) []*heuristic {
} }
if strings.Contains(v, ".empty?") { if strings.Contains(v, ".empty?") {
reg = `^$` reg = emptyFile
} }
if reg != "" { if reg != "" {
@ -340,6 +347,10 @@ func convertToValidRegexp(reg string) string {
rubyMultiLine = "m" rubyMultiLine = "m"
) )
if reg == emptyFile {
return reg
}
reg = strings.TrimPrefix(reg, `/`) reg = strings.TrimPrefix(reg, `/`)
flags := "(?m" flags := "(?m"
lastSlash := strings.LastIndex(reg, `/`) lastSlash := strings.LastIndex(reg, `/`)
@ -415,7 +426,7 @@ func buildLanguagesHeuristics(langsList [][]string, heuristicsList [][]*heuristi
return langsHeuristics return langsHeuristics
} }
func executeContentTemplate(out io.Writer, disambiguators []*disambiguator, contentTmplPath, contentTmpl, commit string) error { func executeContentTemplate(out io.Writer, disambiguators []*disambiguator, tmplPath, tmplName, commit string) error {
fmap := template.FuncMap{ fmap := template.FuncMap{
"getCommit": func() string { return commit }, "getCommit": func() string { return commit },
"getAllHeuristics": getAllHeuristics, "getAllHeuristics": getAllHeuristics,
@ -430,7 +441,7 @@ func executeContentTemplate(out io.Writer, disambiguators []*disambiguator, cont
"avoidLanguage": avoidLanguage, "avoidLanguage": avoidLanguage,
} }
t := template.Must(template.New(contentTmpl).Funcs(fmap).ParseFiles(contentTmplPath)) t := template.Must(template.New(tmplName).Funcs(fmap).ParseFiles(tmplPath))
if err := t.Execute(out, disambiguators); err != nil { if err := t.Execute(out, disambiguators); err != nil {
return err return err
} }

View File

@ -3,28 +3,34 @@ package generator
import ( import (
"bytes" "bytes"
"io" "io"
"io/ioutil"
"strings" "strings"
"text/template" "text/template"
"gopkg.in/yaml.v2" "gopkg.in/yaml.v2"
) )
// Interpreters reads from buf and builds source file from interpretersTmplPath. // Interpreters reads from fileToParse and builds a source file from tmplPath. It complies with the File type signature.
func Interpreters(data []byte, interpretersTmplPath, interpretersTmplName, commit string) ([]byte, error) { func Interpreters(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error {
data, err := ioutil.ReadFile(fileToParse)
if err != nil {
return err
}
languages := make(map[string]*languageInfo) languages := make(map[string]*languageInfo)
if err := yaml.Unmarshal(data, &languages); err != nil { if err := yaml.Unmarshal(data, &languages); err != nil {
return nil, err return err
} }
orderedKeys := getAlphabeticalOrderedKeys(languages) orderedKeys := getAlphabeticalOrderedKeys(languages)
languagesByInterpreter := buildInterpreterLanguagesMap(languages, orderedKeys) languagesByInterpreter := buildInterpreterLanguagesMap(languages, orderedKeys)
buf := &bytes.Buffer{} buf := &bytes.Buffer{}
if err := executeInterpretersTemplate(buf, languagesByInterpreter, interpretersTmplPath, interpretersTmplName, commit); err != nil { if err := executeInterpretersTemplate(buf, languagesByInterpreter, tmplPath, tmplName, commit); err != nil {
return nil, err return err
} }
return buf.Bytes(), nil return formatedWrite(outPath, buf.Bytes())
} }
func buildInterpreterLanguagesMap(languages map[string]*languageInfo, orderedKeys []string) map[string][]string { func buildInterpreterLanguagesMap(languages map[string]*languageInfo, orderedKeys []string) map[string][]string {
@ -39,13 +45,13 @@ func buildInterpreterLanguagesMap(languages map[string]*languageInfo, orderedKey
return interpreterLangsMap return interpreterLangsMap
} }
func executeInterpretersTemplate(out io.Writer, languagesByInterpreter map[string][]string, interpretersTmplPath, interpretersTmpl, commit string) error { func executeInterpretersTemplate(out io.Writer, languagesByInterpreter map[string][]string, tmplPath, tmplName, commit string) error {
fmap := template.FuncMap{ fmap := template.FuncMap{
"getCommit": func() string { return commit }, "getCommit": func() string { return commit },
"formatStringSlice": func(slice []string) string { return `"` + strings.Join(slice, `","`) + `"` }, "formatStringSlice": func(slice []string) string { return `"` + strings.Join(slice, `","`) + `"` },
} }
t := template.Must(template.New(interpretersTmpl).Funcs(fmap).ParseFiles(interpretersTmplPath)) t := template.Must(template.New(tmplName).Funcs(fmap).ParseFiles(tmplPath))
if err := t.Execute(out, languagesByInterpreter); err != nil { if err := t.Execute(out, languagesByInterpreter); err != nil {
return err return err
} }

View File

@ -16,8 +16,6 @@ import (
"gopkg.in/src-d/enry.v1/internal/tokenizer" "gopkg.in/src-d/enry.v1/internal/tokenizer"
) )
const samplesSubDir = "filenames"
type samplesFrequencies struct { type samplesFrequencies struct {
LanguageTotal int `json:"language_total,omitempty"` LanguageTotal int `json:"language_total,omitempty"`
Languages map[string]int `json:"languages,omitempty"` Languages map[string]int `json:"languages,omitempty"`
@ -27,15 +25,15 @@ type samplesFrequencies struct {
} }
// Frequencies reads directories in samplesDir, retrieves information about frequencies of languages and tokens, and write // Frequencies reads directories in samplesDir, retrieves information about frequencies of languages and tokens, and write
// the file outPath using frequenciesTmplName as a template. // the file outPath using tmplName as a template.
func Frequencies(samplesDir, frequenciesTmplPath, frequenciesTmplName, commit, outPath string) error { func Frequencies(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error {
freqs, err := getFrequencies(samplesDir) freqs, err := getFrequencies(samplesDir)
if err != nil { if err != nil {
return err return err
} }
buf := &bytes.Buffer{} buf := &bytes.Buffer{}
if err := executeFrequenciesTemplate(buf, freqs, frequenciesTmplPath, frequenciesTmplName, commit); err != nil { if err := executeFrequenciesTemplate(buf, freqs, tmplPath, tmplName, commit); err != nil {
return err return err
} }
@ -96,6 +94,7 @@ func getFrequencies(samplesDir string) (*samplesFrequencies, error) {
} }
func getSamples(samplesDir string, langDir os.FileInfo) ([]string, error) { func getSamples(samplesDir string, langDir os.FileInfo) ([]string, error) {
const samplesSubDir = "filenames"
samples := []string{} samples := []string{}
path := filepath.Join(samplesDir, langDir.Name()) path := filepath.Join(samplesDir, langDir.Name())
entries, err := ioutil.ReadDir(path) entries, err := ioutil.ReadDir(path)
@ -156,7 +155,7 @@ func getTokens(samples []string) ([]string, error) {
return tokens, anyError return tokens, anyError
} }
func executeFrequenciesTemplate(out io.Writer, freqs *samplesFrequencies, frequenciesTmplPath, frequenciesTmpl, commit string) error { func executeFrequenciesTemplate(out io.Writer, freqs *samplesFrequencies, tmplPath, tmplName, commit string) error {
fmap := template.FuncMap{ fmap := template.FuncMap{
"getCommit": func() string { return commit }, "getCommit": func() string { return commit },
"toFloat64": func(num int) string { return fmt.Sprintf("%f", float64(num)) }, "toFloat64": func(num int) string { return fmt.Sprintf("%f", float64(num)) },
@ -189,7 +188,7 @@ func executeFrequenciesTemplate(out io.Writer, freqs *samplesFrequencies, freque
"quote": strconv.Quote, "quote": strconv.Quote,
} }
t := template.Must(template.New(frequenciesTmpl).Funcs(fmap).ParseFiles(frequenciesTmplPath)) t := template.Must(template.New(tmplName).Funcs(fmap).ParseFiles(tmplPath))
if err := t.Execute(out, freqs); err != nil { if err := t.Execute(out, freqs); err != nil {
return err return err
} }

View File

@ -0,0 +1,647 @@
package enry
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/enry.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: b6460f8ed6b249281ada099ca28bd8f1230b8892
// languagesByAlias keeps alias for different languages and use the name of the languages as an alias too.
// All the keys (alias or not) are written in lower case and the whitespaces has been replaced by underscores.
var languagesByAlias = map[string]string{
"1c_enterprise": "1C Enterprise",
"abap": "ABAP",
"abl": "OpenEdge ABL",
"abnf": "ABNF",
"abuild": "Alpine Abuild",
"aconf": "ApacheConf",
"actionscript": "ActionScript",
"actionscript3": "ActionScript",
"actionscript_3": "ActionScript",
"ada": "Ada",
"ada2005": "Ada",
"ada95": "Ada",
"advpl": "xBase",
"afdko": "OpenType Feature File",
"agda": "Agda",
"ags": "AGS Script",
"ags_script": "AGS Script",
"ahk": "AutoHotkey",
"alloy": "Alloy",
"alpine_abuild": "Alpine Abuild",
"ampl": "AMPL",
"ant_build_system": "Ant Build System",
"antlr": "ANTLR",
"apache": "ApacheConf",
"apacheconf": "ApacheConf",
"apex": "Apex",
"api_blueprint": "API Blueprint",
"apkbuild": "Alpine Abuild",
"apl": "APL",
"apollo_guidance_computer": "Apollo Guidance Computer",
"applescript": "AppleScript",
"arc": "Arc",
"arduino": "Arduino",
"arexx": "REXX",
"as3": "ActionScript",
"asciidoc": "AsciiDoc",
"asn.1": "ASN.1",
"asp": "ASP",
"aspectj": "AspectJ",
"aspx": "ASP",
"aspx-vb": "ASP",
"assembly": "Assembly",
"ats": "ATS",
"ats2": "ATS",
"au3": "AutoIt",
"augeas": "Augeas",
"autoconf": "M4Sugar",
"autohotkey": "AutoHotkey",
"autoit": "AutoIt",
"autoit3": "AutoIt",
"autoitscript": "AutoIt",
"awk": "Awk",
"b3d": "BlitzBasic",
"bash": "Shell",
"bash_session": "ShellSession",
"bat": "Batchfile",
"batch": "Batchfile",
"batchfile": "Batchfile",
"befunge": "Befunge",
"bison": "Bison",
"bitbake": "BitBake",
"blade": "Blade",
"blitz3d": "BlitzBasic",
"blitzbasic": "BlitzBasic",
"blitzmax": "BlitzMax",
"blitzplus": "BlitzBasic",
"bluespec": "Bluespec",
"bmax": "BlitzMax",
"boo": "Boo",
"bplus": "BlitzBasic",
"brainfuck": "Brainfuck",
"brightscript": "Brightscript",
"bro": "Bro",
"bsdmake": "Makefile",
"byond": "DM",
"c": "C",
"c#": "C#",
"c++": "C++",
"c++-objdump": "Cpp-ObjDump",
"c-objdump": "C-ObjDump",
"c2hs": "C2hs Haskell",
"c2hs_haskell": "C2hs Haskell",
"cap'n_proto": "Cap'n Proto",
"carto": "CartoCSS",
"cartocss": "CartoCSS",
"ceylon": "Ceylon",
"cfc": "ColdFusion CFC",
"cfm": "ColdFusion",
"cfml": "ColdFusion",
"chapel": "Chapel",
"charity": "Charity",
"chpl": "Chapel",
"chuck": "ChucK",
"cirru": "Cirru",
"clarion": "Clarion",
"clean": "Clean",
"click": "Click",
"clipper": "xBase",
"clips": "CLIPS",
"clojure": "Clojure",
"closure_templates": "Closure Templates",
"cmake": "CMake",
"cobol": "COBOL",
"coffee": "CoffeeScript",
"coffee-script": "CoffeeScript",
"coffeescript": "CoffeeScript",
"coldfusion": "ColdFusion",
"coldfusion_cfc": "ColdFusion CFC",
"coldfusion_html": "ColdFusion",
"collada": "COLLADA",
"common_lisp": "Common Lisp",
"component_pascal": "Component Pascal",
"console": "ShellSession",
"cool": "Cool",
"coq": "Coq",
"cpp": "C++",
"cpp-objdump": "Cpp-ObjDump",
"creole": "Creole",
"crystal": "Crystal",
"csharp": "C#",
"cson": "CSON",
"csound": "Csound",
"csound-csd": "Csound Document",
"csound-orc": "Csound",
"csound-sco": "Csound Score",
"csound_document": "Csound Document",
"csound_score": "Csound Score",
"css": "CSS",
"csv": "CSV",
"cucumber": "Gherkin",
"cuda": "Cuda",
"cweb": "CWeb",
"cycript": "Cycript",
"cython": "Cython",
"d": "D",
"d-objdump": "D-ObjDump",
"darcs_patch": "Darcs Patch",
"dart": "Dart",
"dcl": "DIGITAL Command Language",
"delphi": "Component Pascal",
"desktop": "desktop",
"diff": "Diff",
"digital_command_language": "DIGITAL Command Language",
"django": "HTML+Django",
"dm": "DM",
"dns_zone": "DNS Zone",
"dockerfile": "Dockerfile",
"dogescript": "Dogescript",
"dosbatch": "Batchfile",
"dosini": "INI",
"dpatch": "Darcs Patch",
"dtrace": "DTrace",
"dtrace-script": "DTrace",
"dylan": "Dylan",
"e": "E",
"eagle": "Eagle",
"ebnf": "EBNF",
"ec": "eC",
"ecere_projects": "Ecere Projects",
"ecl": "ECL",
"eclipse": "ECLiPSe",
"ecr": "HTML+ECR",
"edn": "edn",
"eex": "HTML+EEX",
"eiffel": "Eiffel",
"ejs": "EJS",
"elisp": "Emacs Lisp",
"elixir": "Elixir",
"elm": "Elm",
"emacs": "Emacs Lisp",
"emacs_lisp": "Emacs Lisp",
"emberscript": "EmberScript",
"eq": "EQ",
"erb": "HTML+ERB",
"erlang": "Erlang",
"f#": "F#",
"factor": "Factor",
"fancy": "Fancy",
"fantom": "Fantom",
"filebench_wml": "Filebench WML",
"filterscript": "Filterscript",
"fish": "fish",
"flex": "Lex",
"flux": "FLUX",
"formatted": "Formatted",
"forth": "Forth",
"fortran": "Fortran",
"foxpro": "xBase",
"freemarker": "FreeMarker",
"frege": "Frege",
"fsharp": "F#",
"ftl": "FreeMarker",
"fundamental": "Text",
"g-code": "G-code",
"game_maker_language": "Game Maker Language",
"gams": "GAMS",
"gap": "GAP",
"gcc_machine_description": "GCC Machine Description",
"gdb": "GDB",
"gdscript": "GDScript",
"genie": "Genie",
"genshi": "Genshi",
"gentoo_ebuild": "Gentoo Ebuild",
"gentoo_eclass": "Gentoo Eclass",
"gettext_catalog": "Gettext Catalog",
"gf": "Grammatical Framework",
"gherkin": "Gherkin",
"glsl": "GLSL",
"glyph": "Glyph",
"gn": "GN",
"gnuplot": "Gnuplot",
"go": "Go",
"golang": "Go",
"golo": "Golo",
"gosu": "Gosu",
"grace": "Grace",
"gradle": "Gradle",
"grammatical_framework": "Grammatical Framework",
"graph_modeling_language": "Graph Modeling Language",
"graphql": "GraphQL",
"graphviz_(dot)": "Graphviz (DOT)",
"groovy": "Groovy",
"groovy_server_pages": "Groovy Server Pages",
"gsp": "Groovy Server Pages",
"hack": "Hack",
"haml": "Haml",
"handlebars": "Handlebars",
"harbour": "Harbour",
"haskell": "Haskell",
"haxe": "Haxe",
"hbs": "Handlebars",
"hcl": "HCL",
"hlsl": "HLSL",
"html": "HTML",
"html+django": "HTML+Django",
"html+django/jinja": "HTML+Django",
"html+ecr": "HTML+ECR",
"html+eex": "HTML+EEX",
"html+erb": "HTML+ERB",
"html+jinja": "HTML+Django",
"html+php": "HTML+PHP",
"html+ruby": "RHTML",
"htmlbars": "Handlebars",
"htmldjango": "HTML+Django",
"http": "HTTP",
"hy": "Hy",
"hylang": "Hy",
"hyphy": "HyPhy",
"i7": "Inform 7",
"idl": "IDL",
"idris": "Idris",
"igor": "IGOR Pro",
"igor_pro": "IGOR Pro",
"igorpro": "IGOR Pro",
"inc": "PHP",
"inform7": "Inform 7",
"inform_7": "Inform 7",
"ini": "INI",
"inno_setup": "Inno Setup",
"io": "Io",
"ioke": "Ioke",
"ipython_notebook": "Jupyter Notebook",
"irc": "IRC log",
"irc_log": "IRC log",
"irc_logs": "IRC log",
"isabelle": "Isabelle",
"isabelle_root": "Isabelle ROOT",
"j": "J",
"jasmin": "Jasmin",
"java": "Java",
"java_server_page": "Groovy Server Pages",
"java_server_pages": "Java Server Pages",
"javascript": "JavaScript",
"jflex": "JFlex",
"jison": "Jison",
"jison_lex": "Jison Lex",
"jolie": "Jolie",
"jruby": "Ruby",
"js": "JavaScript",
"json": "JSON",
"json5": "JSON5",
"jsoniq": "JSONiq",
"jsonld": "JSONLD",
"jsp": "Java Server Pages",
"jsx": "JSX",
"julia": "Julia",
"jupyter_notebook": "Jupyter Notebook",
"kicad": "KiCad",
"kit": "Kit",
"kotlin": "Kotlin",
"krl": "KRL",
"labview": "LabVIEW",
"lasso": "Lasso",
"lassoscript": "Lasso",
"latex": "TeX",
"latte": "Latte",
"lean": "Lean",
"less": "Less",
"lex": "Lex",
"lfe": "LFE",
"lhaskell": "Literate Haskell",
"lhs": "Literate Haskell",
"lilypond": "LilyPond",
"limbo": "Limbo",
"linker_script": "Linker Script",
"linux_kernel_module": "Linux Kernel Module",
"liquid": "Liquid",
"lisp": "Common Lisp",
"litcoffee": "Literate CoffeeScript",
"literate_agda": "Literate Agda",
"literate_coffeescript": "Literate CoffeeScript",
"literate_haskell": "Literate Haskell",
"live-script": "LiveScript",
"livescript": "LiveScript",
"llvm": "LLVM",
"logos": "Logos",
"logtalk": "Logtalk",
"lolcode": "LOLCODE",
"lookml": "LookML",
"loomscript": "LoomScript",
"ls": "LiveScript",
"lsl": "LSL",
"lua": "Lua",
"m": "M",
"m4": "M4",
"m4sugar": "M4Sugar",
"macruby": "Ruby",
"make": "Makefile",
"makefile": "Makefile",
"mako": "Mako",
"markdown": "Markdown",
"marko": "Marko",
"markojs": "Marko",
"mask": "Mask",
"mathematica": "Mathematica",
"matlab": "Matlab",
"maven_pom": "Maven POM",
"max": "Max",
"max/msp": "Max",
"maxmsp": "Max",
"maxscript": "MAXScript",
"mediawiki": "MediaWiki",
"mercury": "Mercury",
"meson": "Meson",
"metal": "Metal",
"mf": "Makefile",
"minid": "MiniD",
"mirah": "Mirah",
"mma": "Mathematica",
"modelica": "Modelica",
"modula-2": "Modula-2",
"module_management_system": "Module Management System",
"monkey": "Monkey",
"moocode": "Moocode",
"moonscript": "MoonScript",
"mql4": "MQL4",
"mql5": "MQL5",
"mtml": "MTML",
"muf": "MUF",
"mumps": "M",
"mupad": "mupad",
"myghty": "Myghty",
"nasm": "Assembly",
"ncl": "NCL",
"nemerle": "Nemerle",
"nesc": "nesC",
"netlinx": "NetLinx",
"netlinx+erb": "NetLinx+ERB",
"netlogo": "NetLogo",
"newlisp": "NewLisp",
"nginx": "Nginx",
"nginx_configuration_file": "Nginx",
"nim": "Nim",
"ninja": "Ninja",
"nit": "Nit",
"nix": "Nix",
"nixos": "Nix",
"njk": "HTML+Django",
"nl": "NL",
"node": "JavaScript",
"nroff": "Roff",
"nsis": "NSIS",
"nu": "Nu",
"numpy": "NumPy",
"nunjucks": "HTML+Django",
"nush": "Nu",
"nvim": "Vim script",
"obj-c": "Objective-C",
"obj-c++": "Objective-C++",
"obj-j": "Objective-J",
"objc": "Objective-C",
"objc++": "Objective-C++",
"objdump": "ObjDump",
"objective-c": "Objective-C",
"objective-c++": "Objective-C++",
"objective-j": "Objective-J",
"objectivec": "Objective-C",
"objectivec++": "Objective-C++",
"objectivej": "Objective-J",
"objectpascal": "Component Pascal",
"objj": "Objective-J",
"ocaml": "OCaml",
"octave": "Matlab",
"omgrofl": "Omgrofl",
"ooc": "ooc",
"opa": "Opa",
"opal": "Opal",
"opencl": "OpenCL",
"openedge": "OpenEdge ABL",
"openedge_abl": "OpenEdge ABL",
"openrc": "OpenRC runscript",
"openrc_runscript": "OpenRC runscript",
"openscad": "OpenSCAD",
"opentype_feature_file": "OpenType Feature File",
"org": "Org",
"osascript": "AppleScript",
"ox": "Ox",
"oxygene": "Oxygene",
"oz": "Oz",
"p4": "P4",
"pan": "Pan",
"pandoc": "Markdown",
"papyrus": "Papyrus",
"parrot": "Parrot",
"parrot_assembly": "Parrot Assembly",
"parrot_internal_representation": "Parrot Internal Representation",
"pascal": "Pascal",
"pasm": "Parrot Assembly",
"pawn": "PAWN",
"pep8": "Pep8",
"perl": "Perl",
"perl6": "Perl6",
"php": "PHP",
"pic": "Pic",
"pickle": "Pickle",
"picolisp": "PicoLisp",
"piglatin": "PigLatin",
"pike": "Pike",
"pir": "Parrot Internal Representation",
"plpgsql": "PLpgSQL",
"plsql": "PLSQL",
"pod": "Pod",
"pogoscript": "PogoScript",
"pony": "Pony",
"posh": "PowerShell",
"postscr": "PostScript",
"postscript": "PostScript",
"pot": "Gettext Catalog",
"pov-ray": "POV-Ray SDL",
"pov-ray_sdl": "POV-Ray SDL",
"povray": "POV-Ray SDL",
"powerbuilder": "PowerBuilder",
"powershell": "PowerShell",
"processing": "Processing",
"progress": "OpenEdge ABL",
"prolog": "Prolog",
"propeller_spin": "Propeller Spin",
"protobuf": "Protocol Buffer",
"protocol_buffer": "Protocol Buffer",
"protocol_buffers": "Protocol Buffer",
"public_key": "Public Key",
"pug": "Pug",
"puppet": "Puppet",
"pure_data": "Pure Data",
"purebasic": "PureBasic",
"purescript": "PureScript",
"pycon": "Python console",
"pyrex": "Cython",
"python": "Python",
"python_console": "Python console",
"python_traceback": "Python traceback",
"qmake": "QMake",
"qml": "QML",
"r": "R",
"racket": "Racket",
"ragel": "Ragel",
"ragel-rb": "Ragel",
"ragel-ruby": "Ragel",
"rake": "Ruby",
"raml": "RAML",
"rascal": "Rascal",
"raw": "Raw token data",
"raw_token_data": "Raw token data",
"rb": "Ruby",
"rbx": "Ruby",
"rdoc": "RDoc",
"realbasic": "REALbasic",
"reason": "Reason",
"rebol": "Rebol",
"red": "Red",
"red/system": "Red",
"redcode": "Redcode",
"regex": "Regular Expression",
"regexp": "Regular Expression",
"regular_expression": "Regular Expression",
"ren'py": "Ren'Py",
"renderscript": "RenderScript",
"renpy": "Ren'Py",
"restructuredtext": "reStructuredText",
"rexx": "REXX",
"rhtml": "RHTML",
"rmarkdown": "RMarkdown",
"robotframework": "RobotFramework",
"roff": "Roff",
"rouge": "Rouge",
"rpm_spec": "RPM Spec",
"rscript": "R",
"rss": "XML",
"rst": "reStructuredText",
"ruby": "Ruby",
"runoff": "RUNOFF",
"rust": "Rust",
"rusthon": "Python",
"sage": "Sage",
"salt": "SaltStack",
"saltstack": "SaltStack",
"saltstate": "SaltStack",
"sas": "SAS",
"sass": "Sass",
"scala": "Scala",
"scaml": "Scaml",
"scheme": "Scheme",
"scilab": "Scilab",
"scss": "SCSS",
"self": "Self",
"sh": "Shell",
"shaderlab": "ShaderLab",
"shell": "Shell",
"shell-script": "Shell",
"shellsession": "ShellSession",
"shen": "Shen",
"slash": "Slash",
"slim": "Slim",
"smali": "Smali",
"smalltalk": "Smalltalk",
"smarty": "Smarty",
"sml": "Standard ML",
"smt": "SMT",
"sourcemod": "SourcePawn",
"sourcepawn": "SourcePawn",
"sparql": "SPARQL",
"specfile": "RPM Spec",
"spline_font_database": "Spline Font Database",
"splus": "R",
"sqf": "SQF",
"sql": "SQL",
"sqlpl": "SQLPL",
"squeak": "Smalltalk",
"squirrel": "Squirrel",
"srecode_template": "SRecode Template",
"stan": "Stan",
"standard_ml": "Standard ML",
"stata": "Stata",
"ston": "STON",
"stylus": "Stylus",
"sublime_text_config": "Sublime Text Config",
"subrip_text": "SubRip Text",
"supercollider": "SuperCollider",
"svg": "SVG",
"swift": "Swift",
"systemverilog": "SystemVerilog",
"tcl": "Tcl",
"tcsh": "Tcsh",
"tea": "Tea",
"terra": "Terra",
"tex": "TeX",
"text": "Text",
"textile": "Textile",
"thrift": "Thrift",
"ti_program": "TI Program",
"tl": "Type Language",
"tla": "TLA",
"toml": "TOML",
"ts": "TypeScript",
"turing": "Turing",
"turtle": "Turtle",
"twig": "Twig",
"txl": "TXL",
"type_language": "Type Language",
"typescript": "TypeScript",
"udiff": "Diff",
"unified_parallel_c": "Unified Parallel C",
"unity3d_asset": "Unity3D Asset",
"unix_assembly": "Unix Assembly",
"uno": "Uno",
"unrealscript": "UnrealScript",
"ur": "UrWeb",
"ur/web": "UrWeb",
"urweb": "UrWeb",
"vala": "Vala",
"vb.net": "Visual Basic",
"vbnet": "Visual Basic",
"vcl": "VCL",
"verilog": "Verilog",
"vhdl": "VHDL",
"vim": "Vim script",
"vim_script": "Vim script",
"viml": "Vim script",
"visual_basic": "Visual Basic",
"volt": "Volt",
"vue": "Vue",
"wasm": "WebAssembly",
"wast": "WebAssembly",
"wavefront_material": "Wavefront Material",
"wavefront_object": "Wavefront Object",
"web_ontology_language": "Web Ontology Language",
"webassembly": "WebAssembly",
"webidl": "WebIDL",
"winbatch": "Batchfile",
"wisp": "wisp",
"world_of_warcraft_addon_data": "World of Warcraft Addon Data",
"wsdl": "XML",
"x10": "X10",
"xbase": "xBase",
"xc": "XC",
"xcompose": "XCompose",
"xhtml": "HTML",
"xml": "XML",
"xml+genshi": "Genshi",
"xml+kid": "Genshi",
"xojo": "Xojo",
"xpages": "XPages",
"xproc": "XProc",
"xquery": "XQuery",
"xs": "XS",
"xsd": "XML",
"xsl": "XSLT",
"xslt": "XSLT",
"xten": "X10",
"xtend": "Xtend",
"yacc": "Yacc",
"yaml": "YAML",
"yang": "YANG",
"yml": "YAML",
"zephir": "Zephir",
"zimpl": "Zimpl",
"zsh": "Shell",
}

View File

@ -1,42 +0,0 @@
package enry
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/enry.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: 0123456789abcdef0123456789abcdef01234567
// languagesByAlias maps every language alias to its language name; each language's own name is
// also present as a key, so a name can be used as an alias for itself.
// All keys (aliases and names alike) are written in lower case, with whitespace replaced by underscores.
var languagesByAlias = map[string]string{
"asp": "ASP",
"aspx": "ASP",
"aspx-vb": "ASP",
"au3": "AutoIt",
"autoit": "AutoIt",
"autoit3": "AutoIt",
"autoitscript": "AutoIt",
"bat": "Batchfile",
"batch": "Batchfile",
"batchfile": "Batchfile",
"bsdmake": "Makefile",
"c++": "C++",
"cpp": "C++",
"django": "HTML+Django",
"dosbatch": "Batchfile",
"go": "Go",
"golang": "Go",
"gradle": "Gradle",
"html+django": "HTML+Django",
"html+django/jinja": "HTML+Django",
"html+jinja": "HTML+Django",
"htmldjango": "HTML+Django",
"make": "Makefile",
"makefile": "Makefile",
"mf": "Makefile",
"njk": "HTML+Django",
"nunjucks": "HTML+Django",
"obj-c": "Objective-C",
"objc": "Objective-C",
"objective-c": "Objective-C",
"objectivec": "Objective-C",
"winbatch": "Batchfile",
}

View File

@ -1,42 +0,0 @@
---
ASP:
aliases:
- aspx
- aspx-vb
AutoIt:
aliases:
- au3
- AutoIt3
- AutoItScript
Batchfile:
aliases:
- bat
- batch
- dosbatch
- winbatch
C++:
aliases:
- cpp
Go:
aliases:
- golang
Gradle:
type: data
HTML+Django:
aliases:
- django
- html+django/jinja
- html+jinja
- htmldjango
- njk
- nunjucks
Makefile:
aliases:
- bsdmake
- make
- mf
Objective-C:
aliases:
- obj-c
- objc
- objectivec

View File

@ -2,7 +2,7 @@ package enry
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/enry.v1/internal/code-generator // CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/enry.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND // THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: 0123456789abcdef0123456789abcdef01234567 // Extracted from github/linguist commit: b6460f8ed6b249281ada099ca28bd8f1230b8892
import ( import (
"regexp" "regexp"
@ -22,13 +22,128 @@ var contentMatchers = map[string]languageMatcher{
return nil return nil
}, },
// .bb: BlitzBasic is tried first, then BitBake; nil when neither pattern matches.
".bb": func(i []byte) []string {
	if bb_BlitzBasic_Matcher_0.Match(i) || bb_BlitzBasic_Matcher_1.Match(i) {
		return []string{"BlitzBasic"}
	}

	if bb_BitBake_Matcher_0.Match(i) {
		return []string{"BitBake"}
	}

	return nil
},
// .builds: XML when the XML pattern matches; every other content falls back to Text.
".builds": func(i []byte) []string {
	if !builds_XML_Matcher_0.Match(i) {
		return []string{"Text"}
	}

	return []string{"XML"}
},
// .ch: xBase when its pattern matches; nil otherwise.
".ch": func(i []byte) []string {
	if !ch_xBase_Matcher_0.Match(i) {
		return nil
	}

	return []string{"xBase"}
},
// .cl: candidates are tried in order — Common Lisp, Cool, OpenCL; nil when none matches.
".cl": func(i []byte) []string {
	switch {
	case cl_CommonLisp_Matcher_0.Match(i):
		return []string{"Common Lisp"}
	case cl_Cool_Matcher_0.Match(i):
		return []string{"Cool"}
	case cl_OpenCL_Matcher_0.Match(i):
		return []string{"OpenCL"}
	}

	return nil
},
// .cls: TeX when its pattern matches; nil otherwise.
".cls": func(i []byte) []string {
	if !cls_TeX_Matcher_0.Match(i) {
		return nil
	}

	return []string{"TeX"}
},
// .cs: Smalltalk is tried first, then C# (either of its two patterns); nil when nothing matches.
".cs": func(i []byte) []string {
	if cs_Smalltalk_Matcher_0.Match(i) {
		return []string{"Smalltalk"}
	}

	if cs_CSharp_Matcher_0.Match(i) || cs_CSharp_Matcher_1.Match(i) {
		return []string{"C#"}
	}

	return nil
},
// .d: candidates are tried in order — D, DTrace, Makefile; nil when none matches.
".d": func(i []byte) []string {
	switch {
	case d_D_Matcher_0.Match(i):
		return []string{"D"}
	case d_DTrace_Matcher_0.Match(i):
		return []string{"DTrace"}
	case d_Makefile_Matcher_0.Match(i):
		return []string{"Makefile"}
	}

	return nil
},
// .ecl: ECLiPSe is tried first, then ECL; nil when neither pattern matches.
".ecl": func(i []byte) []string {
	if ecl_ECLiPSe_Matcher_0.Match(i) {
		return []string{"ECLiPSe"}
	}

	if ecl_ECL_Matcher_0.Match(i) {
		return []string{"ECL"}
	}

	return nil
},
// .es: Erlang when its pattern matches; nil otherwise.
".es": func(i []byte) []string {
	if !es_Erlang_Matcher_0.Match(i) {
		return nil
	}

	return []string{"Erlang"}
},
".f": func(i []byte) []string { ".f": func(i []byte) []string {
if f_Forth_Matcher_0.Match(i) { if f_Forth_Matcher_0.Match(i) {
return []string{"Forth"} return []string{"Forth"}
} else if f_FilebenchWML_Matcher_0.Match(i) { } else if f_FilebenchWML_Matcher_0.Match(i) {
return []string{"Filebench WML"} return []string{"Filebench WML"}
} else if f_FORTRAN_Matcher_0.Match(i) { } else if f_Fortran_Matcher_0.Match(i) {
return []string{"FORTRAN"} return []string{"Fortran"}
}
return nil
},
// .for: Forth is tried first, then Fortran; nil when neither pattern matches.
".for": func(i []byte) []string {
	if for_Forth_Matcher_0.Match(i) {
		return []string{"Forth"}
	}

	if for_Fortran_Matcher_0.Match(i) {
		return []string{"Fortran"}
	}

	return nil
},
// .fr: Forth is tried first, then Frege; anything else falls back to Text.
".fr": func(i []byte) []string {
	switch {
	case fr_Forth_Matcher_0.Match(i):
		return []string{"Forth"}
	case fr_Frege_Matcher_0.Match(i):
		return []string{"Frege"}
	default:
		return []string{"Text"}
	}
},
// .fs: candidates are tried in order — Forth, F#, GLSL, Filterscript; nil when none matches.
".fs": func(i []byte) []string {
	switch {
	case fs_Forth_Matcher_0.Match(i):
		return []string{"Forth"}
	case fs_FSharp_Matcher_0.Match(i):
		return []string{"F#"}
	case fs_GLSL_Matcher_0.Match(i):
		return []string{"GLSL"}
	case fs_Filterscript_Matcher_0.Match(i):
		return []string{"Filterscript"}
	}

	return nil
},
".gs": func(i []byte) []string {
if gs_Gosu_Matcher_0.Match(i) {
return []string{"Gosu"}
} }
return nil return nil
@ -42,6 +157,35 @@ var contentMatchers = map[string]languageMatcher{
return nil return nil
}, },
// .inc: PHP is tried first, then POV-Ray SDL; nil when neither pattern matches.
".inc": func(i []byte) []string {
	if inc_PHP_Matcher_0.Match(i) {
		return []string{"PHP"}
	}

	if inc_POVDashRaySDL_Matcher_0.Match(i) {
		return []string{"POV-Ray SDL"}
	}

	return nil
},
// .l: candidates are tried in order — Common Lisp, Lex, Roff, PicoLisp; nil when none matches.
".l": func(i []byte) []string {
	switch {
	case l_CommonLisp_Matcher_0.Match(i):
		return []string{"Common Lisp"}
	case l_Lex_Matcher_0.Match(i):
		return []string{"Lex"}
	case l_Roff_Matcher_0.Match(i):
		return []string{"Roff"}
	case l_PicoLisp_Matcher_0.Match(i):
		return []string{"PicoLisp"}
	}

	return nil
},
// .ls: LoomScript when its pattern matches; every other content falls back to LiveScript.
".ls": func(i []byte) []string {
	if !ls_LoomScript_Matcher_0.Match(i) {
		return []string{"LiveScript"}
	}

	return []string{"LoomScript"}
},
".lsp": func(i []byte) []string { ".lsp": func(i []byte) []string {
if lsp_CommonLisp_Matcher_0.Match(i) { if lsp_CommonLisp_Matcher_0.Match(i) {
return []string{"Common Lisp"} return []string{"Common Lisp"}
@ -60,21 +204,42 @@ var contentMatchers = map[string]languageMatcher{
return nil return nil
}, },
// .m: candidates are tried in order — Objective-C, Mercury, MUF, M, Mathematica, Matlab, Limbo;
// the first matching pattern wins, and nil is returned when none matches.
".m": func(i []byte) []string {
	switch {
	case m_ObjectiveDashC_Matcher_0.Match(i):
		return []string{"Objective-C"}
	case m_Mercury_Matcher_0.Match(i):
		return []string{"Mercury"}
	case m_MUF_Matcher_0.Match(i):
		return []string{"MUF"}
	case m_M_Matcher_0.Match(i):
		return []string{"M"}
	case m_Mathematica_Matcher_0.Match(i):
		return []string{"Mathematica"}
	case m_Matlab_Matcher_0.Match(i):
		return []string{"Matlab"}
	case m_Limbo_Matcher_0.Match(i):
		return []string{"Limbo"}
	}

	return nil
},
".md": func(i []byte) []string { ".md": func(i []byte) []string {
if md_Markdown_Matcher_0.Match(i) || md_Markdown_Matcher_1.Match(i) { if md_Markdown_Matcher_0.Match(i) || md_Markdown_Matcher_1.Match(i) {
return []string{"Markdown"} return []string{"Markdown"}
} else if md_GCCmachinedescription_Matcher_0.Match(i) { } else if md_GCCMachineDescription_Matcher_0.Match(i) {
return []string{"GCC machine description"} return []string{"GCC Machine Description"}
} }
return []string{"Markdown"} return []string{"Markdown"}
}, },
".ms": func(i []byte) []string { ".ml": func(i []byte) []string {
if ms_Groff_Matcher_0.Match(i) { if ml_OCaml_Matcher_0.Match(i) {
return []string{"Groff"} return []string{"OCaml"}
} else if ml_StandardML_Matcher_0.Match(i) {
return []string{"Standard ML"}
} }
return []string{"MAXScript"} return nil
}, },
".mod": func(i []byte) []string { ".mod": func(i []byte) []string {
if mod_XML_Matcher_0.Match(i) { if mod_XML_Matcher_0.Match(i) {
@ -85,6 +250,72 @@ var contentMatchers = map[string]languageMatcher{
return []string{"Linux Kernel Module", "AMPL"} return []string{"Linux Kernel Module", "AMPL"}
}, },
// .ms: Roff when its pattern matches; every other content falls back to MAXScript.
".ms": func(i []byte) []string {
	if !ms_Roff_Matcher_0.Match(i) {
		return []string{"MAXScript"}
	}

	return []string{"Roff"}
},
// .n: Roff is tried first, then Nemerle; nil when neither pattern matches.
".n": func(i []byte) []string {
	if n_Roff_Matcher_0.Match(i) {
		return []string{"Roff"}
	}

	if n_Nemerle_Matcher_0.Match(i) {
		return []string{"Nemerle"}
	}

	return nil
},
// .ncl: Text when its pattern matches; nil otherwise.
".ncl": func(i []byte) []string {
	if !ncl_Text_Matcher_0.Match(i) {
		return nil
	}

	return []string{"Text"}
},
// .nl: NL when its pattern matches; every other content falls back to NewLisp.
".nl": func(i []byte) []string {
	if !nl_NL_Matcher_0.Match(i) {
		return []string{"NewLisp"}
	}

	return []string{"NL"}
},
// .php: Hack is tried first, then PHP; nil when neither pattern matches.
".php": func(i []byte) []string {
	if php_Hack_Matcher_0.Match(i) {
		return []string{"Hack"}
	}

	if php_PHP_Matcher_0.Match(i) {
		return []string{"PHP"}
	}

	return nil
},
// .pl: candidates are tried in order — Prolog, Perl, Perl6; nil when none matches.
".pl": func(i []byte) []string {
	switch {
	case pl_Prolog_Matcher_0.Match(i):
		return []string{"Prolog"}
	case pl_Perl_Matcher_0.Match(i):
		return []string{"Perl"}
	case pl_Perl6_Matcher_0.Match(i):
		return []string{"Perl6"}
	}

	return nil
},
// .pm: Perl6 is tried first, then Perl; nil when neither pattern matches.
".pm": func(i []byte) []string {
	if pm_Perl6_Matcher_0.Match(i) {
		return []string{"Perl6"}
	}

	if pm_Perl_Matcher_0.Match(i) {
		return []string{"Perl"}
	}

	return nil
},
// .pod: Pod when its pattern matches; every other content falls back to Perl.
".pod": func(i []byte) []string {
	if !pod_Pod_Matcher_0.Match(i) {
		return []string{"Perl"}
	}

	return []string{"Pod"}
},
".pro": func(i []byte) []string { ".pro": func(i []byte) []string {
if pro_Prolog_Matcher_0.Match(i) { if pro_Prolog_Matcher_0.Match(i) {
return []string{"Prolog"} return []string{"Prolog"}
@ -98,6 +329,33 @@ var contentMatchers = map[string]languageMatcher{
return nil return nil
}, },
// .props: XML is tried first, then INI; nil when neither pattern matches.
".props": func(i []byte) []string {
	if props_XML_Matcher_0.Match(i) {
		return []string{"XML"}
	}

	if props_INI_Matcher_0.Match(i) {
		return []string{"INI"}
	}

	return nil
},
// .r: Rebol is tried first, then R; nil when neither pattern matches.
".r": func(i []byte) []string {
	if r_Rebol_Matcher_0.Match(i) {
		return []string{"Rebol"}
	}

	if r_R_Matcher_0.Match(i) {
		return []string{"R"}
	}

	return nil
},
// .rno: RUNOFF is tried first, then Roff; nil when neither pattern matches.
".rno": func(i []byte) []string {
	if rno_RUNOFF_Matcher_0.Match(i) {
		return []string{"RUNOFF"}
	}

	if rno_Roff_Matcher_0.Match(i) {
		return []string{"Roff"}
	}

	return nil
},
".rpy": func(i []byte) []string { ".rpy": func(i []byte) []string {
if rpy_Python_Matcher_0.Match(i) { if rpy_Python_Matcher_0.Match(i) {
return []string{"Python"} return []string{"Python"}
@ -105,38 +363,206 @@ var contentMatchers = map[string]languageMatcher{
return []string{"Ren'Py"} return []string{"Ren'Py"}
}, },
// .rs: Rust is tried first, then RenderScript; nil when neither pattern matches.
".rs": func(i []byte) []string {
	if rs_Rust_Matcher_0.Match(i) {
		return []string{"Rust"}
	}

	if rs_RenderScript_Matcher_0.Match(i) {
		return []string{"RenderScript"}
	}

	return nil
},
// .sc: SuperCollider (any of its three patterns) is tried first, then Scala (any of its three
// patterns); nil when nothing matches.
".sc": func(i []byte) []string {
	if sc_SuperCollider_Matcher_0.Match(i) || sc_SuperCollider_Matcher_1.Match(i) || sc_SuperCollider_Matcher_2.Match(i) {
		return []string{"SuperCollider"}
	}

	if sc_Scala_Matcher_0.Match(i) || sc_Scala_Matcher_1.Match(i) || sc_Scala_Matcher_2.Match(i) {
		return []string{"Scala"}
	}

	return nil
},
// .sql: dialects are tried in order — PLpgSQL, SQLPL, PLSQL, then SQL; nil when nothing matches.
// NOTE(review): the pattern behind sql_SQL_Matcher_0 starts with a literal "! /", which looks
// like a negation that leaked into the regex text; if so, the SQL branch was probably meant to
// fire when the pattern does NOT match — confirm against the upstream heuristic.
".sql": func(i []byte) []string {
	switch {
	case sql_PLpgSQL_Matcher_0.Match(i) || sql_PLpgSQL_Matcher_1.Match(i) || sql_PLpgSQL_Matcher_2.Match(i):
		return []string{"PLpgSQL"}
	case sql_SQLPL_Matcher_0.Match(i) || sql_SQLPL_Matcher_1.Match(i):
		return []string{"SQLPL"}
	case sql_PLSQL_Matcher_0.Match(i) || sql_PLSQL_Matcher_1.Match(i):
		return []string{"PLSQL"}
	case sql_SQL_Matcher_0.Match(i):
		return []string{"SQL"}
	}

	return nil
},
// .srt: SubRip Text when its pattern matches; nil otherwise.
".srt": func(i []byte) []string {
	if !srt_SubRipText_Matcher_0.Match(i) {
		return nil
	}

	return []string{"SubRip Text"}
},
// .t: candidates are tried in order — Turing, Perl6, Perl; nil when none matches.
".t": func(i []byte) []string {
	switch {
	case t_Turing_Matcher_0.Match(i):
		return []string{"Turing"}
	case t_Perl6_Matcher_0.Match(i):
		return []string{"Perl6"}
	case t_Perl_Matcher_0.Match(i):
		return []string{"Perl"}
	}

	return nil
},
// .toc: World of Warcraft Addon Data is tried first, then TeX; nil when neither pattern matches.
".toc": func(i []byte) []string {
	if toc_WorldofWarcraftAddonData_Matcher_0.Match(i) {
		return []string{"World of Warcraft Addon Data"}
	}

	if toc_TeX_Matcher_0.Match(i) {
		return []string{"TeX"}
	}

	return nil
},
// .ts: XML when its pattern matches; every other content falls back to TypeScript.
".ts": func(i []byte) []string {
	if !ts_XML_Matcher_0.Match(i) {
		return []string{"TypeScript"}
	}

	return []string{"XML"}
},
// .tst: GAP when its pattern matches; every other content falls back to Scilab.
".tst": func(i []byte) []string {
	if !tst_GAP_Matcher_0.Match(i) {
		return []string{"Scilab"}
	}

	return []string{"GAP"}
},
// .tsx: TypeScript is tried first, then XML; nil when neither pattern matches.
".tsx": func(i []byte) []string {
	if tsx_TypeScript_Matcher_0.Match(i) {
		return []string{"TypeScript"}
	}

	if tsx_XML_Matcher_0.Match(i) {
		return []string{"XML"}
	}

	return nil
},
} }
var ( var (
asc_PublicKey_Matcher_0 = regexp.MustCompile(`(?m)^(----[- ]BEGIN|ssh-(rsa|dss)) `) asc_PublicKey_Matcher_0 = regexp.MustCompile(`(?m)^(----[- ]BEGIN|ssh-(rsa|dss)) `)
asc_AsciiDoc_Matcher_0 = regexp.MustCompile(`(?m)^[=-]+(\s|\n)|{{[A-Za-z]`) asc_AsciiDoc_Matcher_0 = regexp.MustCompile(`(?m)^[=-]+(\s|\n)|{{[A-Za-z]`)
asc_AGSScript_Matcher_0 = regexp.MustCompile(`(?m)^(\/\/.+|((import|export)\s+)?(function|int|float|char)\s+((room|repeatedly|on|game)_)?([A-Za-z]+[A-Za-z_0-9]+)\s*[;\(])`) asc_AGSScript_Matcher_0 = regexp.MustCompile(`(?m)^(\/\/.+|((import|export)\s+)?(function|int|float|char)\s+((room|repeatedly|on|game)_)?([A-Za-z]+[A-Za-z_0-9]+)\s*[;\(])`)
f_Forth_Matcher_0 = regexp.MustCompile(`(?m)^: `) bb_BlitzBasic_Matcher_0 = regexp.MustCompile(`(?m)^\s*; `)
f_FilebenchWML_Matcher_0 = regexp.MustCompile(`(?m)flowop`) bb_BlitzBasic_Matcher_1 = regexp.MustCompile(`(?m)End Function`)
f_FORTRAN_Matcher_0 = regexp.MustCompile(`(?mi)^([c*][^abd-z]| (subroutine|program|end|data)\s|\s*!)`) bb_BitBake_Matcher_0 = regexp.MustCompile(`(?m)^\s*(# |include|require)\b`)
h_ObjectiveDashC_Matcher_0 = regexp.MustCompile(`(?m)^\s*(@(interface|class|protocol|property|end|synchronised|selector|implementation)\b|#import\s+.+\.h[">])`) builds_XML_Matcher_0 = regexp.MustCompile(`(?mi)^(\s*)(<Project|<Import|<Property|<?xml|xmlns)`)
h_CPlusPlus_Matcher_0 = regexp.MustCompile(`(?m)^\s*#\s*include <(cstdint|string|vector|map|list|array|bitset|queue|stack|forward_list|unordered_map|unordered_set|(i|o|io)stream)>`) ch_xBase_Matcher_0 = regexp.MustCompile(`(?mi)^\s*#\s*(if|ifdef|ifndef|define|command|xcommand|translate|xtranslate|include|pragma|undef)\b`)
h_CPlusPlus_Matcher_1 = regexp.MustCompile(`(?m)^\s*template\s*<`) cl_CommonLisp_Matcher_0 = regexp.MustCompile(`(?mi)^\s*\((defun|in-package|defpackage) `)
h_CPlusPlus_Matcher_2 = regexp.MustCompile(`(?m)^[ \t]*try`) cl_Cool_Matcher_0 = regexp.MustCompile(`(?m)^class`)
h_CPlusPlus_Matcher_3 = regexp.MustCompile(`(?m)^[ \t]*catch\s*\(`) cl_OpenCL_Matcher_0 = regexp.MustCompile(`(?m)\/\* |\/\/ |^\}`)
h_CPlusPlus_Matcher_4 = regexp.MustCompile(`(?m)^[ \t]*(class|(using[ \t]+)?namespace)\s+\w+`) cls_TeX_Matcher_0 = regexp.MustCompile(`(?m)\\\w+{`)
h_CPlusPlus_Matcher_5 = regexp.MustCompile(`(?m)^[ \t]*(private|public|protected):$`) cs_Smalltalk_Matcher_0 = regexp.MustCompile(`(?m)![\w\s]+methodsFor: `)
h_CPlusPlus_Matcher_6 = regexp.MustCompile(`(?m)std::\w+`) cs_CSharp_Matcher_0 = regexp.MustCompile(`(?m)^\s*namespace\s*[\w\.]+\s*{`)
lsp_CommonLisp_Matcher_0 = regexp.MustCompile(`(?mi)^\s*\((defun|in-package|defpackage) `) cs_CSharp_Matcher_1 = regexp.MustCompile(`(?m)^\s*\/\/`)
lsp_NewLisp_Matcher_0 = regexp.MustCompile(`(?m)^\s*\(define `) d_D_Matcher_0 = regexp.MustCompile(`(?m)^module\s+[\w.]*\s*;|import\s+[\w\s,.:]*;|\w+\s+\w+\s*\(.*\)(?:\(.*\))?\s*{[^}]*}|unittest\s*(?:\(.*\))?\s*{[^}]*}`)
lisp_CommonLisp_Matcher_0 = regexp.MustCompile(`(?mi)^\s*\((defun|in-package|defpackage) `) d_DTrace_Matcher_0 = regexp.MustCompile(`(?m)^(\w+:\w*:\w*:\w*|BEGIN|END|provider\s+|(tick|profile)-\w+\s+{[^}]*}|#pragma\s+D\s+(option|attributes|depends_on)\s|#pragma\s+ident\s)`)
lisp_NewLisp_Matcher_0 = regexp.MustCompile(`(?m)^\s*\(define `) d_Makefile_Matcher_0 = regexp.MustCompile(`(?m)([\/\\].*:\s+.*\s\\$|: \\$|^ : |^[\w\s\/\\.]+\w+\.\w+\s*:\s+[\w\s\/\\.]+\w+\.\w+)`)
md_Markdown_Matcher_0 = regexp.MustCompile(`(?mi)(^[-a-z0-9=#!\*\[|>])|<\/`) ecl_ECLiPSe_Matcher_0 = regexp.MustCompile(`(?m)^[^#]+:-`)
md_Markdown_Matcher_1 = regexp.MustCompile(`(?m)^$`) ecl_ECL_Matcher_0 = regexp.MustCompile(`(?m):=`)
md_GCCmachinedescription_Matcher_0 = regexp.MustCompile(`(?m)^(;;|\(define_)`) es_Erlang_Matcher_0 = regexp.MustCompile(`(?m)^\s*(?:%%|main\s*\(.*?\)\s*->)`)
ms_Groff_Matcher_0 = regexp.MustCompile(`(?mi)^[.'][a-z][a-z](\s|$)`) f_Forth_Matcher_0 = regexp.MustCompile(`(?m)^: `)
mod_XML_Matcher_0 = regexp.MustCompile(`(?m)<!ENTITY `) f_FilebenchWML_Matcher_0 = regexp.MustCompile(`(?m)flowop`)
mod_ModulaDash2_Matcher_0 = regexp.MustCompile(`(?mi)^\s*MODULE [\w\.]+;`) f_Fortran_Matcher_0 = regexp.MustCompile(`(?mi)^([c*][^abd-z]| (subroutine|program|end|data)\s|\s*!)`)
mod_ModulaDash2_Matcher_1 = regexp.MustCompile(`(?mi)^\s*END [\w\.]+;`) for_Forth_Matcher_0 = regexp.MustCompile(`(?m)^: `)
pro_Prolog_Matcher_0 = regexp.MustCompile(`(?m)^[^#]+:-`) for_Fortran_Matcher_0 = regexp.MustCompile(`(?mi)^([c*][^abd-z]| (subroutine|program|end|data)\s|\s*!)`)
pro_INI_Matcher_0 = regexp.MustCompile(`(?m)last_client=`) fr_Forth_Matcher_0 = regexp.MustCompile(`(?m)^(: |also |new-device|previous )`)
pro_QMake_Matcher_0 = regexp.MustCompile(`(?m)HEADERS`) fr_Frege_Matcher_0 = regexp.MustCompile(`(?m)^\s*(import|module|package|data|type) `)
pro_QMake_Matcher_1 = regexp.MustCompile(`(?m)SOURCES`) fs_Forth_Matcher_0 = regexp.MustCompile(`(?m)^(: |new-device)`)
pro_IDL_Matcher_0 = regexp.MustCompile(`(?m)^\s*function[ \w,]+$`) fs_FSharp_Matcher_0 = regexp.MustCompile(`(?m)^\s*(#light|import|let|module|namespace|open|type)`)
rpy_Python_Matcher_0 = regexp.MustCompile(`(?ms)(^(import|from|class|def)\s)`) fs_GLSL_Matcher_0 = regexp.MustCompile(`(?m)^\s*(#version|precision|uniform|varying|vec[234])`)
fs_Filterscript_Matcher_0 = regexp.MustCompile(`(?m)#include|#pragma\s+(rs|version)|__attribute__`)
gs_Gosu_Matcher_0 = regexp.MustCompile(`(?m)^uses java\.`)
h_ObjectiveDashC_Matcher_0 = regexp.MustCompile(`(?m)^\s*(@(interface|class|protocol|property|end|synchronised|selector|implementation)\b|#import\s+.+\.h[">])`)
h_CPlusPlus_Matcher_0 = regexp.MustCompile(`(?m)^\s*#\s*include <(cstdint|string|vector|map|list|array|bitset|queue|stack|forward_list|unordered_map|unordered_set|(i|o|io)stream)>`)
h_CPlusPlus_Matcher_1 = regexp.MustCompile(`(?m)^\s*template\s*<`)
h_CPlusPlus_Matcher_2 = regexp.MustCompile(`(?m)^[ \t]*try`)
h_CPlusPlus_Matcher_3 = regexp.MustCompile(`(?m)^[ \t]*catch\s*\(`)
h_CPlusPlus_Matcher_4 = regexp.MustCompile(`(?m)^[ \t]*(class|(using[ \t]+)?namespace)\s+\w+`)
h_CPlusPlus_Matcher_5 = regexp.MustCompile(`(?m)^[ \t]*(private|public|protected):$`)
h_CPlusPlus_Matcher_6 = regexp.MustCompile(`(?m)std::\w+`)
inc_PHP_Matcher_0 = regexp.MustCompile(`(?m)^<\?(?:php)?`)
inc_POVDashRaySDL_Matcher_0 = regexp.MustCompile(`(?m)^\s*#(declare|local|macro|while)\s`)
l_CommonLisp_Matcher_0 = regexp.MustCompile(`(?m)\(def(un|macro)\s`)
l_Lex_Matcher_0 = regexp.MustCompile(`(?m)^(%[%{}]xs|<.*>)`)
l_Roff_Matcher_0 = regexp.MustCompile(`(?mi)^\.[a-z][a-z](\s|$)`)
l_PicoLisp_Matcher_0 = regexp.MustCompile(`(?m)^\((de|class|rel|code|data|must)\s`)
ls_LoomScript_Matcher_0 = regexp.MustCompile(`(?m)^\s*package\s*[\w\.\/\*\s]*\s*{`)
lsp_CommonLisp_Matcher_0 = regexp.MustCompile(`(?mi)^\s*\((defun|in-package|defpackage) `)
lsp_NewLisp_Matcher_0 = regexp.MustCompile(`(?m)^\s*\(define `)
lisp_CommonLisp_Matcher_0 = regexp.MustCompile(`(?mi)^\s*\((defun|in-package|defpackage) `)
lisp_NewLisp_Matcher_0 = regexp.MustCompile(`(?m)^\s*\(define `)
m_ObjectiveDashC_Matcher_0 = regexp.MustCompile(`(?m)^\s*(@(interface|class|protocol|property|end|synchronised|selector|implementation)\b|#import\s+.+\.h[">])`)
m_Mercury_Matcher_0 = regexp.MustCompile(`(?m):- module`)
m_MUF_Matcher_0 = regexp.MustCompile(`(?m)^: `)
m_M_Matcher_0 = regexp.MustCompile(`(?m)^\s*;`)
m_Mathematica_Matcher_0 = regexp.MustCompile(`(?m)\*\)$`)
m_Matlab_Matcher_0 = regexp.MustCompile(`(?m)^\s*%`)
m_Limbo_Matcher_0 = regexp.MustCompile(`(?m)^\w+\s*:\s*module\s*{`)
md_Markdown_Matcher_0 = regexp.MustCompile(`(?mi)(^[-a-z0-9=#!\*\[|>])|<\/`)
md_Markdown_Matcher_1 = regexp.MustCompile(`^$`)
md_GCCMachineDescription_Matcher_0 = regexp.MustCompile(`(?m)^(;;|\(define_)`)
ml_OCaml_Matcher_0 = regexp.MustCompile(`(?m)(^\s*module)|let rec |match\s+(\S+\s)+with`)
ml_StandardML_Matcher_0 = regexp.MustCompile(`(?m)=> |case\s+(\S+\s)+of`)
mod_XML_Matcher_0 = regexp.MustCompile(`(?m)<!ENTITY `)
mod_ModulaDash2_Matcher_0 = regexp.MustCompile(`(?mi)^\s*MODULE [\w\.]+;`)
mod_ModulaDash2_Matcher_1 = regexp.MustCompile(`(?mi)^\s*END [\w\.]+;`)
ms_Roff_Matcher_0 = regexp.MustCompile(`(?mi)^[.'][a-z][a-z](\s|$)`)
n_Roff_Matcher_0 = regexp.MustCompile(`(?m)^[.']`)
n_Nemerle_Matcher_0 = regexp.MustCompile(`(?m)^(module|namespace|using)\s`)
ncl_Text_Matcher_0 = regexp.MustCompile(`(?m)THE_TITLE`)
nl_NL_Matcher_0 = regexp.MustCompile(`(?m)^(b|g)[0-9]+ `)
php_Hack_Matcher_0 = regexp.MustCompile(`(?m)<\?hh`)
// php_PHP_Matcher_0 matches a "<?" open tag followed by any character other than 'h'.
// The '?' must be escaped: unescaped, "<?" means "an optional '<'", which made the pattern
// match any input containing a single non-'h' character.
php_PHP_Matcher_0 = regexp.MustCompile(`(?m)<\?[^h]`)
pl_Prolog_Matcher_0 = regexp.MustCompile(`(?m)^[^#]*:-`)
pl_Perl_Matcher_0 = regexp.MustCompile(`(?m)use strict|use\s+v?5\.`)
pl_Perl6_Matcher_0 = regexp.MustCompile(`(?m)^(use v6|(my )?class|module)`)
pm_Perl6_Matcher_0 = regexp.MustCompile(`(?m)^\s*(?:use\s+v6\s*;|(?:\bmy\s+)?class|module)\b`)
pm_Perl_Matcher_0 = regexp.MustCompile(`(?m)\buse\s+(?:strict\b|v?5\.)`)
pod_Pod_Matcher_0 = regexp.MustCompile(`(?m)^=\w+\b`)
pro_Prolog_Matcher_0 = regexp.MustCompile(`(?m)^[^#]+:-`)
pro_INI_Matcher_0 = regexp.MustCompile(`(?m)last_client=`)
pro_QMake_Matcher_0 = regexp.MustCompile(`(?m)HEADERS`)
pro_QMake_Matcher_1 = regexp.MustCompile(`(?m)SOURCES`)
pro_IDL_Matcher_0 = regexp.MustCompile(`(?m)^\s*function[ \w,]+$`)
// props_XML_Matcher_0 matches, case-insensitively and at the start of a line (after optional
// whitespace), one of: "<Project", "<Import", "<Property", "<?xml" or "xmlns".
// The '?' in "<?xml" is escaped so it is a literal question mark; unescaped, it made the '<'
// optional and the alternative matched a bare "xml" instead of the XML declaration.
props_XML_Matcher_0 = regexp.MustCompile(`(?mi)^(\s*)(<Project|<Import|<Property|<\?xml|xmlns)`)
props_INI_Matcher_0 = regexp.MustCompile(`(?mi)\w+\s*=\s*`)
r_Rebol_Matcher_0 = regexp.MustCompile(`(?mi)\bRebol\b`)
r_R_Matcher_0 = regexp.MustCompile(`(?m)<-|^\s*#`)
rno_RUNOFF_Matcher_0 = regexp.MustCompile(`(?mi)^\.!|^\.end lit(?:eral)?\b`)
rno_Roff_Matcher_0 = regexp.MustCompile(`(?m)^\.\\" `)
rpy_Python_Matcher_0 = regexp.MustCompile(`(?ms)(^(import|from|class|def)\s)`)
rs_Rust_Matcher_0 = regexp.MustCompile(`(?m)^(use |fn |mod |pub |macro_rules|impl|#!?\[)`)
rs_RenderScript_Matcher_0 = regexp.MustCompile(`(?m)#include|#pragma\s+(rs|version)|__attribute__`)
sc_SuperCollider_Matcher_0 = regexp.MustCompile(`(?m)\^(this|super)\.`)
sc_SuperCollider_Matcher_1 = regexp.MustCompile(`(?m)^\s*(\+|\*)\s*\w+\s*{`)
sc_SuperCollider_Matcher_2 = regexp.MustCompile(`(?m)^\s*~\w+\s*=\.`)
sc_Scala_Matcher_0 = regexp.MustCompile(`(?m)^\s*import (scala|java)\.`)
sc_Scala_Matcher_1 = regexp.MustCompile(`(?m)^\s*val\s+\w+\s*=`)
sc_Scala_Matcher_2 = regexp.MustCompile(`(?m)^\s*class\b`)
sql_PLpgSQL_Matcher_0 = regexp.MustCompile(`(?mi)^\\i\b|AS \$\$|LANGUAGE '?plpgsql'?`)
sql_PLpgSQL_Matcher_1 = regexp.MustCompile(`(?mi)SECURITY (DEFINER|INVOKER)`)
sql_PLpgSQL_Matcher_2 = regexp.MustCompile(`(?mi)BEGIN( WORK| TRANSACTION)?;`)
sql_SQLPL_Matcher_0 = regexp.MustCompile(`(?mi)(alter module)|(language sql)|(begin( NOT)+ atomic)`)
sql_SQLPL_Matcher_1 = regexp.MustCompile(`(?mi)signal SQLSTATE '[0-9]+'`)
sql_PLSQL_Matcher_0 = regexp.MustCompile(`(?mi)\$\$PLSQL_|XMLTYPE|sysdate|systimestamp|\.nextval|connect by|AUTHID (DEFINER|CURRENT_USER)`)
sql_PLSQL_Matcher_1 = regexp.MustCompile(`(?mi)constructor\W+function`)
sql_SQL_Matcher_0 = regexp.MustCompile(`(?mi)! /begin|boolean|package|exception`)
srt_SubRipText_Matcher_0 = regexp.MustCompile(`(?m)^(\d{2}:\d{2}:\d{2},\d{3})\s*(-->)\s*(\d{2}:\d{2}:\d{2},\d{3})$`)
t_Turing_Matcher_0 = regexp.MustCompile(`(?m)^\s*%[ \t]+|^\s*var\s+\w+\s*:=\s*\w+`)
t_Perl6_Matcher_0 = regexp.MustCompile(`(?m)^\s*(?:use\s+v6\s*;|\bmodule\b|\b(?:my\s+)?class\b)`)
t_Perl_Matcher_0 = regexp.MustCompile(`(?m)\buse\s+(?:strict\b|v?5\.)`)
toc_WorldofWarcraftAddonData_Matcher_0 = regexp.MustCompile(`(?m)^## |@no-lib-strip@`)
toc_TeX_Matcher_0 = regexp.MustCompile(`(?m)^\\(contentsline|defcounter|beamer|boolfalse)`)
ts_XML_Matcher_0 = regexp.MustCompile(`(?m)<TS`)
tst_GAP_Matcher_0 = regexp.MustCompile(`(?m)gap> `)
tsx_TypeScript_Matcher_0 = regexp.MustCompile(`(?m)^\s*(import.+(from\s+|require\()['"]react|\/\/\/\s*<reference\s)`)
tsx_XML_Matcher_0 = regexp.MustCompile(`(?mi)^\s*<\?xml\s+version`)
) )

View File

@ -2,7 +2,7 @@ package enry
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/enry.v1/internal/code-generator // CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/enry.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND // THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: 0123456789abcdef0123456789abcdef01234567 // Extracted from github/linguist commit: b6460f8ed6b249281ada099ca28bd8f1230b8892
import "gopkg.in/toqueteos/substring.v1" import "gopkg.in/toqueteos/substring.v1"

View File

@ -1,22 +0,0 @@
## Documentation directories ##
- ^[Dd]ocs?/
- (^|/)[Dd]ocumentation/
- (^|/)[Jj]avadoc/
- ^[Mm]an/
- ^[Ee]xamples/
- ^[Dd]emos?/
## Documentation files ##
- (^|/)CHANGE(S|LOG)?(\.|$)
- (^|/)CONTRIBUTING(\.|$)
- (^|/)COPYING(\.|$)
- (^|/)INSTALL(\.|$)
- (^|/)LICEN[CS]E(\.|$)
- (^|/)[Ll]icen[cs]e(\.|$)
- (^|/)README(\.|$)
- (^|/)[Rr]eadme(\.|$)
# Samples folders
- ^[Ss]amples?/

File diff suppressed because it is too large Load Diff

View File

@ -1,18 +0,0 @@
package enry
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/enry.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: 0123456789abcdef0123456789abcdef01234567
var languagesByExtension = map[string][]string{
".abap": {"ABAP"},
".abnf": {"ABNF"},
".bsl": {"1C Enterprise"},
".os": {"1C Enterprise"},
}
var extensionsByLanguage = map[string][]string{
"1C Enterprise": {".bsl", ".os"},
"ABAP": {".abap"},
"ABNF": {".abnf"},
}

View File

@ -1,11 +0,0 @@
---
1C Enterprise:
extensions:
- ".bsl"
- ".os"
ABAP:
extensions:
- ".abap"
ABNF:
extensions:
- ".abnf"

View File

@ -0,0 +1,182 @@
package enry
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/enry.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: b6460f8ed6b249281ada099ca28bd8f1230b8892
var languagesByFilename = map[string][]string{
".Rprofile": {"R"},
".XCompose": {"XCompose"},
".abbrev_defs": {"Emacs Lisp"},
".arcconfig": {"JSON"},
".babelrc": {"JSON5"},
".bash_history": {"Shell"},
".bash_logout": {"Shell"},
".bash_profile": {"Shell"},
".bashrc": {"Shell"},
".clang-format": {"YAML"},
".classpath": {"XML"},
".cproject": {"XML"},
".cshrc": {"Shell"},
".editorconfig": {"INI"},
".emacs": {"Emacs Lisp"},
".emacs.desktop": {"Emacs Lisp"},
".factor-boot-rc": {"Factor"},
".factor-rc": {"Factor"},
".gclient": {"Python"},
".gemrc": {"YAML"},
".gitconfig": {"INI"},
".gn": {"GN"},
".gnus": {"Emacs Lisp"},
".gvimrc": {"Vim script"},
".htaccess": {"ApacheConf"},
".irbrc": {"Ruby"},
".jshintrc": {"JSON"},
".login": {"Shell"},
".nvimrc": {"Vim script"},
".php": {"PHP"},
".php_cs": {"PHP"},
".php_cs.dist": {"PHP"},
".profile": {"Shell"},
".project": {"XML"},
".pryrc": {"Ruby"},
".spacemacs": {"Emacs Lisp"},
".vimrc": {"Vim script"},
".viper": {"Emacs Lisp"},
".zlogin": {"Shell"},
".zlogout": {"Shell"},
".zprofile": {"Shell"},
".zshenv": {"Shell"},
".zshrc": {"Shell"},
"9fs": {"Shell"},
"APKBUILD": {"Alpine Abuild"},
"App.config": {"XML"},
"Appraisals": {"Ruby"},
"BSDmakefile": {"Makefile"},
"BUCK": {"Python"},
"BUILD": {"Python"},
"Berksfile": {"Ruby"},
"Brewfile": {"Ruby"},
"Buildfile": {"Ruby"},
"CMakeLists.txt": {"CMake"},
"COPYING": {"Text"},
"COPYING.regex": {"Text"},
"COPYRIGHT.regex": {"Text"},
"Cakefile": {"CoffeeScript"},
"Capfile": {"Ruby"},
"Cask": {"Emacs Lisp"},
"Dangerfile": {"Ruby"},
"Deliverfile": {"Ruby"},
"Dockerfile": {"Dockerfile"},
"Emakefile": {"Erlang"},
"FONTLOG": {"Text"},
"Fakefile": {"Fancy"},
"Fastfile": {"Ruby"},
"GNUmakefile": {"Makefile"},
"Gemfile": {"Ruby"},
"Gemfile.lock": {"Ruby"},
"Guardfile": {"Ruby"},
"INSTALL": {"Text"},
"INSTALL.mysql": {"Text"},
"Jakefile": {"JavaScript"},
"Jarfile": {"Ruby"},
"Jenkinsfile": {"Groovy"},
"Kbuild": {"Makefile"},
"LICENSE": {"Text"},
"LICENSE.mysql": {"Text"},
"Makefile": {"Makefile"},
"Makefile.am": {"Makefile"},
"Makefile.boot": {"Makefile"},
"Makefile.frag": {"Makefile"},
"Makefile.in": {"Makefile"},
"Makefile.inc": {"Makefile"},
"Makefile.wat": {"Makefile"},
"Mavenfile": {"Ruby"},
"Modulefile": {"Puppet"},
"NEWS": {"Text"},
"Notebook": {"Jupyter Notebook"},
"NuGet.config": {"XML"},
"Nukefile": {"Nu"},
"PKGBUILD": {"Shell"},
"Phakefile": {"PHP"},
"Podfile": {"Ruby"},
"Project.ede": {"Emacs Lisp"},
"Puppetfile": {"Ruby"},
"README.1ST": {"Text"},
"README.me": {"Text"},
"README.mysql": {"Text"},
"ROOT": {"Isabelle ROOT"},
"Rakefile": {"Ruby"},
"Rexfile": {"Perl6"},
"SConscript": {"Python"},
"SConstruct": {"Python"},
"Settings.StyleCop": {"XML"},
"Slakefile": {"LiveScript"},
"Snakefile": {"Python"},
"Snapfile": {"Ruby"},
"Thorfile": {"Ruby"},
"Vagrantfile": {"Ruby"},
"WORKSPACE": {"Python"},
"Web.Debug.config": {"XML"},
"Web.Release.config": {"XML"},
"Web.config": {"XML"},
"XCompose": {"XCompose"},
"_emacs": {"Emacs Lisp"},
"_vimrc": {"Vim script"},
"abbrev_defs": {"Emacs Lisp"},
"ack": {"Perl"},
"ant.xml": {"Ant Build System"},
"apache2.conf": {"ApacheConf"},
"bash_logout": {"Shell"},
"bash_profile": {"Shell"},
"bashrc": {"Shell"},
"build.xml": {"Ant Build System"},
"buildfile": {"Ruby"},
"click.me": {"Text"},
"composer.lock": {"JSON"},
"configure.ac": {"M4Sugar"},
"cshrc": {"Shell"},
"delete.me": {"Text"},
"descrip.mmk": {"Module Management System"},
"descrip.mms": {"Module Management System"},
"expr-dist": {"R"},
"gradlew": {"Shell"},
"gvimrc": {"Vim script"},
"httpd.conf": {"ApacheConf"},
"keep.me": {"Text"},
"ld.script": {"Linker Script"},
"login": {"Shell"},
"makefile": {"Makefile"},
"makefile.sco": {"Makefile"},
"man": {"Shell"},
"mcmod.info": {"JSON"},
"meson.build": {"Meson"},
"meson_options.txt": {"Meson"},
"mix.lock": {"Elixir"},
"mkfile": {"Makefile"},
"mmn": {"Roff"},
"mmt": {"Roff"},
"nginx.conf": {"Nginx"},
"nvimrc": {"Vim script"},
"owh": {"Tcl"},
"packages.config": {"XML"},
"pom.xml": {"Maven POM"},
"profile": {"Shell"},
"read.me": {"Text"},
"readme.1st": {"Text"},
"rebar.config": {"Erlang"},
"rebar.config.lock": {"Erlang"},
"rebar.lock": {"Erlang"},
"riemann.config": {"Clojure"},
"script": {"C"},
"starfield": {"Tcl"},
"test.me": {"Text"},
"vimrc": {"Vim script"},
"wscript": {"Python"},
"xcompose": {"XCompose"},
"zlogin": {"Shell"},
"zlogout": {"Shell"},
"zprofile": {"Shell"},
"zshenv": {"Shell"},
"zshrc": {"Shell"},
}

View File

@ -1,12 +0,0 @@
package enry
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/enry.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: 0123456789abcdef0123456789abcdef01234567
var languagesByFilename = map[string][]string{
"APKBUILD": {"Alpine Abuild"},
"CMakeLists.txt": {"CMake"},
"Cakefile": {"CoffeeScript"},
"mix.lock": {"Elixir"},
}

View File

@ -1,13 +0,0 @@
---
Alpine Abuild:
filenames:
- APKBUILD
CMake:
filenames:
- CMakeLists.txt
CoffeeScript:
filenames:
- Cakefile
Elixir:
filenames:
- mix.lock

File diff suppressed because it is too large Load Diff

View File

@ -1,91 +0,0 @@
# Common heuristics
ObjectiveCRegex = /^\s*(@(interface|class|protocol|property|end|synchronised|selector|implementation)\b|#import\s+.+\.h[">])/
disambiguate ".asc" do |data|
if /^(----[- ]BEGIN|ssh-(rsa|dss)) /.match(data)
Language["Public Key"]
elsif /^[=-]+(\s|\n)|{{[A-Za-z]/.match(data)
Language["AsciiDoc"]
elsif /^(\/\/.+|((import|export)\s+)?(function|int|float|char)\s+((room|repeatedly|on|game)_)?([A-Za-z]+[A-Za-z_0-9]+)\s*[;\(])/.match(data)
Language["AGS Script"]
end
end
fortran_rx = /^([c*][^abd-z]| (subroutine|program|end|data)\s|\s*!)/i
disambiguate ".f" do |data|
if /^: /.match(data)
Language["Forth"]
elsif data.include?("flowop")
Language["Filebench WML"]
elsif fortran_rx.match(data)
Language["FORTRAN"]
end
end
disambiguate ".h" do |data|
if ObjectiveCRegex.match(data)
Language["Objective-C"]
elsif (/^\s*#\s*include <(cstdint|string|vector|map|list|array|bitset|queue|stack|forward_list|unordered_map|unordered_set|(i|o|io)stream)>/.match(data) ||
/^\s*template\s*</.match(data) || /^[ \t]*try/.match(data) || /^[ \t]*catch\s*\(/.match(data) || /^[ \t]*(class|(using[ \t]+)?namespace)\s+\w+/.match(data) || /^[ \t]*(private|public|protected):$/.match(data) || /std::\w+/.match(data))
Language["C++"]
end
end
disambiguate ".lsp", ".lisp" do |data|
if /^\s*\((defun|in-package|defpackage) /i.match(data)
Language["Common Lisp"]
elsif /^\s*\(define /.match(data)
Language["NewLisp"]
end
end
disambiguate ".md" do |data|
if /(^[-a-z0-9=#!\*\[|>])|<\//i.match(data) || data.empty?
Language["Markdown"]
elsif /^(;;|\(define_)/.match(data)
Language["GCC machine description"]
else
Language["Markdown"]
end
end
disambiguate ".ms" do |data|
if /^[.'][a-z][a-z](\s|$)/i.match(data)
Language["Groff"]
elsif /(?<!\S)\.(include|globa?l)\s/.match(data) || /(?<!\/\*)(\A|\n)\s*\.[A-Za-z]/.match(data.gsub(/"([^\\"]|\\.)*"|'([^\\']|\\.)*'|\\\s*(?:--.*)?\n/, ""))
Language["GAS"]
else
Language["MAXScript"]
end
end
disambiguate ".mod" do |data|
if data.include?('<!ENTITY ')
Language["XML"]
elsif /^\s*MODULE [\w\.]+;/i.match(data) || /^\s*END [\w\.]+;/i.match(data)
Language["Modula-2"]
else
[Language["Linux Kernel Module"], Language["AMPL"]]
end
end
disambiguate ".pro" do |data|
if /^[^#]+:-/.match(data)
Language["Prolog"]
elsif data.include?("last_client=")
Language["INI"]
elsif data.include?("HEADERS") && data.include?("SOURCES")
Language["QMake"]
elsif /^\s*function[ \w,]+$/.match(data)
Language["IDL"]
end
end
disambiguate ".rpy" do |data|
if /(^(import|from|class|def)\s)/m.match(data)
Language["Python"]
else
Language["Ren'Py"]
end
end

View File

@ -0,0 +1,99 @@
package enry
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/enry.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: b6460f8ed6b249281ada099ca28bd8f1230b8892
var languagesByInterpreter = map[string][]string{
"Rscript": {"R"},
"apl": {"APL"},
"aplx": {"APL"},
"awk": {"Awk"},
"bash": {"Shell"},
"bigloo": {"Scheme"},
"boolector": {"SMT"},
"ccl": {"Common Lisp"},
"chicken": {"Scheme"},
"clisp": {"Common Lisp"},
"coffee": {"CoffeeScript"},
"crystal": {"Crystal"},
"csi": {"Scheme"},
"cvc4": {"SMT"},
"dart": {"Dart"},
"dtrace": {"DTrace"},
"dyalog": {"APL"},
"ecl": {"Common Lisp"},
"elixir": {"Elixir"},
"escript": {"Erlang"},
"fish": {"fish"},
"gawk": {"Awk"},
"gn": {"GN"},
"gnuplot": {"Gnuplot"},
"gosh": {"Scheme"},
"groovy": {"Groovy"},
"guile": {"Scheme"},
"instantfpc": {"Pascal"},
"io": {"Io"},
"ioke": {"Ioke"},
"jconsole": {"J"},
"jolie": {"Jolie"},
"jruby": {"Ruby"},
"julia": {"Julia"},
"lisp": {"Common Lisp"},
"lsl": {"LSL"},
"lua": {"Lua", "Terra"},
"macruby": {"Ruby"},
"make": {"Makefile"},
"mathsat5": {"SMT"},
"mawk": {"Awk"},
"mmi": {"Mercury"},
"moon": {"MoonScript"},
"nawk": {"Awk"},
"newlisp": {"NewLisp"},
"node": {"JavaScript"},
"nush": {"Nu"},
"ocaml": {"OCaml", "Reason"},
"ocamlrun": {"OCaml"},
"ocamlscript": {"OCaml"},
"openrc-run": {"OpenRC runscript"},
"opensmt": {"SMT"},
"osascript": {"AppleScript"},
"parrot": {"Parrot Assembly", "Parrot Internal Representation"},
"perl": {"Perl"},
"perl6": {"Perl6"},
"php": {"PHP"},
"picolisp": {"PicoLisp"},
"pike": {"Pike"},
"pil": {"PicoLisp"},
"python": {"Python"},
"python2": {"Python"},
"python3": {"Python"},
"qmake": {"QMake"},
"r6rs": {"Scheme"},
"racket": {"Racket"},
"rake": {"Ruby"},
"rbx": {"Ruby"},
"rc": {"Shell"},
"regina": {"REXX"},
"rexx": {"REXX"},
"ruby": {"Ruby"},
"rune": {"E"},
"runhaskell": {"Haskell"},
"sbcl": {"Common Lisp"},
"scala": {"Scala"},
"sclang": {"SuperCollider"},
"scsynth": {"SuperCollider"},
"sh": {"Shell"},
"smt-rat": {"SMT"},
"smtinterpol": {"SMT"},
"stp": {"SMT"},
"swipl": {"Prolog"},
"tcc": {"C"},
"tclsh": {"Tcl"},
"verit": {"SMT"},
"wish": {"Tcl"},
"yap": {"Prolog"},
"yices2": {"SMT"},
"z3": {"SMT"},
"zsh": {"Shell"},
}

View File

@ -1,16 +0,0 @@
package enry
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/enry.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: 0123456789abcdef0123456789abcdef01234567
var languagesByInterpreter = map[string][]string{
"bash": {"Shell"},
"nush": {"Nu"},
"python": {"Python"},
"python2": {"Python"},
"python3": {"Python"},
"rc": {"Shell"},
"sh": {"Shell"},
"zsh": {"Shell"},
}

View File

@ -1,15 +0,0 @@
---
Nu:
interpreters:
- nush
Shell:
interpreters:
- bash
- rc
- sh
- zsh
Python:
interpreters:
- python
- python2
- python3

View File

@ -0,0 +1,459 @@
package enry
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/enry.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: b6460f8ed6b249281ada099ca28bd8f1230b8892
var languagesType = map[string]Type{
"1C Enterprise": Programming,
"ABAP": Programming,
"ABNF": Data,
"AGS Script": Programming,
"AMPL": Programming,
"ANTLR": Programming,
"API Blueprint": Markup,
"APL": Programming,
"ASN.1": Data,
"ASP": Programming,
"ATS": Programming,
"ActionScript": Programming,
"Ada": Programming,
"Agda": Programming,
"Alloy": Programming,
"Alpine Abuild": Programming,
"Ant Build System": Data,
"ApacheConf": Markup,
"Apex": Programming,
"Apollo Guidance Computer": Programming,
"AppleScript": Programming,
"Arc": Programming,
"Arduino": Programming,
"AsciiDoc": Prose,
"AspectJ": Programming,
"Assembly": Programming,
"Augeas": Programming,
"AutoHotkey": Programming,
"AutoIt": Programming,
"Awk": Programming,
"Batchfile": Programming,
"Befunge": Programming,
"Bison": Programming,
"BitBake": Programming,
"Blade": Markup,
"BlitzBasic": Programming,
"BlitzMax": Programming,
"Bluespec": Programming,
"Boo": Programming,
"Brainfuck": Programming,
"Brightscript": Programming,
"Bro": Programming,
"C": Programming,
"C#": Programming,
"C++": Programming,
"C-ObjDump": Data,
"C2hs Haskell": Programming,
"CLIPS": Programming,
"CMake": Programming,
"COBOL": Programming,
"COLLADA": Data,
"CSON": Data,
"CSS": Markup,
"CSV": Data,
"CWeb": Programming,
"Cap'n Proto": Programming,
"CartoCSS": Programming,
"Ceylon": Programming,
"Chapel": Programming,
"Charity": Programming,
"ChucK": Programming,
"Cirru": Programming,
"Clarion": Programming,
"Clean": Programming,
"Click": Programming,
"Clojure": Programming,
"Closure Templates": Markup,
"CoffeeScript": Programming,
"ColdFusion": Programming,
"ColdFusion CFC": Programming,
"Common Lisp": Programming,
"Component Pascal": Programming,
"Cool": Programming,
"Coq": Programming,
"Cpp-ObjDump": Data,
"Creole": Prose,
"Crystal": Programming,
"Csound": Programming,
"Csound Document": Programming,
"Csound Score": Programming,
"Cuda": Programming,
"Cycript": Programming,
"Cython": Programming,
"D": Programming,
"D-ObjDump": Data,
"DIGITAL Command Language": Programming,
"DM": Programming,
"DNS Zone": Data,
"DTrace": Programming,
"Darcs Patch": Data,
"Dart": Programming,
"Diff": Data,
"Dockerfile": Data,
"Dogescript": Programming,
"Dylan": Programming,
"E": Programming,
"EBNF": Data,
"ECL": Programming,
"ECLiPSe": Programming,
"EJS": Markup,
"EQ": Programming,
"Eagle": Markup,
"Ecere Projects": Data,
"Eiffel": Programming,
"Elixir": Programming,
"Elm": Programming,
"Emacs Lisp": Programming,
"EmberScript": Programming,
"Erlang": Programming,
"F#": Programming,
"FLUX": Programming,
"Factor": Programming,
"Fancy": Programming,
"Fantom": Programming,
"Filebench WML": Programming,
"Filterscript": Programming,
"Formatted": Data,
"Forth": Programming,
"Fortran": Programming,
"FreeMarker": Programming,
"Frege": Programming,
"G-code": Data,
"GAMS": Programming,
"GAP": Programming,
"GCC Machine Description": Programming,
"GDB": Programming,
"GDScript": Programming,
"GLSL": Programming,
"GN": Data,
"Game Maker Language": Programming,
"Genie": Programming,
"Genshi": Programming,
"Gentoo Ebuild": Programming,
"Gentoo Eclass": Programming,
"Gettext Catalog": Prose,
"Gherkin": Programming,
"Glyph": Programming,
"Gnuplot": Programming,
"Go": Programming,
"Golo": Programming,
"Gosu": Programming,
"Grace": Programming,
"Gradle": Data,
"Grammatical Framework": Programming,
"Graph Modeling Language": Data,
"GraphQL": Data,
"Graphviz (DOT)": Data,
"Groovy": Programming,
"Groovy Server Pages": Programming,
"HCL": Programming,
"HLSL": Programming,
"HTML": Markup,
"HTML+Django": Markup,
"HTML+ECR": Markup,
"HTML+EEX": Markup,
"HTML+ERB": Markup,
"HTML+PHP": Markup,
"HTTP": Data,
"Hack": Programming,
"Haml": Markup,
"Handlebars": Markup,
"Harbour": Programming,
"Haskell": Programming,
"Haxe": Programming,
"Hy": Programming,
"HyPhy": Programming,
"IDL": Programming,
"IGOR Pro": Programming,
"INI": Data,
"IRC log": Data,
"Idris": Programming,
"Inform 7": Programming,
"Inno Setup": Programming,
"Io": Programming,
"Ioke": Programming,
"Isabelle": Programming,
"Isabelle ROOT": Programming,
"J": Programming,
"JFlex": Programming,
"JSON": Data,
"JSON5": Data,
"JSONLD": Data,
"JSONiq": Programming,
"JSX": Programming,
"Jasmin": Programming,
"Java": Programming,
"Java Server Pages": Programming,
"JavaScript": Programming,
"Jison": Programming,
"Jison Lex": Programming,
"Jolie": Programming,
"Julia": Programming,
"Jupyter Notebook": Markup,
"KRL": Programming,
"KiCad": Programming,
"Kit": Markup,
"Kotlin": Programming,
"LFE": Programming,
"LLVM": Programming,
"LOLCODE": Programming,
"LSL": Programming,
"LabVIEW": Programming,
"Lasso": Programming,
"Latte": Markup,
"Lean": Programming,
"Less": Markup,
"Lex": Programming,
"LilyPond": Programming,
"Limbo": Programming,
"Linker Script": Data,
"Linux Kernel Module": Data,
"Liquid": Markup,
"Literate Agda": Programming,
"Literate CoffeeScript": Programming,
"Literate Haskell": Programming,
"LiveScript": Programming,
"Logos": Programming,
"Logtalk": Programming,
"LookML": Programming,
"LoomScript": Programming,
"Lua": Programming,
"M": Programming,
"M4": Programming,
"M4Sugar": Programming,
"MAXScript": Programming,
"MQL4": Programming,
"MQL5": Programming,
"MTML": Markup,
"MUF": Programming,
"Makefile": Programming,
"Mako": Programming,
"Markdown": Prose,
"Marko": Markup,
"Mask": Markup,
"Mathematica": Programming,
"Matlab": Programming,
"Maven POM": Data,
"Max": Programming,
"MediaWiki": Prose,
"Mercury": Programming,
"Meson": Programming,
"Metal": Programming,
"MiniD": Programming,
"Mirah": Programming,
"Modelica": Programming,
"Modula-2": Programming,
"Module Management System": Programming,
"Monkey": Programming,
"Moocode": Programming,
"MoonScript": Programming,
"Myghty": Programming,
"NCL": Programming,
"NL": Data,
"NSIS": Programming,
"Nemerle": Programming,
"NetLinx": Programming,
"NetLinx+ERB": Programming,
"NetLogo": Programming,
"NewLisp": Programming,
"Nginx": Markup,
"Nim": Programming,
"Ninja": Data,
"Nit": Programming,
"Nix": Programming,
"Nu": Programming,
"NumPy": Programming,
"OCaml": Programming,
"ObjDump": Data,
"Objective-C": Programming,
"Objective-C++": Programming,
"Objective-J": Programming,
"Omgrofl": Programming,
"Opa": Programming,
"Opal": Programming,
"OpenCL": Programming,
"OpenEdge ABL": Programming,
"OpenRC runscript": Programming,
"OpenSCAD": Programming,
"OpenType Feature File": Data,
"Org": Prose,
"Ox": Programming,
"Oxygene": Programming,
"Oz": Programming,
"P4": Programming,
"PAWN": Programming,
"PHP": Programming,
"PLSQL": Programming,
"PLpgSQL": Programming,
"POV-Ray SDL": Programming,
"Pan": Programming,
"Papyrus": Programming,
"Parrot": Programming,
"Parrot Assembly": Programming,
"Parrot Internal Representation": Programming,
"Pascal": Programming,
"Pep8": Programming,
"Perl": Programming,
"Perl6": Programming,
"Pic": Markup,
"Pickle": Data,
"PicoLisp": Programming,
"PigLatin": Programming,
"Pike": Programming,
"Pod": Prose,
"PogoScript": Programming,
"Pony": Programming,
"PostScript": Markup,
"PowerBuilder": Programming,
"PowerShell": Programming,
"Processing": Programming,
"Prolog": Programming,
"Propeller Spin": Programming,
"Protocol Buffer": Markup,
"Public Key": Data,
"Pug": Markup,
"Puppet": Programming,
"Pure Data": Programming,
"PureBasic": Programming,
"PureScript": Programming,
"Python": Programming,
"Python console": Programming,
"Python traceback": Data,
"QML": Programming,
"QMake": Programming,
"R": Programming,
"RAML": Markup,
"RDoc": Prose,
"REALbasic": Programming,
"REXX": Programming,
"RHTML": Markup,
"RMarkdown": Prose,
"RPM Spec": Data,
"RUNOFF": Markup,
"Racket": Programming,
"Ragel": Programming,
"Rascal": Programming,
"Raw token data": Data,
"Reason": Programming,
"Rebol": Programming,
"Red": Programming,
"Redcode": Programming,
"Regular Expression": Data,
"Ren'Py": Programming,
"RenderScript": Programming,
"RobotFramework": Programming,
"Roff": Markup,
"Rouge": Programming,
"Ruby": Programming,
"Rust": Programming,
"SAS": Programming,
"SCSS": Markup,
"SMT": Programming,
"SPARQL": Data,
"SQF": Programming,
"SQL": Data,
"SQLPL": Programming,
"SRecode Template": Markup,
"STON": Data,
"SVG": Data,
"Sage": Programming,
"SaltStack": Programming,
"Sass": Markup,
"Scala": Programming,
"Scaml": Markup,
"Scheme": Programming,
"Scilab": Programming,
"Self": Programming,
"ShaderLab": Programming,
"Shell": Programming,
"ShellSession": Programming,
"Shen": Programming,
"Slash": Programming,
"Slim": Markup,
"Smali": Programming,
"Smalltalk": Programming,
"Smarty": Programming,
"SourcePawn": Programming,
"Spline Font Database": Data,
"Squirrel": Programming,
"Stan": Programming,
"Standard ML": Programming,
"Stata": Programming,
"Stylus": Markup,
"SubRip Text": Data,
"Sublime Text Config": Data,
"SuperCollider": Programming,
"Swift": Programming,
"SystemVerilog": Programming,
"TI Program": Programming,
"TLA": Programming,
"TOML": Data,
"TXL": Programming,
"Tcl": Programming,
"Tcsh": Programming,
"TeX": Markup,
"Tea": Markup,
"Terra": Programming,
"Text": Prose,
"Textile": Prose,
"Thrift": Programming,
"Turing": Programming,
"Turtle": Data,
"Twig": Markup,
"Type Language": Data,
"TypeScript": Programming,
"Unified Parallel C": Programming,
"Unity3D Asset": Data,
"Unix Assembly": Programming,
"Uno": Programming,
"UnrealScript": Programming,
"UrWeb": Programming,
"VCL": Programming,
"VHDL": Programming,
"Vala": Programming,
"Verilog": Programming,
"Vim script": Programming,
"Visual Basic": Programming,
"Volt": Programming,
"Vue": Markup,
"Wavefront Material": Data,
"Wavefront Object": Data,
"Web Ontology Language": Markup,
"WebAssembly": Programming,
"WebIDL": Programming,
"World of Warcraft Addon Data": Data,
"X10": Programming,
"XC": Programming,
"XCompose": Data,
"XML": Data,
"XPages": Programming,
"XProc": Programming,
"XQuery": Programming,
"XS": Programming,
"XSLT": Programming,
"Xojo": Programming,
"Xtend": Programming,
"YAML": Data,
"YANG": Data,
"Yacc": Programming,
"Zephir": Programming,
"Zimpl": Programming,
"desktop": Data,
"eC": Programming,
"edn": Data,
"fish": Programming,
"mupad": Programming,
"nesC": Programming,
"ooc": Programming,
"reStructuredText": Prose,
"wisp": Programming,
"xBase": Programming,
}

View File

@ -1,12 +0,0 @@
package enry
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/enry.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: 0123456789abcdef0123456789abcdef01234567
var languagesType = map[string]Type{
"Scaml": Markup,
"Scheme": Programming,
"Scilab": Programming,
"Self": Programming,
}

View File

@ -1,11 +0,0 @@
---
Scaml:
type: markup
Scheme:
type: programming
Scilab:
type: programming
Self:
type: programming

View File

@ -2,7 +2,7 @@ package enry
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/enry.v1/internal/code-generator // CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/enry.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND // THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: 0123456789abcdef0123456789abcdef01234567 // Extracted from github/linguist commit: b6460f8ed6b249281ada099ca28bd8f1230b8892
import "gopkg.in/toqueteos/substring.v1" import "gopkg.in/toqueteos/substring.v1"
@ -21,4 +21,136 @@ var vendorMatchers = substring.Or(
substring.Regexp(`(^|/)ltsugar.m4`), substring.Regexp(`(^|/)ltsugar.m4`),
substring.Regexp(`(^|/)ltversion.m4`), substring.Regexp(`(^|/)ltversion.m4`),
substring.Regexp(`(^|/)lt~obsolete.m4`), substring.Regexp(`(^|/)lt~obsolete.m4`),
substring.Regexp(`cpplint.py`),
substring.Regexp(`node_modules/`),
substring.Regexp(`bower_components/`),
substring.Regexp(`^rebar$`),
substring.Regexp(`erlang.mk`),
substring.Regexp(`Godeps/_workspace/`),
substring.Regexp(`.indent.pro`),
substring.Regexp(`(\.|-)min\.(js|css)$`),
substring.Regexp(`([^\s]*)import\.(css|less|scss|styl)$`),
substring.Regexp(`(^|/)bootstrap([^.]*)\.(js|css|less|scss|styl)$`),
substring.Regexp(`(^|/)custom\.bootstrap([^\s]*)(js|css|less|scss|styl)$`),
substring.Regexp(`(^|/)font-awesome\.(css|less|scss|styl)$`),
substring.Regexp(`(^|/)foundation\.(css|less|scss|styl)$`),
substring.Regexp(`(^|/)normalize\.(css|less|scss|styl)$`),
substring.Regexp(`(^|/)[Bb]ourbon/.*\.(css|less|scss|styl)$`),
substring.Regexp(`(^|/)animate\.(css|less|scss|styl)$`),
substring.Regexp(`third[-_]?party/`),
substring.Regexp(`3rd[-_]?party/`),
substring.Regexp(`vendors?/`),
substring.Regexp(`extern(al)?/`),
substring.Regexp(`(^|/)[Vv]+endor/`),
substring.Regexp(`^debian/`),
substring.Regexp(`run.n$`),
substring.Regexp(`bootstrap-datepicker/`),
substring.Regexp(`(^|/)jquery([^.]*)\.js$`),
substring.Regexp(`(^|/)jquery\-\d\.\d+(\.\d+)?\.js$`),
substring.Regexp(`(^|/)jquery\-ui(\-\d\.\d+(\.\d+)?)?(\.\w+)?\.(js|css)$`),
substring.Regexp(`(^|/)jquery\.(ui|effects)\.([^.]*)\.(js|css)$`),
substring.Regexp(`jquery.fn.gantt.js`),
substring.Regexp(`jquery.fancybox.(js|css)`),
substring.Regexp(`fuelux.js`),
substring.Regexp(`(^|/)jquery\.fileupload(-\w+)?\.js$`),
substring.Regexp(`(^|/)slick\.\w+.js$`),
substring.Regexp(`(^|/)Leaflet\.Coordinates-\d+\.\d+\.\d+\.src\.js$`),
substring.Regexp(`leaflet.draw-src.js`),
substring.Regexp(`leaflet.draw.css`),
substring.Regexp(`Control.FullScreen.css`),
substring.Regexp(`Control.FullScreen.js`),
substring.Regexp(`leaflet.spin.js`),
substring.Regexp(`wicket-leaflet.js`),
substring.Regexp(`.sublime-project`),
substring.Regexp(`.sublime-workspace`),
substring.Regexp(`(^|/)prototype(.*)\.js$`),
substring.Regexp(`(^|/)effects\.js$`),
substring.Regexp(`(^|/)controls\.js$`),
substring.Regexp(`(^|/)dragdrop\.js$`),
substring.Regexp(`(.*?)\.d\.ts$`),
substring.Regexp(`(^|/)mootools([^.]*)\d+\.\d+.\d+([^.]*)\.js$`),
substring.Regexp(`(^|/)dojo\.js$`),
substring.Regexp(`(^|/)MochiKit\.js$`),
substring.Regexp(`(^|/)yahoo-([^.]*)\.js$`),
substring.Regexp(`(^|/)yui([^.]*)\.js$`),
substring.Regexp(`(^|/)ckeditor\.js$`),
substring.Regexp(`(^|/)tiny_mce([^.]*)\.js$`),
substring.Regexp(`(^|/)tiny_mce/(langs|plugins|themes|utils)`),
substring.Regexp(`(^|/)ace-builds/`),
substring.Regexp(`(^|/)fontello(.*?)\.css$`),
substring.Regexp(`(^|/)MathJax/`),
substring.Regexp(`(^|/)Chart\.js$`),
substring.Regexp(`(^|/)[Cc]ode[Mm]irror/(\d+\.\d+/)?(lib|mode|theme|addon|keymap|demo)`),
substring.Regexp(`(^|/)shBrush([^.]*)\.js$`),
substring.Regexp(`(^|/)shCore\.js$`),
substring.Regexp(`(^|/)shLegacy\.js$`),
substring.Regexp(`(^|/)angular([^.]*)\.js$`),
substring.Regexp(`(^|\/)d3(\.v\d+)?([^.]*)\.js$`),
substring.Regexp(`(^|/)react(-[^.]*)?\.js$`),
substring.Regexp(`(^|/)modernizr\-\d\.\d+(\.\d+)?\.js$`),
substring.Regexp(`(^|/)modernizr\.custom\.\d+\.js$`),
substring.Regexp(`(^|/)knockout-(\d+\.){3}(debug\.)?js$`),
substring.Regexp(`(^|/)docs?/_?(build|themes?|templates?|static)/`),
substring.Regexp(`(^|/)admin_media/`),
substring.Regexp(`(^|/)env/`),
substring.Regexp(`^fabfile\.py$`),
substring.Regexp(`^waf$`),
substring.Regexp(`^.osx$`),
substring.Regexp(`\.xctemplate/`),
substring.Regexp(`\.imageset/`),
substring.Regexp(`^Carthage/`),
substring.Regexp(`^Pods/`),
substring.Regexp(`(^|/)Sparkle/`),
substring.Regexp(`Crashlytics.framework/`),
substring.Regexp(`Fabric.framework/`),
substring.Regexp(`BuddyBuildSDK.framework/`),
substring.Regexp(`Realm.framework`),
substring.Regexp(`RealmSwift.framework`),
substring.Regexp(`gitattributes$`),
substring.Regexp(`gitignore$`),
substring.Regexp(`gitmodules$`),
substring.Regexp(`(^|/)gradlew$`),
substring.Regexp(`(^|/)gradlew\.bat$`),
substring.Regexp(`(^|/)gradle/wrapper/`),
substring.Regexp(`-vsdoc\.js$`),
substring.Regexp(`\.intellisense\.js$`),
substring.Regexp(`(^|/)jquery([^.]*)\.validate(\.unobtrusive)?\.js$`),
substring.Regexp(`(^|/)jquery([^.]*)\.unobtrusive\-ajax\.js$`),
substring.Regexp(`(^|/)[Mm]icrosoft([Mm]vc)?([Aa]jax|[Vv]alidation)(\.debug)?\.js$`),
substring.Regexp(`^[Pp]ackages\/.+\.\d+\/`),
substring.Regexp(`(^|/)extjs/.*?\.js$`),
substring.Regexp(`(^|/)extjs/.*?\.xml$`),
substring.Regexp(`(^|/)extjs/.*?\.txt$`),
substring.Regexp(`(^|/)extjs/.*?\.html$`),
substring.Regexp(`(^|/)extjs/.*?\.properties$`),
substring.Regexp(`(^|/)extjs/.sencha/`),
substring.Regexp(`(^|/)extjs/docs/`),
substring.Regexp(`(^|/)extjs/builds/`),
substring.Regexp(`(^|/)extjs/cmd/`),
substring.Regexp(`(^|/)extjs/examples/`),
substring.Regexp(`(^|/)extjs/locale/`),
substring.Regexp(`(^|/)extjs/packages/`),
substring.Regexp(`(^|/)extjs/plugins/`),
substring.Regexp(`(^|/)extjs/resources/`),
substring.Regexp(`(^|/)extjs/src/`),
substring.Regexp(`(^|/)extjs/welcome/`),
substring.Regexp(`(^|/)html5shiv\.js$`),
substring.Regexp(`^[Tt]ests?/fixtures/`),
substring.Regexp(`^[Ss]pecs?/fixtures/`),
substring.Regexp(`(^|/)cordova([^.]*)\.js$`),
substring.Regexp(`(^|/)cordova\-\d\.\d(\.\d)?\.js$`),
substring.Regexp(`foundation(\..*)?\.js$`),
substring.Regexp(`^Vagrantfile$`),
substring.Regexp(`.[Dd][Ss]_[Ss]tore$`),
substring.Regexp(`^vignettes/`),
substring.Regexp(`^inst/extdata/`),
substring.Regexp(`octicons.css`),
substring.Regexp(`sprockets-octicons.scss`),
substring.Regexp(`(^|/)activator$`),
substring.Regexp(`(^|/)activator\.bat$`),
substring.Regexp(`proguard.pro`),
substring.Regexp(`proguard-rules.pro`),
substring.Regexp(`^puphpet/`),
substring.Regexp(`(^|/)\.google_apis/`),
substring.Regexp(`^Jenkinsfile$`),
) )

View File

@ -1,24 +0,0 @@
# Caches
- (^|/)cache/
# Dependencies
- ^[Dd]ependencies/
# Distributions
- (^|/)dist/
# C deps
# https://github.com/joyent/node
- ^deps/
- ^tools/
- (^|/)configure$
- (^|/)config.guess$
- (^|/)config.sub$
# stuff autogenerated by autoconf - still C deps
- (^|/)aclocal.m4
- (^|/)libtool.m4
- (^|/)ltoptions.m4
- (^|/)ltsugar.m4
- (^|/)ltversion.m4
- (^|/)lt~obsolete.m4

View File

@ -3,6 +3,7 @@ package generator
import ( import (
"bytes" "bytes"
"io" "io"
"io/ioutil"
"text/template" "text/template"
yaml "gopkg.in/yaml.v2" yaml "gopkg.in/yaml.v2"
@ -15,21 +16,26 @@ var typeToTypeConst = map[string]string{
"prose": "Prose", "prose": "Prose",
} }
// Types reads from buf and builds source file from typeTmplPath. // Types reads from fileToParse and builds source file from tmplPath. It's comply with type File signature.
func Types(data []byte, typeTmplPath, typeTmplName, commit string) ([]byte, error) { func Types(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error {
data, err := ioutil.ReadFile(fileToParse)
if err != nil {
return err
}
languages := make(map[string]*languageInfo) languages := make(map[string]*languageInfo)
if err := yaml.Unmarshal(data, &languages); err != nil { if err := yaml.Unmarshal(data, &languages); err != nil {
return nil, err return err
} }
langTypeMap := buildLanguageTypeMap(languages) langTypeMap := buildLanguageTypeMap(languages)
buf := &bytes.Buffer{} buf := &bytes.Buffer{}
if err := executeTypesTemplate(buf, langTypeMap, typeTmplPath, typeTmplName, commit); err != nil { if err := executeTypesTemplate(buf, langTypeMap, tmplPath, tmplName, commit); err != nil {
return nil, err return err
} }
return buf.Bytes(), nil return formatedWrite(outPath, buf.Bytes())
} }
func buildLanguageTypeMap(languages map[string]*languageInfo) map[string]string { func buildLanguageTypeMap(languages map[string]*languageInfo) map[string]string {
@ -41,12 +47,12 @@ func buildLanguageTypeMap(languages map[string]*languageInfo) map[string]string
return langTypeMap return langTypeMap
} }
func executeTypesTemplate(out io.Writer, langTypeMap map[string]string, typeTmplPath, typeTmpl, commit string) error { func executeTypesTemplate(out io.Writer, langTypeMap map[string]string, tmplPath, tmplName, commit string) error {
fmap := template.FuncMap{ fmap := template.FuncMap{
"getCommit": func() string { return commit }, "getCommit": func() string { return commit },
} }
t := template.Must(template.New(typeTmpl).Funcs(fmap).ParseFiles(typeTmplPath)) t := template.Must(template.New(tmplName).Funcs(fmap).ParseFiles(tmplPath))
if err := t.Execute(out, langTypeMap); err != nil { if err := t.Execute(out, langTypeMap); err != nil {
return err return err
} }

View File

@ -3,32 +3,38 @@ package generator
import ( import (
"bytes" "bytes"
"io" "io"
"io/ioutil"
"text/template" "text/template"
yaml "gopkg.in/yaml.v2" yaml "gopkg.in/yaml.v2"
) )
// Vendor reads from buf and builds source file from vendorTmplPath. // Vendor reads from fileToParse and builds source file from tmplPath. It's comply with type File signature.
func Vendor(data []byte, vendorTmplPath, vendorTmplName, commit string) ([]byte, error) { func Vendor(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error {
data, err := ioutil.ReadFile(fileToParse)
if err != nil {
return err
}
var regexpList []string var regexpList []string
if err := yaml.Unmarshal(data, &regexpList); err != nil { if err := yaml.Unmarshal(data, &regexpList); err != nil {
return nil, err return nil
} }
buf := &bytes.Buffer{} buf := &bytes.Buffer{}
if err := executeVendorTemplate(buf, regexpList, vendorTmplPath, vendorTmplName, commit); err != nil { if err := executeVendorTemplate(buf, regexpList, tmplPath, tmplName, commit); err != nil {
return nil, err return nil
} }
return buf.Bytes(), nil return formatedWrite(outPath, buf.Bytes())
} }
func executeVendorTemplate(out io.Writer, regexpList []string, vendorTmplPath, vendorTmpl, commit string) error { func executeVendorTemplate(out io.Writer, regexpList []string, tmplPath, tmplName, commit string) error {
fmap := template.FuncMap{ fmap := template.FuncMap{
"getCommit": func() string { return commit }, "getCommit": func() string { return commit },
} }
t := template.Must(template.New(vendorTmpl).Funcs(fmap).ParseFiles(vendorTmplPath)) t := template.Must(template.New(tmplName).Funcs(fmap).ParseFiles(tmplPath))
if err := t.Execute(out, regexpList); err != nil { if err := t.Execute(out, regexpList); err != nil {
return err return err
} }

View File

@ -11,10 +11,13 @@ const (
// languages info file // languages info file
languagesYAML = ".linguist/lib/linguist/languages.yml" languagesYAML = ".linguist/lib/linguist/languages.yml"
// linguist's samples directory
samplesDir = ".linguist/samples"
// extension.go generation // extension.go generation
extensionsFile = "extension.go" extensionsFile = "extension.go"
extensionsTmplPath = "internal/code-generator/assets/extensions.go.tmpl" extensionsTmplPath = "internal/code-generator/assets/extension.go.tmpl"
extensionsTmpl = "extensions.go.tmpl" extensionsTmpl = "extension.go.tmpl"
// content.go generation // content.go generation
heuristicsRuby = ".linguist/lib/linguist/heuristics.rb" heuristicsRuby = ".linguist/lib/linguist/heuristics.rb"
@ -36,26 +39,25 @@ const (
// type.go generation // type.go generation
typeFile = "type.go" typeFile = "type.go"
typeTmplPath = "internal/code-generator/assets/types.go.tmpl" typeTmplPath = "internal/code-generator/assets/type.go.tmpl"
typeTmpl = "types.go.tmpl" typeTmpl = "type.go.tmpl"
// interpreter.go generation // interpreter.go generation
interpretersFile = "interpreter.go" interpretersFile = "interpreter.go"
interpretersTmplPath = "internal/code-generator/assets/interpreters.go.tmpl" interpretersTmplPath = "internal/code-generator/assets/interpreter.go.tmpl"
interpretersTmpl = "interpreters.go.tmpl" interpretersTmpl = "interpreter.go.tmpl"
// filename.go generation // filename.go generation
filenamesFile = "filename.go" filenamesFile = "filename.go"
filenamesTmplPath = "internal/code-generator/assets/filenames.go.tmpl" filenamesTmplPath = "internal/code-generator/assets/filename.go.tmpl"
filenamesTmpl = "filenames.go.tmpl" filenamesTmpl = "filename.go.tmpl"
// alias.go generation // alias.go generation
aliasesFile = "alias.go" aliasesFile = "alias.go"
aliasesTmplPath = "internal/code-generator/assets/aliases.go.tmpl" aliasesTmplPath = "internal/code-generator/assets/alias.go.tmpl"
aliasesTmpl = "aliases.go.tmpl" aliasesTmpl = "alias.go.tmpl"
// frequencies.go generation // frequencies.go generation
samplesDir = ".linguist/samples"
frequenciesFile = "frequencies.go" frequenciesFile = "frequencies.go"
frequenciesTmplPath = "internal/code-generator/assets/frequencies.go.tmpl" frequenciesTmplPath = "internal/code-generator/assets/frequencies.go.tmpl"
frequenciesTmpl = "frequencies.go.tmpl" frequenciesTmpl = "frequencies.go.tmpl"
@ -63,13 +65,14 @@ const (
commitPath = ".linguist/.git/refs/heads/master" commitPath = ".linguist/.git/refs/heads/master"
) )
type generatorArgs struct { type generatorFiles struct {
generate generator.File
fileToParse string fileToParse string
samplesDir string
outPath string outPath string
tmplPath string tmplPath string
tmplName string tmplName string
commit string commit string
generate generator.Func
} }
func main() { func main() {
@ -78,26 +81,23 @@ func main() {
log.Printf("couldn't find commit: %v", err) log.Printf("couldn't find commit: %v", err)
} }
argsList := []*generatorArgs{ fileList := []*generatorFiles{
&generatorArgs{languagesYAML, extensionsFile, extensionsTmplPath, extensionsTmpl, commit, generator.Extensions}, &generatorFiles{generator.Extensions, languagesYAML, "", extensionsFile, extensionsTmplPath, extensionsTmpl, commit},
&generatorArgs{heuristicsRuby, contentFile, contentTmplPath, contentTmpl, commit, generator.Heuristics}, &generatorFiles{generator.Heuristics, heuristicsRuby, "", contentFile, contentTmplPath, contentTmpl, commit},
&generatorArgs{vendorYAML, vendorFile, vendorTmplPath, vendorTmpl, commit, generator.Vendor}, &generatorFiles{generator.Vendor, vendorYAML, "", vendorFile, vendorTmplPath, vendorTmpl, commit},
&generatorArgs{documentationYAML, documentationFile, documentationTmplPath, documentationTmpl, commit, generator.Documentation}, &generatorFiles{generator.Documentation, documentationYAML, "", documentationFile, documentationTmplPath, documentationTmpl, commit},
&generatorArgs{languagesYAML, typeFile, typeTmplPath, typeTmpl, commit, generator.Types}, &generatorFiles{generator.Types, languagesYAML, "", typeFile, typeTmplPath, typeTmpl, commit},
&generatorArgs{languagesYAML, interpretersFile, interpretersTmplPath, interpretersTmpl, commit, generator.Interpreters}, &generatorFiles{generator.Interpreters, languagesYAML, "", interpretersFile, interpretersTmplPath, interpretersTmpl, commit},
&generatorArgs{languagesYAML, filenamesFile, filenamesTmplPath, filenamesTmpl, commit, generator.Filenames}, &generatorFiles{generator.Filenames, languagesYAML, samplesDir, filenamesFile, filenamesTmplPath, filenamesTmpl, commit},
&generatorArgs{languagesYAML, aliasesFile, aliasesTmplPath, aliasesTmpl, commit, generator.Aliases}, &generatorFiles{generator.Aliases, languagesYAML, "", aliasesFile, aliasesTmplPath, aliasesTmpl, commit},
&generatorFiles{generator.Frequencies, "", samplesDir, frequenciesFile, frequenciesTmplPath, frequenciesTmpl, commit},
} }
for _, args := range argsList { for _, file := range fileList {
if err := generator.FromFile(args.fileToParse, args.outPath, args.tmplPath, args.tmplName, args.commit, args.generate); err != nil { if err := file.generate(file.fileToParse, file.samplesDir, file.outPath, file.tmplPath, file.tmplName, file.commit); err != nil {
log.Println(err) log.Println(err)
} }
} }
if err := generator.Frequencies(samplesDir, frequenciesTmplPath, frequenciesTmpl, commit, frequenciesFile); err != nil {
log.Println(err)
}
} }
func getCommit(path string) (string, error) { func getCommit(path string) (string, error) {

View File

@ -44,7 +44,7 @@ var (
} }
reLiteralStringQuotes = regexp.MustCompile(`(?sU)(".*"|'.*')`) reLiteralStringQuotes = regexp.MustCompile(`(?sU)(".*"|'.*')`)
reSingleLineComment = regexp.MustCompile(`(?m)(//|--|#|%|")(.*$)`) reSingleLineComment = regexp.MustCompile(`(?m)(//|--|#|%|")\s(.*$)`)
reMultilineComment = regexp.MustCompile(`(?sU)(/\*.*\*/|<!--.*-->|\{-.*-\}|\(\*.*\*\)|""".*"""|'''.*''')`) reMultilineComment = regexp.MustCompile(`(?sU)(/\*.*\*/|<!--.*-->|\{-.*-\}|\(\*.*\*\)|""".*"""|'''.*''')`)
reLiteralNumber = regexp.MustCompile(`(0x[0-9A-Fa-f]([0-9A-Fa-f]|\.)*|\d(\d|\.)*)([uU][lL]{0,2}|([eE][-+]\d*)?[fFlL]*)`) reLiteralNumber = regexp.MustCompile(`(0x[0-9A-Fa-f]([0-9A-Fa-f]|\.)*|\d(\d|\.)*)([uU][lL]{0,2}|([eE][-+]\d*)?[fFlL]*)`)
reShebang = regexp.MustCompile(`(?m)^#!(?:/\w+)*/(?:(\w+)|\w+(?:\s*\w+=\w+\s*)*\s*(\w+))(?:\s*-\w+\s*)*$`) reShebang = regexp.MustCompile(`(?m)^#!(?:/\w+)*/(?:(\w+)|\w+(?:\s*\w+=\w+\s*)*\s*(\w+))(?:\s*-\w+\s*)*$`)

View File

@ -53,6 +53,9 @@ abb (tokenByte, 0xAF02) | ,3.2L
oneBool = 3 <= 2 oneBool = 3 <= 2
varBool = 3<=2> varBool = 3<=2>
#ifndef
#i'm not a comment if the single line comment symbol is not followed by a white
PyErr_SetString(PyExc_RuntimeError, "Relative import is not supported for Python <=2.4."); PyErr_SetString(PyExc_RuntimeError, "Relative import is not supported for Python <=2.4.");
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
@ -77,13 +80,15 @@ var (
tokensFromTestContent = []string{"SHEBANG#!ruby", "SHEBANG#!node", "SHEBANG#!awk", "<!DOCTYPE>", "PUBLIC", "W3C", "DTD", "XHTML", "1", "0", tokensFromTestContent = []string{"SHEBANG#!ruby", "SHEBANG#!node", "SHEBANG#!awk", "<!DOCTYPE>", "PUBLIC", "W3C", "DTD", "XHTML", "1", "0",
"Strict", "EN", "http", "www", "w3", "org", "TR", "xhtml1", "DTD", "xhtml1", "strict", "dtd", "<html>", "<head>", "<title>", "class=", "Strict", "EN", "http", "www", "w3", "org", "TR", "xhtml1", "DTD", "xhtml1", "strict", "dtd", "<html>", "<head>", "<title>", "class=",
"</title>", "<style>", "<![CDATA[>", "example", "background", "color", "yellow", "</style>", "</head>", "<body>", "<div>", "<strong>", "</title>", "<style>", "<![CDATA[>", "example", "background", "color", "yellow", "</style>", "</head>", "<body>", "<div>", "<strong>",
"</strong>", "</div>", "</body>", "</html>", "(", "[", "]", ")", "[", "]", "{", "(", ")", "(", ")", "{", "}", "(", ")", ";", ";", "}", "</strong>", "</div>", "</body>", "</html>", "(", "[", "]", ")", "[", "]", "{", "(", ")", "(", ")", "{", "}", "(", ")", ";", "{", ";",
"]", "]", "aaa", "func", "Tokenize", "content", "byte", "string", "splitted", "bytes.Fields", "content", "tokens", "othercode", "ppp", "}", "]", "]", "#", "/usr/bin/ruby", "#", "/usr/bin/env", "node", "aaa", "#", "/usr/bin/env", "A", "B", "foo", "bar", "awk", "f", "#",
"no", "comment", "abb", "tokenByte", "notcatchasanumber", "number", "*", "anotherNumber", "if", "isTrue", "isToo", "b", "return", "python", "func", "Tokenize", "content", "byte", "string", "splitted", "bytes.Fields", "content", "tokens", "othercode", "ppp", "no",
"tokens", "oneBool", "varBool", "PyErr_SetString", "PyExc_RuntimeError", "html", "PUBLIC", "xmlns", "id", "class", "This", "is", "a", "comment", "abb", "tokenByte", "notcatchasanumber", "number", "*", "anotherNumber", "if", "isTrue", "isToo", "b", "return", "tokens",
"XHTML", "sample", "file", "type", "background", "color", "yellow", "id", "Just", "a", "simple", "XHTML", "test", "page.", "|", "+", "oneBool", "varBool", "#ifndef", "#i", "m", "not", "a", "comment", "if", "the", "single", "line", "comment", "symbol", "is", "not",
"&&", "<", "<", "-", ":", "=", ":", "=", ",", ",", "=", ">", ">", "=", "=", "=", "=", ">", ",", ">", "=", ">", "=", "=", ">", "=", ">", "followed", "by", "a", "white", "PyErr_SetString", "PyExc_RuntimeError", "html", "PUBLIC", "xmlns", "id", "class", "This", "is", "a",
":", ">", "=", ">"} "XHTML", "sample", "file", "type", "#example", "background", "color", "yellow", "id", "Just", "a", "simple", "XHTML", "test", "page.",
"-", "|", "+", "&&", "<", "<", "-", "!", "!", "!", "=", "=", "!", ":", "=", ":", "=", ",", ",", "=", ">", ">", "=", "=", "=", "=", ">",
"'", ",", ">", "=", ">", "=", "=", ">", "=", ">", ":", ">", "=", ">"}
) )
func TestTokenize(t *testing.T) { func TestTokenize(t *testing.T) {

View File

@ -1,153 +0,0 @@
package enry
import (
"bytes"
"regexp"
)
// searchScope is the number of lines taken from the beginning and from the end of the content
// when building the region that is scanned for modelines.
const (
searchScope = 5
)
// GetLanguagesByModeline looks for an editor modeline in the header and footer of content and returns
// the languages it designates. The filename argument is ignored; it exists so the function matches the
// Strategy signature. Each function in modelinesFunc is tried in order until one yields a result.
func GetLanguagesByModeline(filename string, content []byte) []string {
	scope := getHeaderAndFooter(content)

	var found []string
	for i := 0; i < len(modelinesFunc) && len(found) == 0; i++ {
		found = modelinesFunc[i]("", scope)
	}

	return found
}
// getHeaderAndFooter returns the region of content that is scanned for modelines.
// Content with fewer than 2*searchScope newlines is returned untouched. Otherwise the result is a
// fresh slice made of the first searchScope lines followed by everything after the searchScope-th
// newline counted from the end of content.
func getHeaderAndFooter(content []byte) []byte {
	if bytes.Count(content, []byte("\n")) < 2*searchScope {
		return content
	}

	// headScope points at the newline closing the header. Keep that newline (+1) so the last
	// header line is not glued to the first footer line, which would corrupt a modeline on either.
	header := headScope(content, searchScope) + 1
	footer := footScope(content, searchScope)

	headerAndFooter := make([]byte, 0, header+len(content)-footer)
	headerAndFooter = append(headerAndFooter, content[:header]...)
	headerAndFooter = append(headerAndFooter, content[footer:]...)
	return headerAndFooter
}

// headScope returns the offset in content of the scope-th newline counted from the start.
// content must hold at least scope newlines; the caller checks this beforehand.
func headScope(content []byte, scope int) (index int) {
	for i := 0; i < scope; i++ {
		eol := bytes.IndexByte(content, '\n')
		content = content[eol+1:]
		// eol is relative to the remaining content; the skipped newlines are added back below
		index += eol
	}

	return index + scope - 1
}

// footScope returns the offset just past the scope-th newline counted from the end of content.
// content must hold at least scope newlines; the caller checks this beforehand.
func footScope(content []byte, scope int) (index int) {
	for i := 0; i < scope; i++ {
		index = bytes.LastIndexByte(content, '\n')
		content = content[:index]
	}

	return index + 1
}
// modelinesFunc lists the modeline detection functions in the order GetLanguagesByModeline tries them:
// Emacs first, then Vim. The first one returning a non-empty result ends the search.
var modelinesFunc = []func(filename string, content []byte) []string{
GetLanguagesByEmacsModeline,
GetLanguagesByVimModeline,
}
// Regular expressions used to locate modelines and extract the language alias from them.
var (
// reEmacsModeline matches a line containing `-*- ... -*-` and captures the text between the markers.
reEmacsModeline = regexp.MustCompile(`.*-\*-\s*(.+?)\s*-\*-.*(?m:$)`)
// reEmacsLang captures the value of a case-insensitive `mode:` setting, up to whitespace or `;`.
reEmacsLang = regexp.MustCompile(`.*(?i:mode)\s*:\s*([^\s;]+)\s*;*.*`)
// reVimModeline matches a `vi:`, `vim:`, `vim<op>NNN:` or `ex:` prefix and captures the rest of the line.
reVimModeline = regexp.MustCompile(`(?:(?m:\s|^)vi(?:m[<=>]?\d+|m)?|[\t\x20]*ex)\s*[:]\s*(.*)(?m:$)`)
// reVimLang captures the value assigned to a case-insensitive `filetype`, `ft` or `syntax` option.
reVimLang = regexp.MustCompile(`(?i:filetype|ft|syntax)\s*=(\w+)(?:\s|:|$)`)
)
// GetLanguageByEmacsModeline detects whether content has an Emacs modeline and resolves the language
// it names through the alias table. When no valid language is found it returns OtherLanguage and false.
func GetLanguageByEmacsModeline(content []byte) (string, bool) {
	if found := GetLanguagesByEmacsModeline("", content); len(found) > 0 {
		return found[0], true
	}

	return OtherLanguage, false
}
// GetLanguagesByEmacsModeline returns the language named by the Emacs modeline found in content, as a
// one-element slice, or nil when there is no modeline or its alias is unknown. The filename argument is
// ignored; it exists so the function matches the Strategy signature.
func GetLanguagesByEmacsModeline(filename string, content []byte) []string {
	modelines := reEmacsModeline.FindAllSubmatch(content, -1)
	if modelines == nil {
		return nil
	}

	// When several modelines are present only the last one is honoured.
	settings := modelines[len(modelines)-1][1]

	// Without an explicit `mode:` setting the whole modeline body is taken as the alias.
	alias := string(settings)
	if mode := reEmacsLang.FindSubmatch(settings); mode != nil {
		alias = string(mode[1])
	}

	if language, ok := GetLanguageByAlias(alias); ok {
		return []string{language}
	}

	return nil
}
// GetLanguageByVimModeline detects whether content has a Vim modeline and resolves the language it
// names through the alias table. When no valid language is found it returns OtherLanguage and false.
func GetLanguageByVimModeline(content []byte) (string, bool) {
	if found := GetLanguagesByVimModeline("", content); len(found) > 0 {
		return found[0], true
	}

	return OtherLanguage, false
}
// GetLanguagesByVimModeline returns the language named by the Vim modeline found in content, as a
// one-element slice, or nil when there is no modeline, its options disagree, or the alias is unknown.
// The filename argument is ignored; it exists so the function matches the Strategy signature.
func GetLanguagesByVimModeline(filename string, content []byte) []string {
	modelines := reVimModeline.FindAllSubmatch(content, -1)
	if modelines == nil {
		return nil
	}

	// When several modelines are present only the last one is honoured.
	settings := modelines[len(modelines)-1][1]
	assignments := reVimLang.FindAllSubmatch(settings, -1)
	if assignments == nil {
		return nil
	}

	// Every filetype/ft/syntax option on the line must name the same alias:
	//   "syntax=ruby ft=python filetype=perl"     -> nil (conflicting)
	//   "syntax=python ft=python filetype=python" -> resolved from "python"
	alias := string(assignments[0][1])
	for _, assignment := range assignments[1:] {
		if string(assignment[1]) != alias {
			return nil
		}
	}

	if language, ok := GetLanguageByAlias(alias); ok {
		return []string{language}
	}

	return nil
}

View File

@ -1,89 +0,0 @@
package enry
import (
"bufio"
"bytes"
"regexp"
"strings"
)
// shebang is the two-character marker a first line must start with to be treated as a shebang line.
const shebang = `#!`
var (
// shebangExecHack matches an `exec <word> ... $0 ... $@` line and captures the word after exec.
shebangExecHack = regexp.MustCompile(`exec (\w+).+\$0.+\$@`)
// pythonVersion matches "python" followed by one digit, a dot and one or more digits (e.g. python2.7).
pythonVersion = regexp.MustCompile(`python\d\.\d+`)
)
// GetLanguagesByShebang returns a slice of possible languages for the given content, filename will be ignored.
// It accomplish the signature to be a Strategy type.
func GetLanguagesByShebang(filename string, content []byte) (languages []string) {
interpreter := getInterpreter(content)
return languagesByInterpreter[interpreter]
}
func getInterpreter(data []byte) (interpreter string) {
line := getFirstLine(data)
if !hasShebang(line) {
return ""
}
// skip shebang
line = bytes.TrimSpace(line[2:])
splitted := bytes.Fields(line)
if bytes.Contains(splitted[0], []byte("env")) {
if len(splitted) > 1 {
interpreter = string(splitted[1])
}
} else {
splittedPath := bytes.Split(splitted[0], []byte{'/'})
interpreter = string(splittedPath[len(splittedPath)-1])
}
if interpreter == "sh" {
interpreter = lookForMultilineExec(data)
}
if pythonVersion.MatchString(interpreter) {
interpreter = interpreter[:strings.Index(interpreter, `.`)]
}
return
}
func getFirstLine(data []byte) []byte {
buf := bufio.NewScanner(bytes.NewReader(data))
buf.Scan()
line := buf.Bytes()
if err := buf.Err(); err != nil {
return nil
}
return line
}
func hasShebang(line []byte) bool {
shebang := []byte(shebang)
return bytes.HasPrefix(line, shebang)
}
func lookForMultilineExec(data []byte) string {
const magicNumOfLines = 5
interpreter := "sh"
buf := bufio.NewScanner(bytes.NewReader(data))
for i := 0; i < magicNumOfLines && buf.Scan(); i++ {
line := buf.Bytes()
if shebangExecHack.Match(line) {
interpreter = shebangExecHack.FindStringSubmatch(string(line))[1]
break
}
}
if err := buf.Err(); err != nil {
return interpreter
}
return interpreter
}

View File

@ -31,9 +31,8 @@ func IsAuxiliaryLanguage(lang string) bool {
// IsConfiguration returns whether or not path is using a configuration language. // IsConfiguration returns whether or not path is using a configuration language.
func IsConfiguration(path string) bool { func IsConfiguration(path string) bool {
lang, _ := GetLanguageByExtension(path) language, _ := GetLanguageByExtension(path)
_, is := configurationLanguages[lang] _, is := configurationLanguages[language]
return is return is
} }

View File

@ -55,7 +55,7 @@ func (s *SimpleLinguistTestSuite) TestIsConfiguration() {
}{ }{
{name: "TestIsConfiguration_1", path: "foo", expected: false}, {name: "TestIsConfiguration_1", path: "foo", expected: false},
{name: "TestIsConfiguration_2", path: "foo.ini", expected: true}, {name: "TestIsConfiguration_2", path: "foo.ini", expected: true},
{name: "TestIsConfiguration_3", path: "foo.json", expected: true}, {name: "TestIsConfiguration_3", path: "/test/path/foo.json", expected: true},
} }
for _, test := range tests { for _, test := range tests {