Squashed 'go-enry/' content from commit 7e3a9a7

git-subtree-dir: go-enry
git-subtree-split: 7e3a9a7241
This commit is contained in:
2024-09-04 16:33:41 -03:00
commit f955c625ad
192 changed files with 528500 additions and 0 deletions

View File

@ -0,0 +1,35 @@
package data
import "strings"
// LanguageByAliasMap maps every language alias to its language name; the language names themselves are included as aliases too.
// All the keys (alias or not) are written in lower case and their spaces have been replaced by underscores.
var LanguageByAliasMap = map[string]string{
{{range $alias, $language := . -}}
"{{ $alias }}": {{ printf "%q" $language -}},
{{end -}}
}
// LanguageByAlias looks up the language name by it's alias or name.
// It mirrors the logic of github linguist and is needed e.g for heuristcs.yml
// that mixes names and aliases in a language field (see XPM example).
func LanguageByAlias(langOrAlias string) (lang string, ok bool) {
k := convertToAliasKey(langOrAlias)
lang, ok = LanguageByAliasMap[k]
return
}
// convertToAliasKey converts a language name to a key in LanguageByAliasMap:
// everything from the first comma on is dropped, spaces become underscores
// and the result is lower-cased.
// Following
//   - internal.code-generator.generator.convertToAliasKey()
//   - GetLanguageByAlias()
//
// conventions.
// It is here to avoid dependency on "generate" and "enry" packages.
func convertToAliasKey(langName string) string {
	// Keep only the text before the first comma, if there is one.
	if comma := strings.IndexByte(langName, ','); comma >= 0 {
		langName = langName[:comma]
	}
	return strings.ToLower(strings.Replace(langName, ` `, `_`, -1))
}

View File

@ -0,0 +1,7 @@
package data
// LanguagesColor maps a language name to its color string.
var LanguagesColor = map[string]string{
{{range $language, $color := . -}}
"{{$language}}": "{{$color -}}",
{{end -}}
}

View File

@ -0,0 +1,4 @@
package data
// LinguistCommit is the linguist commit from which the files were generated.
var LinguistCommit = "{{- getCommit -}}"

View File

@ -0,0 +1,58 @@
package data
import (
"github.com/go-enry/go-enry/v2/data/rule"
"github.com/go-enry/go-enry/v2/regex"
)
// ContentHeuristics maps a file extension to the heuristic rules generated for it.
var ContentHeuristics = map[string]*Heuristics{
{{ range $ext, $rules := . -}}
{{ printf "%q" $ext }}: &Heuristics{
{{ range $rule := $rules -}}
{{template "Rule" $rule}}
{{ end -}}
},
{{ end -}}
}
{{ define "Rule" -}}
{{ if eq .Op "And" -}}
rule.And(
{{ template "Languages" .Langs -}}
{{ range $rule := .Rules -}}
{{template "Rule" $rule}}
{{ end -}}
),
{{- else if eq .Op "Or" -}}
rule.Or(
{{ template "Languages" .Langs -}}
{{ template "mustCompile" . }}
),
{{- else if eq .Op "Not" -}}
rule.Not(
{{ template "Languages" .Langs -}}
{{ template "mustCompile" . }}
),
{{- else if eq .Op "Always" -}}
rule.Always(
{{ template "Languages" .Langs -}}
),
{{ end -}}
{{ end -}}
{{define "Languages" -}}
{{with . -}}
rule.MatchingLanguages( {{range .}} {{printf "\"%s\"" .}}, {{end}} ),
{{ else -}}
rule.MatchingLanguages(""),
{{end -}}
{{end}}
{{define "mustCompile" -}}
{{ if .IsRE2 -}}
regex.MustCompileMultiline({{ .Pattern | stringVal }}),
{{- else -}}
regex.MustCompileRuby({{ .Pattern | stringVal }}),
{{ end -}}
{{end}}

View File

@ -0,0 +1,10 @@
package data
import "github.com/go-enry/go-enry/v2/regex"
// DocumentationMatchers is the list of compiled regular expressions that
// match documentation files/dirs.
// Patterns are emitted through stringVal so that a pattern containing a
// backtick still produces a valid Go raw-string expression.
var DocumentationMatchers = []regex.EnryRegexp{
{{range $regexp := . -}}
regex.MustCompile({{ $regexp | stringVal }}),
{{end -}}
}

View File

@ -0,0 +1,13 @@
package data
// LanguagesByExtension maps a file extension to the names of the languages that use it.
var LanguagesByExtension = map[string][]string{
{{range $extension, $languages := .LanguagesByExtension -}}
"{{ $extension }}": { {{- $languages | formatStringSlice -}} },
{{end -}}
}
// ExtensionsByLanguage maps a language name to the file extensions associated with it.
var ExtensionsByLanguage = map[string][]string{
{{range $language, $extensions := .ExtensionsByLanguage -}}
"{{ $language }}": { {{- $extensions | formatStringSlice -}} },
{{end -}}
}

View File

@ -0,0 +1,7 @@
package data
// LanguagesByFilename maps a file name to the names of the languages associated with it.
var LanguagesByFilename = map[string][]string{
{{range $filename, $languages := . -}}
"{{ $filename }}": { {{- formatStringSlice $languages -}} },
{{end -}}
}

View File

@ -0,0 +1,20 @@
package data
// LanguagesLogProbabilities maps a language name to the logarithm of its share
// of all samples (as computed by the generator's languageLogProbability).
var LanguagesLogProbabilities = map[string]float64{
{{ $freqs := . -}}
{{range $index, $language := orderKeys .Languages -}}
"{{ $language }}": {{ languageLogProbability $language -}},
{{end -}}
}
// TokensLogProbabilities maps a language name to a map from token to the
// logarithm of that token's share of the language's tokens (as computed by
// the generator's tokenLogProbability).
var TokensLogProbabilities = map[string]map[string]float64{
{{range $index, $language := orderMapMapKeys .Tokens -}}
"{{ $language }}": map[string]float64{
{{range $i, $token := index $freqs.Tokens $language | orderKeys -}}
{{ quote $token }}: {{ tokenLogProbability $language $token }},
{{end -}}
},
{{end -}}
}
// TokensTotal is the total number of tokens found in all samples, written as a floating-point literal.
var TokensTotal = {{ toFloat64 .TokensTotal -}}

View File

@ -0,0 +1,7 @@
package data
// LanguagesGroup maps a language name to the name of its group.
var LanguagesGroup = map[string]string{
{{range $language, $group := . -}}
"{{$language}}": "{{$group -}}",
{{end -}}
}

View File

@ -0,0 +1,3 @@
// Code generated by github.com/go-enry/go-enry/v2/internal/code-generator DO NOT EDIT.
// Extracted from github/linguist commit: {{ getCommit }}

View File

@ -0,0 +1,7 @@
package data
// IDByLanguage maps a language name to its numeric ID.
var IDByLanguage = map[string]int{
{{range $language, $id := . -}}
"{{$language}}": {{$id -}},
{{end -}}
}

View File

@ -0,0 +1,7 @@
package data
// LanguagesByInterpreter maps an interpreter name to the names of the languages associated with it.
var LanguagesByInterpreter = map[string][]string{
{{range $interpreter, $languages := . -}}
"{{ $interpreter }}": { {{- $languages | formatStringSlice -}} },
{{end -}}
}

View File

@ -0,0 +1,75 @@
package data
// LanguageInfo exposes the data for a language's Linguist YAML entry as a Go struct.
// See https://github.com/github/linguist/blob/master/lib/linguist/languages.yml
type LanguageInfo struct {
// Name is the language name. May contain symbols not safe for use in some filesystems (e.g., `F*`).
Name string
// FSName is the filesystem safe name. Will only be set if Name is not safe for use in all filesystems.
FSName string
// Type is the language Type. See data.Type for values.
Type Type
// Color is the CSS hex color to represent the language. Only used if type is "programming" or "markup".
Color string
// Group is the name of the parent language. Languages in a group are counted in the statistics as the parent language.
Group string
// Aliases is a slice of additional aliases (implicitly includes name.downcase).
Aliases []string
// Extensions is a slice of associated extensions (the first one is considered the primary extension).
Extensions []string
// Interpreters is a slice of associated interpreters.
Interpreters []string
// Filenames is a slice of filenames commonly associated with the language.
Filenames []string
// MimeType (maps to codemirror_mime_type in Linguist's languages.yml) is the string name of the file mime type used for highlighting whenever a file is edited.
MimeType string
// TMScope is the TextMate scope that represents this programming language.
TMScope string
// AceMode is the name of the Ace Mode used for highlighting whenever a file is edited.
AceMode string
// CodeMirrorMode is the name of the CodeMirror Mode used for highlighting whenever a file is edited.
CodeMirrorMode string
// Wrap is a boolean flag to enable line wrapping in an editor.
Wrap bool
// LanguageID is the Linguist-assigned numeric ID for the language.
LanguageID int
}
// LanguageInfoByID allows accessing LanguageInfo by a language's ID.
// Each entry is keyed by the same value that is stored in its LanguageID field.
var LanguageInfoByID = map[int]LanguageInfo{
{{range $language, $info := . -}}
{{$info.LanguageID}}: LanguageInfo{
Name: "{{$language}}",
FSName: "{{$info.FSName}}",
Type: TypeForString("{{$info.Type}}"),
Color: "{{$info.Color}}",
Group: "{{$info.Group}}",
Aliases: []string{
{{range $alias := $info.Aliases -}}
"{{$alias}}",
{{end -}}
},
Extensions: []string{
{{range $extension := $info.Extensions -}}
"{{$extension}}",
{{end -}}
},
Interpreters: []string{
{{range $interpreter := $info.Interpreters -}}
"{{$interpreter}}",
{{end -}}
},
Filenames: []string{
{{range $filename := $info.Filenames -}}
"{{$filename}}",
{{end -}}
},
MimeType: "{{$info.MimeType}}",
TMScope: "{{$info.TMScope}}",
AceMode: "{{$info.AceMode}}",
CodeMirrorMode: "{{$info.CodeMirrorMode}}",
Wrap: {{$info.Wrap}},
LanguageID: {{$info.LanguageID}},
},
{{end -}}
}

View File

@ -0,0 +1,7 @@
package data
var LanguagesMime = map[string]string{
{{range $language, $mime := . -}}
"{{$language}}": "{{$mime -}}",
{{end -}}
}

View File

@ -0,0 +1,49 @@
package data
// Type represents a language's type. Either data, programming, markup, prose, or unknown.
type Type int

// Type's values.
const (
	TypeUnknown Type = iota
	TypeData
	TypeProgramming
	TypeMarkup
	TypeProse
)

// String returns the lower-case name of t. Any value that is not one of the
// named types (including TypeUnknown) yields "unknown".
func (t Type) String() string {
	names := [...]string{
		TypeData:        "data",
		TypeProgramming: "programming",
		TypeMarkup:      "markup",
		TypeProse:       "prose",
	}
	if t > TypeUnknown && int(t) < len(names) {
		return names[t]
	}
	return "unknown"
}

// TypeForString is the inverse of Type.String: it returns the Type whose name
// is exactly s, or TypeUnknown when s names no known type.
func TypeForString(s string) Type {
	for _, t := range [...]Type{TypeData, TypeProgramming, TypeMarkup, TypeProse} {
		if s == t.String() {
			return t
		}
	}
	return TypeUnknown
}
// LanguagesType maps a language name to the integer value of its type.
// NOTE(review): the values presumably correspond to the Type constants above — confirm against the generator.
var LanguagesType = map[string]int{
{{range $language, $type := . -}}
"{{ $language }}": {{ $type -}},
{{end -}}
}

View File

@ -0,0 +1,22 @@
package data
import "github.com/go-enry/go-enry/v2/regex"
{{define "mustCompile" -}}
{{ if isRE2 . -}}
regex.MustCompile({{ . | stringVal }})
{{- else -}}
regex.MustCompileRuby({{ . | stringVal }})
{{- end -}}
{{end}}
// VendorMatchers is the list of compiled regular expressions, one per pattern passed to the template.
var VendorMatchers = []regex.EnryRegexp{
{{range $re := . -}}
{{ template "mustCompile" $re }},
{{end -}}
}
// FastVendorMatcher is equivalent to matching any of the VendorMatchers.
{{with $singleRE := collateAllRegexps . -}}
var FastVendorMatcher = {{template "mustCompile" $singleRE}}
{{end}}

View File

@ -0,0 +1,58 @@
package generator
import (
"bytes"
"io"
"io/ioutil"
"strings"
"gopkg.in/yaml.v2"
)
// Aliases reads the languages YAML from fileToParse, builds the alias -> language
// map and renders it with the template tmplName found at tmplPath into outPath.
// samplesDir is not used. It complies with type File signature.
func Aliases(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error {
	raw, err := ioutil.ReadFile(fileToParse)
	if err != nil {
		return err
	}

	languages := make(map[string]*languageInfo)
	if err = yaml.Unmarshal(raw, &languages); err != nil {
		return err
	}

	// Languages are processed in alphabetical order so that the result is deterministic.
	languageByAlias := buildAliasLanguageMap(languages, getAlphabeticalOrderedKeys(languages))

	var buf bytes.Buffer
	if err = executeAliasesTemplate(&buf, languageByAlias, tmplPath, tmplName, commit); err != nil {
		return err
	}

	return formatedWrite(outPath, buf.Bytes())
}
// buildAliasLanguageMap returns a map from alias key to language name.
// Every language in orderedLangList contributes its own name and each of its
// aliases, all normalized with convertToAliasKey. When two languages produce
// the same key, the one that comes later in orderedLangList wins.
func buildAliasLanguageMap(languages map[string]*languageInfo, orderedLangList []string) map[string]string {
	byAlias := make(map[string]string)
	for _, name := range orderedLangList {
		byAlias[convertToAliasKey(name)] = name
		for _, alias := range languages[name].Aliases {
			byAlias[convertToAliasKey(alias)] = name
		}
	}

	return byAlias
}
// convertToAliasKey normalizes a language name or alias into a map key:
// spaces are replaced by underscores and the result is lower-cased.
func convertToAliasKey(s string) (key string) {
	return strings.ToLower(strings.Replace(s, ` `, `_`, -1))
}
// executeAliasesTemplate renders the template aliasesTmpl located at aliasesTmplPath
// into out, using languageByAlias as the template data and no extra template functions.
func executeAliasesTemplate(out io.Writer, languageByAlias map[string]string, aliasesTmplPath, aliasesTmpl, commit string) error {
return executeTemplate(out, aliasesTmpl, aliasesTmplPath, commit, nil, languageByAlias)
}

View File

@ -0,0 +1,47 @@
package generator
import (
"bytes"
"io"
"io/ioutil"
"gopkg.in/yaml.v2"
)
// Colors generates a map in Go with language name -> color string.
// The languages are read from the YAML in fileToParse and the result is
// rendered with template tmplName (at tmplPath) into outPath. samplesDir is not used.
// It is of generator.File type.
func Colors(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error {
	raw, err := ioutil.ReadFile(fileToParse)
	if err != nil {
		return err
	}

	languages := make(map[string]*languageInfo)
	if err = yaml.Unmarshal(raw, &languages); err != nil {
		return err
	}

	var buf bytes.Buffer
	if err = executeColorTemplate(&buf, buildLanguageColorMap(languages), tmplPath, tmplName, commit); err != nil {
		return err
	}

	return formatedWrite(outPath, buf.Bytes())
}
// buildLanguageColorMap returns a map from language name to its color,
// leaving out the languages whose color is empty.
func buildLanguageColorMap(languages map[string]*languageInfo) map[string]string {
	colors := make(map[string]string)
	for name, info := range languages {
		if info.Color == "" {
			continue
		}
		colors[name] = info.Color
	}
	return colors
}
// executeColorTemplate renders the template tmplName located at tmplPath into out,
// using langColorMap as the template data and no extra template functions.
func executeColorTemplate(out io.Writer, langColorMap map[string]string, tmplPath, tmplName, commit string) error {
return executeTemplate(out, tmplName, tmplPath, commit, nil, langColorMap)
}

View File

@ -0,0 +1,30 @@
package generator
import (
"bytes"
"io/ioutil"
"gopkg.in/yaml.v2"
)
// Documentation generates regex matchers in Go for documentation files/dirs.
// fileToParse must contain a YAML list of regular expressions; they are passed
// to template tmplName (at tmplPath) and the formatted result is written to outFile.
// It is of generator.File type.
func Documentation(fileToParse, _, outFile, tmplPath, tmplName, commit string) error {
	raw, err := ioutil.ReadFile(fileToParse)
	if err != nil {
		return err
	}

	var regexpList []string
	if err = yaml.Unmarshal(raw, &regexpList); err != nil {
		return err
	}

	var buf bytes.Buffer
	if err = executeTemplate(&buf, tmplName, tmplPath, commit, nil, regexpList); err != nil {
		return err
	}

	return formatedWrite(outFile, buf.Bytes())
}

View File

@ -0,0 +1,87 @@
package generator
import (
"bytes"
"io"
"io/ioutil"
"strings"
"text/template"
yaml "gopkg.in/yaml.v2"
)
// extensionsInfo is the data handed to the extensions template.
type extensionsInfo struct {
// LanguagesByExtension maps an extension to the languages that declare it.
LanguagesByExtension map[string][]string
// ExtensionsByLanguage maps a language to its extensions; languages without extensions are absent.
ExtensionsByLanguage map[string][]string
}
// Extensions reads the languages YAML from fileToParse, lower-cases every
// extension, builds both the extension -> languages and language -> extensions
// maps and renders them with template tmplName (at tmplPath) into outPath.
// samplesDir is not used. It complies with type File signature.
func Extensions(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error {
	raw, err := ioutil.ReadFile(fileToParse)
	if err != nil {
		return err
	}

	languages := make(map[string]*languageInfo)
	if err = yaml.Unmarshal(raw, &languages); err != nil {
		return err
	}

	extensionsToLower(languages)

	extInfo := &extensionsInfo{
		LanguagesByExtension: buildExtensionLanguageMap(languages, getAlphabeticalOrderedKeys(languages)),
		ExtensionsByLanguage: buildLanguageExtensionsMap(languages),
	}

	var buf bytes.Buffer
	if err = executeExtensionsTemplate(&buf, extInfo, tmplPath, tmplName, commit); err != nil {
		return err
	}

	return formatedWrite(outPath, buf.Bytes())
}
// extensionsToLower replaces, in place, the Extensions of every language
// with a lower-cased copy.
func extensionsToLower(languages map[string]*languageInfo) {
	for name := range languages {
		entry := languages[name]
		entry.Extensions = stringSliceToLower(entry.Extensions)
	}
}
// stringSliceToLower returns a new slice holding the lower-cased elements of
// slice, in the same order. The input is not modified and the result is never nil.
func stringSliceToLower(slice []string) []string {
	lowered := make([]string, len(slice))
	for i := range slice {
		lowered[i] = strings.ToLower(slice[i])
	}
	return lowered
}
// buildExtensionLanguageMap returns a map from extension to the languages
// declaring it. Languages are visited in the order given by orderedKeyList,
// which is therefore also the order of the languages within every entry.
func buildExtensionLanguageMap(languages map[string]*languageInfo, orderedKeyList []string) map[string][]string {
	byExtension := make(map[string][]string)
	for _, name := range orderedKeyList {
		for _, ext := range languages[name].Extensions {
			byExtension[ext] = append(byExtension[ext], name)
		}
	}

	return byExtension
}
// buildLanguageExtensionsMap returns a map from language name to its
// extensions, leaving out the languages that have none.
// The slices are shared with the languageInfo entries, not copied.
func buildLanguageExtensionsMap(languages map[string]*languageInfo) map[string][]string {
	byLanguage := make(map[string][]string, len(languages))
	for name, info := range languages {
		if len(info.Extensions) == 0 {
			continue
		}
		byLanguage[name] = info.Extensions
	}

	return byLanguage
}
// executeExtensionsTemplate renders the template tmplName located at tmplPath into out
// with extInfo as the template data.
// It registers the template function formatStringSlice, which joins a slice into a
// comma-separated list of double-quoted strings. The elements are not escaped, and an
// empty slice is rendered as "" (a single empty string).
func executeExtensionsTemplate(out io.Writer, extInfo *extensionsInfo, tmplPath, tmplName, commit string) error {
fmap := template.FuncMap{
"formatStringSlice": func(slice []string) string { return `"` + strings.Join(slice, `","`) + `"` },
}
return executeTemplate(out, tmplName, tmplPath, commit, fmap, extInfo)
}

View File

@ -0,0 +1,101 @@
package generator
import (
"bytes"
"io"
"io/ioutil"
"os"
"path/filepath"
"sort"
"strings"
"text/template"
yaml "gopkg.in/yaml.v2"
)
// Filenames reads the languages YAML from fileToParse, adds the file names found
// under the "filenames" directories of samplesDir, builds the filename -> languages
// map and renders it with template tmplName (at tmplPath) into outPath.
// It complies with type File signature.
func Filenames(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error {
	raw, err := ioutil.ReadFile(fileToParse)
	if err != nil {
		return err
	}

	languages := make(map[string]*languageInfo)
	if err = yaml.Unmarshal(raw, &languages); err != nil {
		return err
	}

	if err = walkSamplesFilenames(samplesDir, languages); err != nil {
		return err
	}

	var buf bytes.Buffer
	if err = executeFilenamesTemplate(&buf, buildFilenameLanguageMap(languages), tmplPath, tmplName, commit); err != nil {
		return err
	}

	return formatedWrite(outPath, buf.Bytes())
}
// walkSamplesFilenames walks samplesDir and, for every file whose parent
// directory is named "filenames", appends the file name to the Filenames of
// the current language unless it is already listed there.
//
// The current language is the name of the most recently visited directory
// that is not itself called "filenames".
//
// NOTE(review): files belonging to a language that has no entry in languages
// are ignored, exactly as before (the original built a temporary languageInfo
// for them but never stored it in the map) — confirm that dropping them is intended.
func walkSamplesFilenames(samplesDir string, languages map[string]*languageInfo) error {
	const filenamesDir = "filenames"
	var language string
	return filepath.Walk(samplesDir, func(path string, f os.FileInfo, err error) error {
		switch {
		case err != nil:
			return err
		case f.IsDir():
			if f.Name() != filenamesDir {
				language = f.Name()
			}
			return nil
		case filepath.Base(filepath.Dir(path)) != filenamesDir:
			// Only files placed directly inside a "filenames" directory are of interest.
			return nil
		}

		info, known := languages[language]
		if !known {
			return nil
		}

		for _, existing := range info.Filenames {
			if existing == f.Name() {
				return nil
			}
		}

		info.Filenames = append(info.Filenames, f.Name())
		return nil
	})
}
// buildFilenameLanguageMap returns a map from file name to the languages that
// list it in Filenames. Because languages is ranged over as a map, the order
// of the languages within an entry is not deterministic.
func buildFilenameLanguageMap(languages map[string]*languageInfo) map[string][]string {
	byFilename := make(map[string][]string)
	for name, info := range languages {
		for _, fn := range info.Filenames {
			byFilename[fn] = append(byFilename[fn], name)
		}
	}

	return byFilename
}
// executeFilenamesTemplate renders the template tmplName located at tmplPath into out
// with languagesByFilename as the template data.
// It registers the template function formatStringSlice, which sorts the given slice
// in place and joins it into a comma-separated list of double-quoted strings.
// The elements are not escaped.
func executeFilenamesTemplate(out io.Writer, languagesByFilename map[string][]string, tmplPath, tmplName, commit string) error {
fmap := template.FuncMap{
"formatStringSlice": func(slice []string) string {
// sorting makes the generated output independent of the order in which the slice was built
sort.Strings(slice)
return `"` + strings.Join(slice, `","`) + `"`
},
}
return executeTemplate(out, tmplName, tmplPath, commit, fmap, languagesByFilename)
}

View File

@ -0,0 +1,202 @@
package generator
import (
"bytes"
"fmt"
"io"
"io/ioutil"
"log"
"math"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"text/template"
"github.com/go-enry/go-enry/v2/internal/tokenizer"
)
// samplesFrequencies holds the sample and token counts collected by getFrequencies.
type samplesFrequencies struct {
// LanguageTotal is the number of samples over all languages.
LanguageTotal int `json:"language_total,omitempty"`
// Languages is the number of samples per language.
Languages map[string]int `json:"languages,omitempty"`
// TokensTotal is the number of tokens over all languages.
TokensTotal int `json:"tokens_total,omitempty"`
// Tokens is, per language, the number of occurrences of every token.
Tokens map[string]map[string]int `json:"tokens,omitempty"`
// LanguageTokens is the number of tokens per language.
LanguageTokens map[string]int `json:"language_tokens,omitempty"`
}
// Frequencies reads directories in samplesDir, retrieves information about frequencies of languages and tokens, and writes
// the file outPath using tmplName as a template. fileToParse is not used.
// When the environment variable ENRY_DEBUG is set, the totals and the number of
// samples per language are printed as well. It complies with type File signature.
func Frequencies(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error {
	freqs, err := getFrequencies(samplesDir)
	if err != nil {
		return err
	}

	if _, debug := os.LookupEnv("ENRY_DEBUG"); debug {
		log.Printf("Total samples: %d\n", freqs.LanguageTotal)
		log.Printf("Total tokens: %d\n", freqs.TokensTotal)

		// Print the languages sorted by name to get a stable listing.
		names := make([]string, 0, len(freqs.Languages))
		for name := range freqs.Languages {
			names = append(names, name)
		}
		sort.Strings(names)
		for _, name := range names {
			fmt.Printf(" %s: %d\n", name, freqs.Languages[name])
		}
	}

	var buf bytes.Buffer
	if err = executeFrequenciesTemplate(&buf, freqs, tmplPath, tmplName, commit); err != nil {
		return err
	}

	return formatedWrite(outPath, buf.Bytes())
}
// getFrequencies treats every sub-directory of samplesDir as a language and
// counts its samples and tokens.
//
// A failure to read the samples of a language is logged; the language is then
// still processed with whatever samples were collected. Languages without
// samples, or for which tokenizing reported an error, are skipped.
// An error is returned only when samplesDir itself cannot be read.
func getFrequencies(samplesDir string) (*samplesFrequencies, error) {
	entries, err := ioutil.ReadDir(samplesDir)
	if err != nil {
		return nil, err
	}

	freqs := &samplesFrequencies{
		Languages:      make(map[string]int),
		Tokens:         make(map[string]map[string]int),
		LanguageTokens: make(map[string]int),
	}

	for _, entry := range entries {
		if !entry.IsDir() {
			continue
		}

		lang := entry.Name()

		samples, err := readSamples(filepath.Join(samplesDir, lang))
		if err != nil {
			log.Println(err)
		}
		if len(samples) == 0 {
			continue
		}

		langTokens, err := getTokens(samples)
		if err != nil {
			log.Println(err)
			continue
		}

		freqs.LanguageTotal += len(samples)
		freqs.Languages[lang] = len(samples)
		freqs.TokensTotal += len(langTokens)
		freqs.LanguageTokens[lang] = len(langTokens)

		occurrences := make(map[string]int)
		for _, tok := range langTokens {
			occurrences[tok]++
		}
		freqs.Tokens[lang] = occurrences
	}

	return freqs, nil
}
// readSamples collects ./samples/ filenames from the Linguist codebase, skipping symlinks.
// It returns the paths of the regular files found directly in samplesLangDir
// and in its "filenames" sub-directory; any other sub-directory is not descended into.
func readSamples(samplesLangDir string) ([]string, error) {
	const specialSubDir = "filenames"
	var samples []string

	visit := func(path string, info os.FileInfo, err error) error {
		if err != nil {
			fmt.Printf("failure accessing a path %q: %v\n", path, err)
			return err
		}

		if info.IsDir() {
			// Only the language directory itself and "filenames" are walked into.
			if name := info.Name(); name == filepath.Base(samplesLangDir) || name == specialSubDir {
				return nil
			}
			return filepath.SkipDir
		}

		// skip git file symlinks on win and *nix
		if !info.Mode().IsRegular() || isKnownSymlinkInLinguist(path) {
			return nil
		}

		samples = append(samples, path)
		return nil
	}

	err := filepath.Walk(samplesLangDir, visit)
	return samples, err
}
// isKnownSymlinkInLinguist reports whether path ends with one of the known symlinks.
// On Windows, there is no symlink support in Git [1] and those become regular text files,
// so we have to skip these files manually, maintaining a list here :/
//  1. https://github.com/git-for-windows/git/wiki/Symbolic-Links
//
// The list can be refreshed with:
//
//	$ find -L .linguist/samples -xtype l
func isKnownSymlinkInLinguist(path string) bool {
	knownSymlinks := []string{
		filepath.Join("Ant Build System", "filenames", "build.xml"),
		filepath.Join("Markdown", "symlink.md"),
	}
	for _, suffix := range knownSymlinks {
		if strings.HasSuffix(path, suffix) {
			return true
		}
	}
	return false
}
// getTokens tokenizes every file in samples and returns all the tokens in one slice.
// Files that cannot be read are skipped; the returned error is the last read
// error that occurred, or nil when every file was read.
func getTokens(samples []string) ([]string, error) {
	var lastErr error
	tokens := make([]string, 0, 20)
	for _, sample := range samples {
		content, err := ioutil.ReadFile(sample)
		if err != nil {
			lastErr = err
			continue
		}

		tokens = append(tokens, tokenizer.Tokenize(content)...)
	}

	return tokens, lastErr
}
// executeFrequenciesTemplate renders the template tmplName located at tmplPath into out
// with freqs as the template data. It registers the helper functions the template uses
// to order map keys and to turn the counts in freqs into log probabilities.
func executeFrequenciesTemplate(out io.Writer, freqs *samplesFrequencies, tmplPath, tmplName, commit string) error {
fmap := template.FuncMap{
// toFloat64 formats an integer as a floating-point literal.
"toFloat64": func(num int) string { return fmt.Sprintf("%f", float64(num)) },
// orderKeys returns the keys of m sorted alphabetically.
"orderKeys": func(m map[string]int) []string {
keys := make([]string, 0, len(m))
for key := range m {
keys = append(keys, key)
}
sort.Strings(keys)
return keys
},
// languageLogProbability is the natural logarithm of the share of samples that belong to language.
"languageLogProbability": func(language string) string {
num := math.Log(float64(freqs.Languages[language]) / float64(freqs.LanguageTotal))
return fmt.Sprintf("%f", num)
},
// orderMapMapKeys returns the top-level keys of mm sorted alphabetically.
"orderMapMapKeys": func(mm map[string]map[string]int) []string {
keys := make([]string, 0, len(mm))
for key := range mm {
keys = append(keys, key)
}
sort.Strings(keys)
return keys
},
// tokenLogProbability is the natural logarithm of the share of language's tokens that are token.
"tokenLogProbability": func(language, token string) string {
num := math.Log(float64(freqs.Tokens[language][token]) / float64(freqs.LanguageTokens[language]))
return fmt.Sprintf("%f", num)
},
"quote": strconv.Quote,
}
return executeTemplate(out, tmplName, tmplPath, commit, fmap, freqs)
}

View File

@ -0,0 +1,62 @@
// Package generator provides facilities to generate Go code for the
// package data in enry from YAML files describing supported languages in Linguist.
package generator
import (
"fmt"
"go/format"
"io"
"io/ioutil"
"path/filepath"
"strings"
"text/template"
)
// File is a common type for all generator functions.
// It generates a Go source code file based on the template tmplName in tmplPath,
// by parsing the data in fileToParse and linguist's samplesDir and
// saving the results to outPath. commit is made available to the templates.
type File func(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error
// formatedWrite formats source as Go code and writes it to outPath.
// When formatting fails, the un-formatted source is written instead, to
// simplify debugging, and the formatting error is returned. A failure to
// write the file takes precedence over a formatting error.
func formatedWrite(outPath string, source []byte) error {
	output := source
	formatted, fmtErr := format.Source(source)
	if fmtErr == nil {
		output = formatted
	} else {
		fmtErr = fmt.Errorf("'go fmt' fails on %v", fmtErr)
	}

	if writeErr := ioutil.WriteFile(outPath, output, 0666); writeErr != nil {
		return writeErr
	}
	return fmtErr
}
// executeTemplate renders the header template followed by the template name
// (parsed from the file at path) into w, passing data to both of them.
//
// The header template is always "header.go.tmpl" and is looked up in the same
// directory as path. On top of the functions in fmap (which may be nil), the
// templates can use:
//   - getCommit: returns commit;
//   - stringVal: quotes a string as a Go raw-string expression;
//   - isRE2.
//
// Note that these three entries are added to the fmap given by the caller.
// Template parsing failures are returned as errors rather than causing a panic.
func executeTemplate(w io.Writer, name, path, commit string, fmap template.FuncMap, data interface{}) error {
	getCommit := func() string {
		return commit
	}
	// stringVal returns escaped string that can be directly placed into go code.
	// for value test`s it would return `test`+"`"+`s`
	stringVal := func(val string) string {
		val = strings.ReplaceAll(val, "`", "`+\"`\"+`")
		return fmt.Sprintf("`%s`", val)
	}

	if fmap == nil {
		fmap = make(template.FuncMap)
	}
	fmap["getCommit"] = getCommit
	fmap["stringVal"] = stringVal
	fmap["isRE2"] = isRE2

	const headerTmpl = "header.go.tmpl"
	headerPath := filepath.Join(filepath.Dir(path), headerTmpl)

	h, err := template.New(headerTmpl).Funcs(fmap).ParseFiles(headerPath)
	if err != nil {
		return fmt.Errorf("parsing header template %q: %w", headerPath, err)
	}
	if err := h.Execute(w, data); err != nil {
		return err
	}

	t, err := template.New(name).Funcs(fmap).ParseFiles(path)
	if err != nil {
		return fmt.Errorf("parsing template %q: %w", path, err)
	}
	return t.Execute(w, data)
}

View File

@ -0,0 +1,331 @@
package generator
import (
"flag"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
"testing"
"github.com/go-enry/go-enry/v2/data"
"github.com/go-enry/go-enry/v2/internal/tests"
"github.com/go-enry/go-enry/v2/internal/tokenizer"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
)
var (
linguistClonedEnvVar = "ENRY_TEST_REPO"
linguistURL = "https://github.com/github/linguist.git"
commit = "42fd3c2514375fc6ad281552368edd3fb9f6ee7f"
samplesDir = "samples"
languagesFile = filepath.Join("lib", "linguist", "languages.yml")
testDir = "test_files"
assetsDir = filepath.Join("..", "assets")
// Extensions test
extensionGold = filepath.Join(testDir, "extension.gold")
extensionTestTmplPath = filepath.Join(assetsDir, "extension.go.tmpl")
extensionTestTmplName = "extension.go.tmpl"
// Heuristics test
heuristicsTestFile = filepath.Join("lib", "linguist", "heuristics.yml")
contentGold = filepath.Join(testDir, "content.gold")
contentTestTmplPath = filepath.Join(assetsDir, "content.go.tmpl")
contentTestTmplName = "content.go.tmpl"
// Vendor test
vendorTestFile = filepath.Join("lib", "linguist", "vendor.yml")
vendorGold = filepath.Join(testDir, "vendor.gold")
vendorTestTmplPath = filepath.Join(assetsDir, "vendor.go.tmpl")
vendorTestTmplName = "vendor.go.tmpl"
// Documentation test
documentationTestFile = filepath.Join("lib", "linguist", "documentation.yml")
documentationGold = filepath.Join(testDir, "documentation.gold")
documentationTestTmplPath = filepath.Join(assetsDir, "documentation.go.tmpl")
documentationTestTmplName = "documentation.go.tmpl"
// Types test
typeGold = filepath.Join(testDir, "type.gold")
typeTestTmplPath = filepath.Join(assetsDir, "type.go.tmpl")
typeTestTmplName = "type.go.tmpl"
// Interpreters test
interpreterGold = filepath.Join(testDir, "interpreter.gold")
interpreterTestTmplPath = filepath.Join(assetsDir, "interpreter.go.tmpl")
interpreterTestTmplName = "interpreter.go.tmpl"
// Filenames test
filenameGold = filepath.Join(testDir, "filename.gold")
filenameTestTmplPath = filepath.Join(assetsDir, "filename.go.tmpl")
filenameTestTmplName = "filename.go.tmpl"
// Aliases test
aliasGold = filepath.Join(testDir, "alias.gold")
aliasTestTmplPath = filepath.Join(assetsDir, "alias.go.tmpl")
aliasTestTmplName = "alias.go.tmpl"
// Frequencies test
frequenciesGold = filepath.Join(testDir, "frequencies.gold")
frequenciesTestTmplPath = filepath.Join(assetsDir, "frequencies.go.tmpl")
frequenciesTestTmplName = "frequencies.go.tmpl"
// commit test
commitGold = filepath.Join(testDir, "commit.gold")
commitTestTmplPath = filepath.Join(assetsDir, "commit.go.tmpl")
commitTestTmplName = "commit.go.tmpl"
// mime test
mimeTypeGold = filepath.Join(testDir, "mimeType.gold")
mimeTypeTestTmplPath = filepath.Join(assetsDir, "mimeType.go.tmpl")
mimeTypeTestTmplName = "mimeType.go.tmpl"
// colors test
colorsGold = filepath.Join(testDir, "colors.gold")
colorsTestTmplPath = filepath.Join(assetsDir, "colors.go.tmpl")
colorsTestTmplName = "colors.go.tmpl"
// groups test
groupsGold = filepath.Join(testDir, "groups.gold")
groupsTestTmplPath = filepath.Join(assetsDir, "groups.go.tmpl")
groupsTestTmplName = "groups.go.tmpl"
)
// GeneratorTestSuite runs every generator against a checkout of Linguist and
// compares the results with the *.gold files.
type GeneratorTestSuite struct {
suite.Suite
// tmpLinguistDir is the directory of the Linguist checkout used by the tests.
tmpLinguistDir string
// isCleanupNeeded tells TearDownSuite whether tmpLinguistDir must be removed.
isCleanupNeeded bool
// testCases is filled in by SetupSuite, one entry per generator.
testCases []testCase
}
// testCase describes one generator run: the arguments passed to generate
// and the gold file its output is compared with.
type testCase struct {
name string
fileToParse string
samplesDir string
tmplPath string
tmplName string
commit string
// generate is the generator function under test.
generate File
// wantOut is the path of the *.gold file holding the expected output.
wantOut string
}
// updateGold enables TestUpdateGeneratorTestSuiteGold, which rewrites the *.gold files.
var updateGold = flag.Bool("update_gold", false, "Update golden test files")
// Test_GeneratorTestSuite is the go test entry point that runs GeneratorTestSuite.
func Test_GeneratorTestSuite(t *testing.T) {
suite.Run(t, new(GeneratorTestSuite))
}
// SetupSuite obtains the Linguist checkout via tests.MaybeCloneLinguist and
// builds the table of test cases, one per generator. Input files and sample
// directories are resolved relative to the checkout; templates and gold files
// come from the package-level path variables.
func (s *GeneratorTestSuite) SetupSuite() {
var err error
s.tmpLinguistDir, s.isCleanupNeeded, err = tests.MaybeCloneLinguist(linguistClonedEnvVar, linguistURL, data.LinguistCommit)
require.NoError(s.T(), err)
s.testCases = []testCase{
{
name: "Extensions()",
fileToParse: filepath.Join(s.tmpLinguistDir, languagesFile),
samplesDir: "",
tmplPath: extensionTestTmplPath,
tmplName: extensionTestTmplName,
commit: commit,
generate: Extensions,
wantOut: extensionGold,
},
{
name: "Heuristics()",
fileToParse: filepath.Join(s.tmpLinguistDir, heuristicsTestFile),
samplesDir: "",
tmplPath: contentTestTmplPath,
tmplName: contentTestTmplName,
commit: commit,
generate: GenHeuristics,
wantOut: contentGold,
},
{
name: "Vendor()",
fileToParse: filepath.Join(s.tmpLinguistDir, vendorTestFile),
samplesDir: "",
tmplPath: vendorTestTmplPath,
tmplName: vendorTestTmplName,
commit: commit,
generate: Vendor,
wantOut: vendorGold,
},
{
name: "Documentation()",
fileToParse: filepath.Join(s.tmpLinguistDir, documentationTestFile),
samplesDir: "",
tmplPath: documentationTestTmplPath,
tmplName: documentationTestTmplName,
commit: commit,
generate: Documentation,
wantOut: documentationGold,
},
{
name: "Types()",
fileToParse: filepath.Join(s.tmpLinguistDir, languagesFile),
samplesDir: "",
tmplPath: typeTestTmplPath,
tmplName: typeTestTmplName,
commit: commit,
generate: Types,
wantOut: typeGold,
},
{
name: "Interpreters()",
fileToParse: filepath.Join(s.tmpLinguistDir, languagesFile),
samplesDir: "",
tmplPath: interpreterTestTmplPath,
tmplName: interpreterTestTmplName,
commit: commit,
generate: Interpreters,
wantOut: interpreterGold,
},
{
name: "Filenames()",
fileToParse: filepath.Join(s.tmpLinguistDir, languagesFile),
samplesDir: filepath.Join(s.tmpLinguistDir, samplesDir),
tmplPath: filenameTestTmplPath,
tmplName: filenameTestTmplName,
commit: commit,
generate: Filenames,
wantOut: filenameGold,
},
{
name: "Aliases()",
fileToParse: filepath.Join(s.tmpLinguistDir, languagesFile),
samplesDir: "",
tmplPath: aliasTestTmplPath,
tmplName: aliasTestTmplName,
commit: commit,
generate: Aliases,
wantOut: aliasGold,
},
{
name: "Frequencies()",
samplesDir: filepath.Join(s.tmpLinguistDir, samplesDir),
tmplPath: frequenciesTestTmplPath,
tmplName: frequenciesTestTmplName,
commit: commit,
generate: Frequencies,
wantOut: frequenciesGold,
},
{
name: "Commit()",
samplesDir: "",
tmplPath: commitTestTmplPath,
tmplName: commitTestTmplName,
commit: commit,
generate: Commit,
wantOut: commitGold,
},
{
name: "MimeType()",
fileToParse: filepath.Join(s.tmpLinguistDir, languagesFile),
samplesDir: "",
tmplPath: mimeTypeTestTmplPath,
tmplName: mimeTypeTestTmplName,
commit: commit,
generate: MimeType,
wantOut: mimeTypeGold,
},
{
name: "Colors()",
fileToParse: filepath.Join(s.tmpLinguistDir, languagesFile),
samplesDir: "",
tmplPath: colorsTestTmplPath,
tmplName: colorsTestTmplName,
commit: commit,
generate: Colors,
wantOut: colorsGold,
},
{
name: "Groups()",
fileToParse: filepath.Join(s.tmpLinguistDir, languagesFile),
samplesDir: "",
tmplPath: groupsTestTmplPath,
tmplName: groupsTestTmplName,
commit: commit,
generate: Groups,
wantOut: groupsGold,
},
}
}
// TearDownSuite removes the Linguist checkout, but only when SetupSuite
// reported that cleanup is needed.
func (s *GeneratorTestSuite) TearDownSuite() {
	if !s.isCleanupNeeded {
		return
	}
	assert.NoError(s.T(), os.RemoveAll(s.tmpLinguistDir))
}
// TestUpdateGeneratorTestSuiteGold is a Gold results generation automation.
// It should only be enabled&run manually (with the -update_gold flag) on every
// new Linguist version to update *.gold files: each generator writes its
// output straight to the gold file of its test case.
func (s *GeneratorTestSuite) TestUpdateGeneratorTestSuiteGold() {
	t := s.T()
	if !*updateGold {
		t.Skip()
	}

	t.Logf("Generating new *.gold test files")
	for _, tc := range s.testCases {
		t.Logf("Generating %s from %s\n", tc.wantOut, tc.fileToParse)
		err := tc.generate(tc.fileToParse, tc.samplesDir, tc.wantOut, tc.tmplPath, tc.tmplName, tc.commit)
		assert.NoError(t, err)
	}
}
// TestGenerationFiles runs every generator of s.testCases into a temporary
// file and compares the output with the corresponding *.gold file, ignoring
// differences in whitespace. On a mismatch a diff is logged; when no diff can
// be produced, the beginning of both texts is logged instead.
func (s *GeneratorTestSuite) TestGenerationFiles() {
	// firstN returns at most the first n bytes of str, so that short outputs
	// can be logged without slicing out of range.
	firstN := func(str string, n int) string {
		if len(str) > n {
			return str[:n]
		}
		return str
	}

	for _, test := range s.testCases {
		// Each case runs in its own function so that its temporary file is
		// removed as soon as the case is done, not at the end of the test.
		func() {
			gold, err := ioutil.ReadFile(test.wantOut)
			assert.NoError(s.T(), err)

			outFile, err := ioutil.TempFile("", "generator-test-")
			// Without a temporary file nothing below can work.
			require.NoError(s.T(), err)
			outPath := outFile.Name()
			defer os.Remove(outPath)
			// Only the name is needed: the generator opens the file on its own.
			assert.NoError(s.T(), outFile.Close())

			err = test.generate(test.fileToParse, test.samplesDir, outPath, test.tmplPath, test.tmplName, test.commit)
			assert.NoError(s.T(), err)

			out, err := ioutil.ReadFile(outPath)
			assert.NoError(s.T(), err)

			expected := normalizeSpaces(string(gold))
			actual := normalizeSpaces(string(out))
			// this produces large unreadable output, so we do it 'manually' instead
			// assert.Equal(s.T(), expected, actual, "Test %s", test.name)
			if expected == actual {
				return
			}

			assert.Fail(s.T(), fmt.Sprintf("%s output is different from %q", test.name, test.wantOut))
			diff, err := text_diff(gold, out)
			if err != nil {
				s.T().Logf("Failed produce a diff between expected and actual: %s", err.Error())
				s.T().Logf("Expected %q", firstN(expected, 400))
				s.T().Logf("Actual %q", firstN(actual, 400))
				return
			}
			s.T().Logf("\n%s", diff)
		}()
	}
}
// TestTokenizerOnATS tokenizes the ATS sample csv_parse.hats found under
// s.tmpLinguistDir and pins the number of tokens the tokenizer returns.
// The test aborts if the sample cannot be read.
func (s *GeneratorTestSuite) TestTokenizerOnATS() {
	const suspiciousSample = "samples/ATS/csv_parse.hats"

	content, err := ioutil.ReadFile(filepath.Join(s.tmpLinguistDir, suspiciousSample))
	require.NoError(s.T(), err)

	tokenCount := len(tokenizer.Tokenize(content))
	assert.Equal(s.T(), 381, tokenCount, "Number of tokens using LF as line endings")
}
// normalizeSpaces returns a copy of str with whitespaces normalized.
// We use this to compare generated source as gofmt format may change.
// E.g for changes between Go 1.10 and 1.11 see
// https://go-review.googlesource.com/c/go/+/122295/
func normalizeSpaces(str string) string {
return strings.Join(strings.Fields(str), " ")
}

View File

@ -0,0 +1,66 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// https://cs.opensource.google/go/x/perf/+/e8d778a6:LICENSE
package generator
import (
"bytes"
"fmt"
"os"
"os/exec"
)
// text_diff returns a human-readable description of the differences between
// b1 and b2, produced by running the external `diff -u` command on two
// temporary files. It returns an empty string when the inputs are equal and
// an error when the diff command is not on PATH or a temporary file cannot
// be written.
//
// It essentially is https://cs.opensource.google/go/x/perf/+/e8d778a6:internal/diff/diff.go
// Used only in code generator tests, as a debugging aid.
// It is not part of any release artifact and is not distributed with enry.
func text_diff(b1, b2 []byte) (string, error) {
	const diffTool = "diff"

	if bytes.Equal(b1, b2) {
		return "", nil
	}
	if _, lookErr := exec.LookPath(diffTool); lookErr != nil {
		return "", fmt.Errorf("diff command unavailable\nold: %q\nnew: %q", b1, b2)
	}

	oldPath, err := writeTempFile("", "gen_test", b1)
	if err != nil {
		return "", err
	}
	defer os.Remove(oldPath)

	newPath, err := writeTempFile("", "gen_test", b2)
	if err != nil {
		return "", err
	}
	defer os.Remove(newPath)

	output, runErr := exec.Command(diffTool, "-u", oldPath, newPath).CombinedOutput()
	// diff exits with a non-zero status when the files don't match, so an
	// error only counts as a failure when the command printed nothing.
	if runErr != nil && len(output) == 0 {
		return "", runErr
	}
	return string(output), nil
}
// writeTempFile creates a new temporary file via os.CreateTemp(dir, prefix),
// writes data to it, closes it and returns its name.
// If writing or closing fails the file is removed again and the first error
// encountered is returned together with an empty name.
func writeTempFile(dir, prefix string, data []byte) (string, error) {
	tmp, err := os.CreateTemp(dir, prefix)
	if err != nil {
		return "", err
	}
	path := tmp.Name()

	_, writeErr := tmp.Write(data)
	// the file is closed regardless of the write outcome
	closeErr := tmp.Close()

	failure := writeErr
	if failure == nil {
		failure = closeErr
	}
	if failure != nil {
		os.Remove(path)
		return "", failure
	}
	return path, nil
}

View File

@ -0,0 +1,47 @@
package generator
import (
"bytes"
"io"
"io/ioutil"
"gopkg.in/yaml.v2"
)
// Groups generates a Go map of language name -> group name.
// It reads the YAML document at fileToParse, keeps the languages that
// declare a group, executes the template tmplName (from tmplPath) with that
// map and passes the rendered bytes to formatedWrite for outPath.
// samplesDir is not used; the signature is that of the generator.File type.
func Groups(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error {
	raw, err := ioutil.ReadFile(fileToParse)
	if err != nil {
		return err
	}

	parsed := make(map[string]*languageInfo)
	err = yaml.Unmarshal(raw, &parsed)
	if err != nil {
		return err
	}

	var rendered bytes.Buffer
	err = executeGroupTemplate(&rendered, buildLanguageGroupMap(parsed), tmplPath, tmplName, commit)
	if err != nil {
		return err
	}
	return formatedWrite(outPath, rendered.Bytes())
}
// buildLanguageGroupMap returns a map of language name -> group name that
// contains only the languages whose Group field is not empty.
func buildLanguageGroupMap(languages map[string]*languageInfo) map[string]string {
	groups := make(map[string]string)
	for name, meta := range languages {
		if meta.Group == "" {
			continue
		}
		groups[name] = meta.Group
	}
	return groups
}
// executeGroupTemplate renders the template tmplName from tmplPath into out
// by delegating to executeTemplate with no extra template functions.
// Despite its name, langColorMap is the language -> group map built by
// buildLanguageGroupMap; it is passed to the template as its data.
func executeGroupTemplate(out io.Writer, langColorMap map[string]string, tmplPath, tmplName, commit string) error {
return executeTemplate(out, tmplName, tmplPath, commit, nil, langColorMap)
}

View File

@ -0,0 +1,181 @@
package generator
import (
"bytes"
"fmt"
"io/ioutil"
"log"
"strings"
yaml "gopkg.in/yaml.v2"
)
const (
// multilinePrefix is the inline multi-line flag a heuristic pattern may
// start with; isUnsupportedRegexpSyntax looks for it in front of a
// `/.../`-delimited expression.
multilinePrefix = "(?m)"
// orPipe is the separator loadRule uses to join several alternative
// patterns into a single regular expression.
orPipe = "|"
)
// GenHeuristics generates the language identification heuristics in Go.
// It parses the heuristics YAML at fileToParse, converts it into
// per-extension LanguagePattern lists, executes the template tmplName (from
// tmplPath) with them and passes the rendered bytes to formatedWrite for
// outPath. The second argument is ignored; the signature is that of the
// generator.File type.
func GenHeuristics(fileToParse, _, outPath, tmplPath, tmplName, commit string) error {
	parsed, err := parseYaml(fileToParse)
	if err != nil {
		return err
	}

	patternsByExt, err := loadHeuristics(parsed)
	if err != nil {
		return err
	}

	var rendered bytes.Buffer
	if err := executeTemplate(&rendered, tmplName, tmplPath, commit, nil, patternsByExt); err != nil {
		return err
	}
	return formatedWrite(outPath, rendered.Bytes())
}
// loadHeuristics transforms the parsed YAML into a map[".ext"] -> IR used for
// code generation. Every extension of a disambiguation is mapped to the same
// slice of patterns built from that disambiguation's rules.
// An error is returned if an extension shows up in more than one
// disambiguation.
func loadHeuristics(yaml *Heuristics) (map[string][]*LanguagePattern, error) {
	byExt := make(map[string][]*LanguagePattern)
	for _, d := range yaml.Disambiguations {
		var compiled []*LanguagePattern
		for _, r := range d.Rules {
			if lp := loadRule(yaml.NamedPatterns, r); lp != nil {
				compiled = append(compiled, lp)
			}
		}

		// unroll to a single map
		for _, ext := range d.Extensions {
			if existing, dup := byExt[ext]; dup {
				return nil, fmt.Errorf("cannot add extension '%s', it already exists for %+v", ext, existing)
			}
			byExt[ext] = compiled
		}
	}
	return byExt, nil
}
// loadRule transforms a single rule from the parsed YAML into the IR used for
// code generation. The first matching case decides the kind of pattern:
//   - a non-empty And list yields an "And" pattern built recursively,
//   - a non-empty Pattern list yields an "Or" pattern,
//   - a NegativePattern yields a "Not" pattern,
//   - a NamedPattern yields an "Or" pattern looked up in namedPatterns,
//   - anything else yields an "Always" pattern.
//
// For the "Or" cases multiple patterns are always combined into a single one.
// A pattern that is not RE2 compatible is logged; it is still returned, with
// IsRE2 set to false.
func loadRule(namedPatterns map[string]StringArray, rule *Rule) *LanguagePattern {
	var result *LanguagePattern

	switch {
	case len(rule.And) != 0: // AndPattern
		var children []*LanguagePattern
		for _, sub := range rule.And {
			children = append(children, loadRule(namedPatterns, sub))
		}
		result = &LanguagePattern{Op: "And", Langs: rule.Languages, Pattern: "", Rules: children, IsRE2: true}

	case len(rule.Pattern) != 0: // OrPattern
		// FIXME(bzz): this optimization should only be applied if each pattern isRE2!
		joined := strings.Join(rule.Pattern, orPipe)

		// TODO(bzz): handle the common case Or(len(Languages)==0) better
		// e.g. by emiting `rule.Rule(...)` instead of
		// an (ugly) `rule.Or( rule.MatchingLanguages(""), ... )`
		result = &LanguagePattern{Op: "Or", Langs: rule.Languages, Pattern: joined, Rules: nil, IsRE2: isRE2(joined)}

	case rule.NegativePattern != "": // NotPattern
		negated := rule.NegativePattern
		result = &LanguagePattern{Op: "Not", Langs: rule.Languages, Pattern: negated, Rules: nil, IsRE2: isRE2(negated)}

	case rule.NamedPattern != "": // Named OrPattern
		joined := strings.Join(namedPatterns[rule.NamedPattern], orPipe)
		result = &LanguagePattern{Op: "Or", Langs: rule.Languages, Pattern: joined, Rules: nil, IsRE2: isRE2(joined)}

	default: // AlwaysPattern
		result = &LanguagePattern{Op: "Always", Langs: rule.Languages, Pattern: "", Rules: nil, IsRE2: true}
	}

	if !isRE2(result.Pattern) {
		log.Printf("RE2 incompatible syntax for heuristic language:'%s', rule:'%s'\n", rule.Languages, result.Pattern)
	}
	return result
}
// LanguagePattern is an IR of parsed Rule suitable for code generations.
// Strings are used as this is to be consumed by text/template.
type LanguagePattern struct {
// Op is the kind of pattern; loadRule sets it to "And", "Or", "Not" or "Always".
Op string
// Langs holds the languages of the rule this pattern was built from.
Langs []string
// Pattern is the regular expression text; empty for "And" and "Always".
Pattern string
// Rules holds the sub-patterns of an "And" pattern, nil otherwise.
Rules []*LanguagePattern
// IsRE2 is the isRE2 verdict for Pattern; loadRule sets it to true for
// "And" and "Always" without inspecting anything.
IsRE2 bool
}
// Heuristics is the top-level structure the heuristics YAML file is decoded
// into by parseYaml.
type Heuristics struct {
// Disambiguations lists the per-extension rule sets.
Disambiguations []*Disambiguation
// NamedPatterns maps a pattern name to the pattern(s) that rules can refer
// to through their named_pattern field.
NamedPatterns map[string]StringArray `yaml:"named_patterns"`
}
// Disambiguation groups a set of file extensions with the rules used to
// tell apart the languages sharing those extensions.
type Disambiguation struct {
// Extensions are the file extensions the rules apply to; loadHeuristics
// uses each of them as a map key.
Extensions []string `yaml:"extensions,flow"`
// Rules are the rules of this disambiguation, in file order.
Rules []*Rule `yaml:"rules"`
}
// Rule is a single heuristic rule: the language(s) it selects plus the
// condition under which it applies (see loadRule for how the fields are
// prioritized).
type Rule struct {
// Patterns is inlined, so its keys appear directly on the rule.
Patterns `yaml:",inline"`
// Languages holds the value(s) of the rule's "language" key.
Languages StringArray `yaml:"language"`
// And holds the sub-rules of a conjunction; a non-empty list makes
// loadRule produce an "And" pattern.
And []*Rule
}
// Patterns holds the alternative ways a Rule can express its condition.
type Patterns struct {
// Pattern is one or more regular expressions; loadRule joins them with "|".
Pattern StringArray `yaml:"pattern,omitempty"`
// NamedPattern is a key into Heuristics.NamedPatterns.
NamedPattern string `yaml:"named_pattern,omitempty"`
// NegativePattern is a regular expression that loadRule turns into a "Not" pattern.
NegativePattern string `yaml:"negative_pattern,omitempty"`
}
// StringArray is a workaround for parsing named_pattern,
// which is sometimes an array and sometimes is not.
// See https://github.com/go-yaml/yaml/issues/100
type StringArray []string

// UnmarshalYAML always yields a []string: the value is first decoded as a
// list of strings and, if that fails, as a single string that is then
// wrapped into a one-element slice. Only the error of the second attempt is
// ever returned; on error the receiver is left untouched.
func (sa *StringArray) UnmarshalYAML(unmarshal func(interface{}) error) error {
	var list []string
	if unmarshal(&list) == nil {
		*sa = list
		return nil
	}

	var scalar string
	err := unmarshal(&scalar)
	if err != nil {
		return err
	}
	*sa = StringArray{scalar}
	return nil
}
// parseYaml reads the file at the given path and decodes its YAML content
// into a Heuristics value. Read and decode errors are returned as-is.
func parseYaml(file string) (*Heuristics, error) {
	raw, err := ioutil.ReadFile(file)
	if err != nil {
		return nil, err
	}

	parsed := &Heuristics{}
	// Unmarshal is handed the address of the pointer variable itself.
	err = yaml.Unmarshal(raw, &parsed)
	if err != nil {
		return nil, err
	}
	return parsed, nil
}
// isUnsupportedRegexpSyntax reports whether reg uses regexp syntax that is
// not supported by RE2. The check is purely textual: it looks for markers of
// the cases we stumbled upon so far:
//   - lookbehind & lookahead
//   - non-backtracking subexpressions
//   - named & numbered capturing group/after text matching
//   - backreference
//   - possessive quantifier
//
// and additionally rejects `/.../`-delimited expressions behind a
// multi-line prefix.
// For reference on supported syntax see https://github.com/google/re2/wiki/Syntax
func isUnsupportedRegexpSyntax(reg string) bool {
	unsupported := []string{`(?<`, `(?=`, `(?!`, `(?>`, `\1`, `*+`}
	for _, marker := range unsupported {
		if strings.Contains(reg, marker) {
			return true
		}
	}

	// See https://github.com/github/linguist/pull/4243#discussion_r246105067
	return strings.HasPrefix(reg, multilinePrefix+`/`) && strings.HasSuffix(reg, `/`)
}
// isRE2 reports whether s is free of the constructs flagged by
// isUnsupportedRegexpSyntax; it is the plain negation of that check.
func isRE2(s string) bool {
return !isUnsupportedRegexpSyntax(s)
}

View File

@ -0,0 +1,127 @@
package generator
import (
"bytes"
"fmt"
"go/format"
"testing"
"text/template"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestYAMLParsing decodes test_files/heuristics.yml with parseYaml and pins
// the structure of the result: the number of disambiguations, selected
// rules of each of them, and the named patterns.
// The require.* calls guard the indexing done by the statements after them.
func TestYAMLParsing(t *testing.T) {
heuristics, err := parseYaml("test_files/heuristics.yml")
require.NoError(t, err)
assert.NotNil(t, heuristics)
// extensions
require.NotNil(t, heuristics.Disambiguations)
assert.Equal(t, 4, len(heuristics.Disambiguations))
assert.Equal(t, 2, len(heuristics.Disambiguations[0].Extensions))
// 1st disambiguation: a rule with a single pattern
rules := heuristics.Disambiguations[0].Rules
assert.Equal(t, 2, len(rules))
require.Equal(t, "Objective-C", rules[0].Languages[0])
assert.Equal(t, 1, len(rules[0].Pattern))
// 2nd disambiguation: a rule with a list of patterns
rules = heuristics.Disambiguations[1].Rules
assert.Equal(t, 3, len(rules))
require.Equal(t, "Forth", rules[0].Languages[0])
require.Equal(t, 2, len(rules[0].Pattern))
// 3rd disambiguation: an "and" rule whose first sub-rule is a negative pattern
rules = heuristics.Disambiguations[2].Rules
assert.Equal(t, 3, len(rules))
require.Equal(t, "Unix Assembly", rules[1].Languages[0])
require.NotNil(t, rules[1].And)
assert.Equal(t, 2, len(rules[1].And))
require.NotNil(t, rules[1].And[0].NegativePattern)
assert.Equal(t, "np", rules[1].And[0].NegativePattern)
// 4th disambiguation: a rule naming more than one language
rules = heuristics.Disambiguations[3].Rules
assert.Equal(t, 1, len(rules))
assert.Equal(t, "Linux Kernel Module", rules[0].Languages[0])
assert.Equal(t, "AMPL", rules[0].Languages[1])
// named_patterns
require.NotNil(t, heuristics.NamedPatterns)
assert.Equal(t, 2, len(heuristics.NamedPatterns))
assert.Equal(t, 1, len(heuristics.NamedPatterns["fortran"]))
assert.Equal(t, 2, len(heuristics.NamedPatterns["cpp"]))
}
// TestSingleRuleLoading checks loadRule on individual rules: a rule that
// refers to a named pattern, a rule with "and" sub-rules, and a rule whose
// And list is empty (which loadRule treats as an "Always" pattern).
func TestSingleRuleLoading(t *testing.T) {
	namedPatterns := map[string]StringArray{"cpp": {"cpp_ptrn1", "cpp_ptrn2"}}

	// named_pattern case
	namedRule := &Rule{Languages: []string{"a"}, Patterns: Patterns{NamedPattern: "cpp"}}
	langPattern := loadRule(namedPatterns, namedRule)
	require.Equal(t, "a", langPattern.Langs[0])
	assert.Equal(t, "Or", langPattern.Op)
	assert.NotEmpty(t, langPattern.Pattern)

	// and case: the sub-rules must be present, otherwise the And branch of
	// loadRule is never taken
	andRule := &Rule{
		Languages: []string{"b"},
		And: []*Rule{
			{Patterns: Patterns{NegativePattern: "np"}},
			{Patterns: Patterns{NamedPattern: "cpp"}},
		},
	}
	langPattern = loadRule(namedPatterns, andRule)
	require.Equal(t, "b", langPattern.Langs[0])
	assert.Equal(t, "And", langPattern.Op)
	require.Equal(t, 2, len(langPattern.Rules))
	assert.Equal(t, "Not", langPattern.Rules[0].Op)
	assert.Equal(t, "np", langPattern.Rules[0].Pattern)
	assert.Equal(t, "Or", langPattern.Rules[1].Op)

	// empty And list: no condition at all, so the rule always applies
	alwaysRule := &Rule{Languages: []string{"b"}, And: []*Rule{}}
	langPattern = loadRule(namedPatterns, alwaysRule)
	require.Equal(t, "b", langPattern.Langs[0])
	assert.Equal(t, "Always", langPattern.Op)
}
// TestLoadingAllHeuristics loads every disambiguation of
// test_files/heuristics.yml and checks the number of distinct extensions
// in the resulting map.
func TestLoadingAllHeuristics(t *testing.T) {
	parsedYaml, err := parseYaml("test_files/heuristics.yml")
	require.NoError(t, err)

	hs, err := loadHeuristics(parsedYaml)
	// a failed load must not be mistaken for a wrong extension count
	require.NoError(t, err)

	// grep -Eo "extensions:\ (.*)" internal/code-generator/generator/test_files/heuristics.yml
	assert.Equal(t, 5, len(hs))
}
func TestLoadingHeuristicsForSameExt(t *testing.T) {
parsedYaml := &Heuristics{
Disambiguations: []*Disambiguation{
&Disambiguation{
Extensions: []string{".a", ".b"},
Rules: []*Rule{&Rule{Languages: []string{"A"}}},
},
&Disambiguation{
Extensions: []string{".b"},
Rules: []*Rule{&Rule{Languages: []string{"B"}}},
},
},
}
_, err := loadHeuristics(parsedYaml)
require.Error(t, err)
}
// TestTemplateMatcherVars renders ../assets/content.go.tmpl with the
// heuristics loaded from test_files/heuristics.yml and checks that the
// output is not empty and is accepted by go/format.
func TestTemplateMatcherVars(t *testing.T) {
	parsed, err := parseYaml("test_files/heuristics.yml")
	require.NoError(t, err)

	heuristics, err := loadHeuristics(parsed)
	require.NoError(t, err)

	// render a tmpl
	const contentTmpl = "../assets/content.go.tmpl"
	tmpl, err := template.New("content.go.tmpl").Funcs(template.FuncMap{
		"stringVal": func(val string) string {
			return fmt.Sprintf("`%s`", val)
		},
	}).ParseFiles(contentTmpl)
	require.NoError(t, err)

	buf := bytes.NewBuffer(nil)
	err = tmpl.Execute(buf, heuristics)
	require.NoError(t, err, fmt.Sprintf("%+v", tmpl))
	require.NotEmpty(t, buf)

	// TODO(bzz) add more advanced test using go/ast package, to verify the
	// structure of generated code:
	// - check key literal exists in map for each extension:

	// format.Source returns no output when it fails, so the failure message
	// shows the rendered (unformatted) text that could not be parsed.
	_, err = format.Source(buf.Bytes())
	require.NoError(t, err, "\n%s\n", buf.String())
}

View File

@ -0,0 +1,48 @@
package generator
import (
"bytes"
"io"
"io/ioutil"
yaml "gopkg.in/yaml.v2"
)
// ID generates a Go map of language name -> language ID.
// It reads the YAML document at fileToParse, collects the ID of every
// language that has one, executes the template tmplName (from tmplPath)
// with that map and passes the rendered bytes to formatedWrite for outPath.
// samplesDir is not used; the signature is that of the generator.File type.
func ID(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error {
	raw, err := ioutil.ReadFile(fileToParse)
	if err != nil {
		return err
	}

	parsed := make(map[string]*languageInfo)
	err = yaml.Unmarshal(raw, &parsed)
	if err != nil {
		return err
	}

	var rendered bytes.Buffer
	idsByLanguage := buildLanguageIDMap(parsed)
	err = executeIDTemplate(&rendered, idsByLanguage, tmplPath, tmplName, commit)
	if err != nil {
		return err
	}
	return formatedWrite(outPath, rendered.Bytes())
}
// buildLanguageIDMap returns a map of language name -> language ID for the
// languages that carry an ID.
func buildLanguageIDMap(languages map[string]*languageInfo) map[string]int {
	ids := make(map[string]int)
	for name, meta := range languages {
		// NOTE: 0 is a valid language ID, so presence is decided by the
		// pointer being set rather than by comparing against the zero value.
		if meta.LanguageID == nil {
			continue
		}
		ids[name] = *meta.LanguageID
	}
	return ids
}
// executeIDTemplate renders the template tmplName from tmplPath into out by
// delegating to executeTemplate with no extra template functions and
// langIDMap as the template data.
func executeIDTemplate(out io.Writer, langIDMap map[string]int, tmplPath, tmplName, commit string) error {
return executeTemplate(out, tmplName, tmplPath, commit, nil, langIDMap)
}

View File

@ -0,0 +1,53 @@
package generator
import (
"bytes"
"io"
"io/ioutil"
"strings"
"text/template"
"gopkg.in/yaml.v2"
)
// Interpreters reads the YAML document at fileToParse, groups the language
// names by interpreter (visiting the languages in alphabetical order),
// executes the template tmplName (from tmplPath) with that map and passes
// the rendered bytes to formatedWrite for outPath.
// samplesDir is not used; the function complies with the type File signature.
func Interpreters(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error {
	raw, err := ioutil.ReadFile(fileToParse)
	if err != nil {
		return err
	}

	parsed := make(map[string]*languageInfo)
	err = yaml.Unmarshal(raw, &parsed)
	if err != nil {
		return err
	}

	sortedNames := getAlphabeticalOrderedKeys(parsed)
	byInterpreter := buildInterpreterLanguagesMap(parsed, sortedNames)

	var rendered bytes.Buffer
	err = executeInterpretersTemplate(&rendered, byInterpreter, tmplPath, tmplName, commit)
	if err != nil {
		return err
	}
	return formatedWrite(outPath, rendered.Bytes())
}
// buildInterpreterLanguagesMap returns a map of interpreter -> names of the
// languages listing that interpreter. Languages are visited in the order
// given by orderedKeys, which therefore is also the order of the names in
// each slice. Every key in orderedKeys must be present in languages.
func buildInterpreterLanguagesMap(languages map[string]*languageInfo, orderedKeys []string) map[string][]string {
	byInterpreter := make(map[string][]string)
	for _, name := range orderedKeys {
		for _, interp := range languages[name].Interpreters {
			byInterpreter[interp] = append(byInterpreter[interp], name)
		}
	}
	return byInterpreter
}
// executeInterpretersTemplate renders the template tmplName from tmplPath
// into out with languagesByInterpreter as data. It registers one template
// helper, formatStringSlice, which joins a slice into a double-quoted,
// comma-separated list (no escaping is applied to the elements).
func executeInterpretersTemplate(out io.Writer, languagesByInterpreter map[string][]string, tmplPath, tmplName, commit string) error {
	quoteJoined := func(slice []string) string {
		return `"` + strings.Join(slice, `","`) + `"`
	}
	helpers := template.FuncMap{"formatStringSlice": quoteJoined}
	return executeTemplate(out, tmplName, tmplPath, commit, helpers, languagesByInterpreter)
}

View File

@ -0,0 +1,62 @@
package generator
import (
"bytes"
"io"
"io/ioutil"
"sort"
"gopkg.in/yaml.v2"
)
// languageInfo holds the attributes of a single language as decoded from
// the languages YAML file; the generators in this package unmarshal that
// file into a map[string]*languageInfo keyed by language name.
type languageInfo struct {
FSName string `yaml:"fs_name"`
Type string `yaml:"type,omitempty"`
Color string `yaml:"color,omitempty"`
// Group is the group the language belongs to; empty when it has none.
Group string `yaml:"group,omitempty"`
Aliases []string `yaml:"aliases,omitempty"`
Extensions []string `yaml:"extensions,omitempty,flow"`
// Interpreters lists the interpreters associated with the language.
Interpreters []string `yaml:"interpreters,omitempty,flow"`
Filenames []string `yaml:"filenames,omitempty,flow"`
// MimeType is read from the codemirror_mime_type key.
MimeType string `yaml:"codemirror_mime_type,omitempty,flow"`
TMScope string `yaml:"tm_scope"`
AceMode string `yaml:"ace_mode"`
CodeMirrorMode string `yaml:"codemirror_mode"`
Wrap bool `yaml:"wrap"`
// LanguageID is a pointer so that a missing language_id can be told apart
// from 0, which is a valid ID.
LanguageID *int `yaml:"language_id,omitempty"`
}
// getAlphabeticalOrderedKeys returns the keys of languages sorted with
// sort.Strings, giving callers a deterministic iteration order over the map.
// The result is never nil, even for an empty map.
func getAlphabeticalOrderedKeys(languages map[string]*languageInfo) []string {
	names := make([]string, 0, len(languages))
	for name := range languages {
		names = append(names, name)
	}
	sort.Strings(names)
	return names
}
// LanguageInfo generates Go maps of language name -> LanguageInfo and
// language ID -> LanguageInfo.
// It reads the YAML document at fileToParse, executes the template tmplName
// (from tmplPath) with the decoded languages and passes the rendered bytes
// to formatedWrite for outPath.
// samplesDir is not used; the signature is that of the generator.File type.
func LanguageInfo(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error {
	raw, err := ioutil.ReadFile(fileToParse)
	if err != nil {
		return err
	}

	parsed := make(map[string]*languageInfo)
	err = yaml.Unmarshal(raw, &parsed)
	if err != nil {
		return err
	}

	var rendered bytes.Buffer
	err = executeLanguageInfoTemplate(&rendered, parsed, tmplPath, tmplName, commit)
	if err != nil {
		return err
	}
	return formatedWrite(outPath, rendered.Bytes())
}
// executeLanguageInfoTemplate renders the template tmplName from tmplPath
// into out by delegating to executeTemplate with no extra template
// functions and the decoded languages as the template data.
func executeLanguageInfoTemplate(out io.Writer, languages map[string]*languageInfo, tmplPath, tmplName, commit string) error {
return executeTemplate(out, tmplName, tmplPath, commit, nil, languages)
}

View File

@ -0,0 +1,14 @@
package generator
import (
"bytes"
)
// Commit executes the template tmplName (from tmplPath) for the given commit,
// without any template data, and passes the rendered bytes to formatedWrite
// for outPath. fileToParse and samplesDir are not used; the function
// complies with the type File signature.
func Commit(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error {
	var rendered bytes.Buffer
	err := executeTemplate(&rendered, tmplName, tmplPath, commit, nil, nil)
	if err != nil {
		return err
	}
	return formatedWrite(outPath, rendered.Bytes())
}

View File

@ -0,0 +1,47 @@
package generator
import (
"bytes"
"io"
"io/ioutil"
"gopkg.in/yaml.v2"
)
// MimeType generates a Go map of language name -> MIME string.
// It reads the YAML document at fileToParse, keeps the languages that
// declare a MIME type, executes the template tmplName (from tmplPath) with
// that map and passes the rendered bytes to formatedWrite for outPath.
// samplesDir is not used; the signature is that of the generator.File type.
func MimeType(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error {
	raw, err := ioutil.ReadFile(fileToParse)
	if err != nil {
		return err
	}

	parsed := make(map[string]*languageInfo)
	err = yaml.Unmarshal(raw, &parsed)
	if err != nil {
		return err
	}

	var rendered bytes.Buffer
	err = executeMimeTemplate(&rendered, buildLanguageMimeMap(parsed), tmplPath, tmplName, commit)
	if err != nil {
		return err
	}
	return formatedWrite(outPath, rendered.Bytes())
}
// buildLanguageMimeMap returns a map of language name -> MIME type that
// contains only the languages whose MimeType field is not empty.
func buildLanguageMimeMap(languages map[string]*languageInfo) map[string]string {
	mimes := make(map[string]string)
	for name, meta := range languages {
		if meta.MimeType == "" {
			continue
		}
		mimes[name] = meta.MimeType
	}
	return mimes
}
// executeMimeTemplate renders the template tmplName from tmplPath into out
// by delegating to executeTemplate with no extra template functions and
// langMimeMap as the template data.
func executeMimeTemplate(out io.Writer, langMimeMap map[string]string, tmplPath, tmplName, commit string) error {
return executeTemplate(out, tmplName, tmplPath, commit, nil, langMimeMap)
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,601 @@
// Code generated by github.com/go-enry/go-enry/v2/internal/code-generator DO NOT EDIT.
// Extracted from github/linguist commit: 42fd3c2514375fc6ad281552368edd3fb9f6ee7f
package data
var LanguagesColor = map[string]string{
"1C Enterprise": "#814CCC",
"2-Dimensional Array": "#38761D",
"4D": "#004289",
"ABAP": "#E8274B",
"ABAP CDS": "#555e25",
"AGS Script": "#B9D9FF",
"AIDL": "#34EB6B",
"AL": "#3AA2B5",
"AMPL": "#E6EFBB",
"ANTLR": "#9DC3FF",
"API Blueprint": "#2ACCA8",
"APL": "#5A8164",
"ASP.NET": "#9400ff",
"ATS": "#1ac620",
"ActionScript": "#882B0F",
"Ada": "#02f88c",
"Adblock Filter List": "#800000",
"Adobe Font Metrics": "#fa0f00",
"Agda": "#315665",
"Alloy": "#64C800",
"Alpine Abuild": "#0D597F",
"Altium Designer": "#A89663",
"AngelScript": "#C7D7DC",
"Ant Build System": "#A9157E",
"Antlers": "#ff269e",
"ApacheConf": "#d12127",
"Apex": "#1797c0",
"Apollo Guidance Computer": "#0B3D91",
"AppleScript": "#101F1F",
"Arc": "#aa2afe",
"AsciiDoc": "#73a0c5",
"AspectJ": "#a957b0",
"Assembly": "#6E4C13",
"Astro": "#ff5a03",
"Asymptote": "#ff0000",
"Augeas": "#9CC134",
"AutoHotkey": "#6594b9",
"AutoIt": "#1C3552",
"Avro IDL": "#0040FF",
"Awk": "#c30e9b",
"BASIC": "#ff0000",
"BQN": "#2b7067",
"Ballerina": "#FF5000",
"Batchfile": "#C1F12E",
"Beef": "#a52f4e",
"Berry": "#15A13C",
"BibTeX": "#778899",
"Bicep": "#519aba",
"Bikeshed": "#5562ac",
"Bison": "#6A463F",
"BitBake": "#00bce4",
"Blade": "#f7523f",
"BlitzBasic": "#00FFAE",
"BlitzMax": "#cd6400",
"Bluespec": "#12223c",
"Bluespec BH": "#12223c",
"Boo": "#d4bec1",
"Boogie": "#c80fa0",
"Brainfuck": "#2F2530",
"BrighterScript": "#66AABB",
"Brightscript": "#662D91",
"Browserslist": "#ffd539",
"C": "#555555",
"C#": "#178600",
"C++": "#f34b7d",
"CAP CDS": "#0092d1",
"CLIPS": "#00A300",
"CMake": "#DA3434",
"COLLADA": "#F1A42B",
"CSON": "#244776",
"CSS": "#563d7c",
"CSV": "#237346",
"CUE": "#5886E1",
"CWeb": "#00007a",
"Cabal Config": "#483465",
"Caddyfile": "#22b638",
"Cadence": "#00ef8b",
"Cairo": "#ff4a48",
"CameLIGO": "#3be133",
"Cap'n Proto": "#c42727",
"Ceylon": "#dfa535",
"Chapel": "#8dc63f",
"ChucK": "#3f8000",
"Circom": "#707575",
"Cirru": "#ccccff",
"Clarion": "#db901e",
"Clarity": "#5546ff",
"Classic ASP": "#6a40fd",
"Clean": "#3F85AF",
"Click": "#E4E6F3",
"Clojure": "#db5855",
"Closure Templates": "#0d948f",
"Cloud Firestore Security Rules": "#FFA000",
"CodeQL": "#140f46",
"CoffeeScript": "#244776",
"ColdFusion": "#ed2cd6",
"ColdFusion CFC": "#ed2cd6",
"Common Lisp": "#3fb68b",
"Common Workflow Language": "#B5314C",
"Component Pascal": "#B0CE4E",
"Coq": "#d0b68c",
"Crystal": "#000100",
"Csound": "#1a1a1a",
"Csound Document": "#1a1a1a",
"Csound Score": "#1a1a1a",
"Cuda": "#3A4E3A",
"Curry": "#531242",
"Cypher": "#34c0eb",
"Cython": "#fedf5b",
"D": "#ba595e",
"D2": "#526ee8",
"DM": "#447265",
"Dafny": "#FFEC25",
"Darcs Patch": "#8eff23",
"Dart": "#00B4AB",
"DataWeave": "#003a52",
"Debian Package Control File": "#D70751",
"DenizenScript": "#FBEE96",
"Dhall": "#dfafff",
"DirectX 3D File": "#aace60",
"Dockerfile": "#384d54",
"Dogescript": "#cca760",
"Dotenv": "#e5d559",
"Dylan": "#6c616e",
"E": "#ccce35",
"ECL": "#8a1267",
"ECLiPSe": "#001d9d",
"EJS": "#a91e50",
"EQ": "#a78649",
"Earthly": "#2af0ff",
"Easybuild": "#069406",
"Ecere Projects": "#913960",
"Ecmarkup": "#eb8131",
"Edge": "#0dffe0",
"EdgeQL": "#31A7FF",
"EditorConfig": "#fff1f2",
"Eiffel": "#4d6977",
"Elixir": "#6e4a7e",
"Elm": "#60B5CC",
"Elvish": "#55BB55",
"Elvish Transcript": "#55BB55",
"Emacs Lisp": "#c065db",
"EmberScript": "#FFF4F3",
"Erlang": "#B83998",
"Euphoria": "#FF790B",
"F#": "#b845fc",
"F*": "#572e30",
"FIGlet Font": "#FFDDBB",
"FIRRTL": "#2f632f",
"FLUX": "#88ccff",
"Factor": "#636746",
"Fancy": "#7b9db4",
"Fantom": "#14253c",
"Faust": "#c37240",
"Fennel": "#fff3d7",
"Filebench WML": "#F6B900",
"Fluent": "#ffcc33",
"Forth": "#341708",
"Fortran": "#4d41b1",
"Fortran Free Form": "#4d41b1",
"FreeBasic": "#141AC9",
"FreeMarker": "#0050b2",
"Frege": "#00cafe",
"Futhark": "#5f021f",
"G-code": "#D08CF2",
"GAML": "#FFC766",
"GAMS": "#f49a22",
"GAP": "#0000cc",
"GCC Machine Description": "#FFCFAB",
"GDScript": "#355570",
"GEDCOM": "#003058",
"GLSL": "#5686a5",
"GSC": "#FF6800",
"Game Maker Language": "#71b417",
"Gemfile.lock": "#701516",
"Gemini": "#ff6900",
"Genero 4gl": "#63408e",
"Genero per": "#d8df39",
"Genie": "#fb855d",
"Genshi": "#951531",
"Gentoo Ebuild": "#9400ff",
"Gentoo Eclass": "#9400ff",
"Gerber Image": "#d20b00",
"Gherkin": "#5B2063",
"Git Attributes": "#F44D27",
"Git Config": "#F44D27",
"Git Revision List": "#F44D27",
"Gleam": "#ffaff3",
"Glimmer JS": "#F5835F",
"Glimmer TS": "#3178c6",
"Glyph": "#c1ac7f",
"Gnuplot": "#f0a9f0",
"Go": "#00ADD8",
"Go Checksums": "#00ADD8",
"Go Module": "#00ADD8",
"Go Workspace": "#00ADD8",
"Godot Resource": "#355570",
"Golo": "#88562A",
"Gosu": "#82937f",
"Grace": "#615f8b",
"Gradle": "#02303a",
"Gradle Kotlin DSL": "#02303a",
"Grammatical Framework": "#ff0000",
"GraphQL": "#e10098",
"Graphviz (DOT)": "#2596be",
"Groovy": "#4298b8",
"Groovy Server Pages": "#4298b8",
"HAProxy": "#106da9",
"HCL": "#844FBA",
"HLSL": "#aace60",
"HOCON": "#9ff8ee",
"HTML": "#e34c26",
"HTML+ECR": "#2e1052",
"HTML+EEX": "#6e4a7e",
"HTML+ERB": "#701516",
"HTML+PHP": "#4f5d95",
"HTML+Razor": "#512be4",
"HTTP": "#005C9C",
"HXML": "#f68712",
"Hack": "#878787",
"Haml": "#ece2a9",
"Handlebars": "#f7931e",
"Harbour": "#0e60e3",
"Haskell": "#5e5086",
"Haxe": "#df7900",
"HiveQL": "#dce200",
"HolyC": "#ffefaf",
"Hosts File": "#308888",
"Hy": "#7790B2",
"IDL": "#a3522f",
"IGOR Pro": "#0000cc",
"INI": "#d1dbe0",
"Idris": "#b30000",
"Ignore List": "#000000",
"ImageJ Macro": "#99AAFF",
"Imba": "#16cec6",
"Inno Setup": "#264b99",
"Io": "#a9188d",
"Ioke": "#078193",
"Isabelle": "#FEFE00",
"Isabelle ROOT": "#FEFE00",
"J": "#9EEDFF",
"JAR Manifest": "#b07219",
"JCL": "#d90e09",
"JFlex": "#DBCA00",
"JSON": "#292929",
"JSON with Comments": "#292929",
"JSON5": "#267CB9",
"JSONLD": "#0c479c",
"JSONiq": "#40d47e",
"Janet": "#0886a5",
"Jasmin": "#d03600",
"Java": "#b07219",
"Java Properties": "#2A6277",
"Java Server Pages": "#2A6277",
"JavaScript": "#f1e05a",
"JavaScript+ERB": "#f1e05a",
"Jest Snapshot": "#15c213",
"JetBrains MPS": "#21D789",
"Jinja": "#a52a22",
"Jison": "#56b3cb",
"Jison Lex": "#56b3cb",
"Jolie": "#843179",
"Jsonnet": "#0064bd",
"Julia": "#a270ba",
"Julia REPL": "#a270ba",
"Jupyter Notebook": "#DA5B0B",
"Just": "#384d54",
"KRL": "#28430A",
"Kaitai Struct": "#773b37",
"KakouneScript": "#6f8042",
"KerboScript": "#41adf0",
"KiCad Layout": "#2f4aab",
"KiCad Legacy Layout": "#2f4aab",
"KiCad Schematic": "#2f4aab",
"Kotlin": "#A97BFF",
"LFE": "#4C3023",
"LLVM": "#185619",
"LOLCODE": "#cc9900",
"LSL": "#3d9970",
"LabVIEW": "#fede06",
"Lark": "#2980B9",
"Lasso": "#999999",
"Latte": "#f2a542",
"Less": "#1d365d",
"Lex": "#DBCA00",
"LigoLANG": "#0e74ff",
"LilyPond": "#9ccc7c",
"Liquid": "#67b8de",
"Literate Agda": "#315665",
"Literate CoffeeScript": "#244776",
"Literate Haskell": "#5e5086",
"LiveScript": "#499886",
"Logtalk": "#295b9a",
"LookML": "#652B81",
"Lua": "#000080",
"Luau": "#00A2FF",
"MATLAB": "#e16737",
"MAXScript": "#00a6a6",
"MDX": "#fcb32c",
"MLIR": "#5EC8DB",
"MQL4": "#62A8D6",
"MQL5": "#4A76B8",
"MTML": "#b7e1f4",
"Macaulay2": "#d8ffff",
"Makefile": "#427819",
"Mako": "#7e858d",
"Markdown": "#083fa1",
"Marko": "#42bff2",
"Mask": "#f97732",
"Mathematica": "#dd1100",
"Max": "#c4a79c",
"Mercury": "#ff2b2b",
"Mermaid": "#ff3670",
"Meson": "#007800",
"Metal": "#8f14e9",
"MiniYAML": "#ff1111",
"Mint": "#02b046",
"Mirah": "#c7a938",
"Modelica": "#de1d31",
"Modula-2": "#10253f",
"Modula-3": "#223388",
"Mojo": "#ff4c1f",
"Monkey C": "#8D6747",
"MoonScript": "#ff4585",
"Motoko": "#fbb03b",
"Motorola 68K Assembly": "#005daa",
"Move": "#4a137a",
"Mustache": "#724b3b",
"NCL": "#28431f",
"NMODL": "#00356B",
"NPM Config": "#cb3837",
"NWScript": "#111522",
"Nasal": "#1d2c4e",
"Nearley": "#990000",
"Nemerle": "#3d3c6e",
"NetLinx": "#0aa0ff",
"NetLinx+ERB": "#747faa",
"NetLogo": "#ff6375",
"NewLisp": "#87AED7",
"Nextflow": "#3ac486",
"Nginx": "#009639",
"Nim": "#ffc200",
"Nit": "#009917",
"Nix": "#7e7eff",
"Nu": "#c9df40",
"NumPy": "#9C8AF9",
"Nunjucks": "#3d8137",
"Nushell": "#4E9906",
"OASv2-json": "#85ea2d",
"OASv2-yaml": "#85ea2d",
"OASv3-json": "#85ea2d",
"OASv3-yaml": "#85ea2d",
"OCaml": "#ef7a08",
"ObjectScript": "#424893",
"Objective-C": "#438eff",
"Objective-C++": "#6866fb",
"Objective-J": "#ff0c5a",
"Odin": "#60AFFE",
"Omgrofl": "#cabbff",
"Opal": "#f7ede0",
"Open Policy Agent": "#7d9199",
"OpenAPI Specification v2": "#85ea2d",
"OpenAPI Specification v3": "#85ea2d",
"OpenCL": "#ed2e2d",
"OpenEdge ABL": "#5ce600",
"OpenQASM": "#AA70FF",
"OpenSCAD": "#e5cd45",
"Option List": "#476732",
"Org": "#77aa99",
"Oxygene": "#cdd0e3",
"Oz": "#fab738",
"P4": "#7055b5",
"PDDL": "#0d00ff",
"PEG.js": "#234d6b",
"PHP": "#4F5D95",
"PLSQL": "#dad8d8",
"PLpgSQL": "#336790",
"POV-Ray SDL": "#6bac65",
"Pact": "#F7A8B8",
"Pan": "#cc0000",
"Papyrus": "#6600cc",
"Parrot": "#f3ca0a",
"Pascal": "#E3F171",
"Pawn": "#dbb284",
"Pep8": "#C76F5B",
"Perl": "#0298c3",
"PicoLisp": "#6067af",
"PigLatin": "#fcd7de",
"Pike": "#005390",
"Pip Requirements": "#FFD343",
"Pkl": "#6b9543",
"PlantUML": "#fbbd16",
"PogoScript": "#d80074",
"Polar": "#ae81ff",
"Portugol": "#f8bd00",
"PostCSS": "#dc3a0c",
"PostScript": "#da291c",
"PowerBuilder": "#8f0f8d",
"PowerShell": "#012456",
"Praat": "#c8506d",
"Prisma": "#0c344b",
"Processing": "#0096D8",
"Procfile": "#3B2F63",
"Prolog": "#74283c",
"Promela": "#de0000",
"Propeller Spin": "#7fa2a7",
"Pug": "#a86454",
"Puppet": "#302B6D",
"PureBasic": "#5a6986",
"PureScript": "#1D222D",
"Pyret": "#ee1e10",
"Python": "#3572A5",
"Python console": "#3572A5",
"Python traceback": "#3572A5",
"Q#": "#fed659",
"QML": "#44a51c",
"Qt Script": "#00b841",
"Quake": "#882233",
"R": "#198CE7",
"RAML": "#77d9fb",
"RBS": "#701516",
"RDoc": "#701516",
"REXX": "#d90e09",
"RMarkdown": "#198ce7",
"RON": "#a62c00",
"RPGLE": "#2BDE21",
"RUNOFF": "#665a4e",
"Racket": "#3c5caa",
"Ragel": "#9d5200",
"Raku": "#0000fb",
"Rascal": "#fffaa0",
"ReScript": "#ed5051",
"Reason": "#ff5847",
"ReasonLIGO": "#ff5847",
"Rebol": "#358a5b",
"Record Jar": "#0673ba",
"Red": "#f50000",
"Regular Expression": "#009a00",
"Ren'Py": "#ff7f7f",
"Rez": "#FFDAB3",
"Ring": "#2D54CB",
"Riot": "#A71E49",
"RobotFramework": "#00c0b5",
"Roc": "#7c38f5",
"Roff": "#ecdebe",
"Roff Manpage": "#ecdebe",
"Rouge": "#cc0088",
"RouterOS Script": "#DE3941",
"Ruby": "#701516",
"Rust": "#dea584",
"SAS": "#B34936",
"SCSS": "#c6538c",
"SPARQL": "#0C4597",
"SQF": "#3F3F3F",
"SQL": "#e38c00",
"SQLPL": "#e38c00",
"SRecode Template": "#348a34",
"STL": "#373b5e",
"SVG": "#ff9900",
"SaltStack": "#646464",
"Sass": "#a53b70",
"Scala": "#c22d40",
"Scaml": "#bd181a",
"Scenic": "#fdc700",
"Scheme": "#1e4aec",
"Scilab": "#ca0f21",
"Self": "#0579aa",
"ShaderLab": "#222c37",
"Shell": "#89e051",
"ShellCheck Config": "#cecfcb",
"Shen": "#120F14",
"Simple File Verification": "#C9BFED",
"Singularity": "#64E6AD",
"Slash": "#007eff",
"Slice": "#003fa2",
"Slim": "#2b2b2b",
"Slint": "#2379F4",
"SmPL": "#c94949",
"Smalltalk": "#596706",
"Smarty": "#f0c040",
"Smithy": "#c44536",
"Snakemake": "#419179",
"Solidity": "#AA6746",
"SourcePawn": "#f69e1d",
"Squirrel": "#800000",
"Stan": "#b2011d",
"Standard ML": "#dc566d",
"Starlark": "#76d275",
"Stata": "#1a5f91",
"StringTemplate": "#3fb34f",
"Stylus": "#ff6347",
"SubRip Text": "#9e0101",
"SugarSS": "#2fcc9f",
"SuperCollider": "#46390b",
"Svelte": "#ff3e00",
"Sway": "#00F58C",
"Sweave": "#198ce7",
"Swift": "#F05138",
"SystemVerilog": "#DAE1C2",
"TI Program": "#A0AA87",
"TL-Verilog": "#C40023",
"TLA": "#4b0079",
"TOML": "#9c4221",
"TSQL": "#e38c00",
"TSV": "#237346",
"TSX": "#3178c6",
"TXL": "#0178b8",
"Talon": "#333333",
"Tcl": "#e4cc98",
"TeX": "#3D6117",
"Terra": "#00004c",
"Terraform Template": "#7b42bb",
"TextGrid": "#c8506d",
"TextMate Properties": "#df66e4",
"Textile": "#ffe7ac",
"Thrift": "#D12127",
"Toit": "#c2c9fb",
"Turing": "#cf142b",
"Twig": "#c1d026",
"TypeScript": "#3178c6",
"Typst": "#239dad",
"Unified Parallel C": "#4e3617",
"Unity3D Asset": "#222c37",
"Uno": "#9933cc",
"UnrealScript": "#a54c4d",
"UrWeb": "#ccccee",
"V": "#4f87c4",
"VBA": "#867db1",
"VBScript": "#15dcdc",
"VCL": "#148AA8",
"VHDL": "#adb2cb",
"Vala": "#a56de2",
"Valve Data Format": "#f26025",
"Velocity Template Language": "#507cff",
"Verilog": "#b2b7f8",
"Vim Help File": "#199f4b",
"Vim Script": "#199f4b",
"Vim Snippet": "#199f4b",
"Visual Basic .NET": "#945db7",
"Visual Basic 6.0": "#2c6353",
"Volt": "#1F1F1F",
"Vue": "#41b883",
"Vyper": "#2980b9",
"WDL": "#42f1f4",
"WGSL": "#1a5e9a",
"Web Ontology Language": "#5b70bd",
"WebAssembly": "#04133b",
"WebAssembly Interface Type": "#6250e7",
"Whiley": "#d5c397",
"Wikitext": "#fc5757",
"Windows Registry Entries": "#52d5ff",
"Witcher Script": "#ff0000",
"Wollok": "#a23738",
"World of Warcraft Addon Data": "#f7e43f",
"Wren": "#383838",
"X10": "#4B6BEF",
"XC": "#99DA07",
"XML": "#0060ac",
"XML Property List": "#0060ac",
"XQuery": "#5232e7",
"XSLT": "#EB8CEB",
"Xojo": "#81bd41",
"Xonsh": "#285EEF",
"Xtend": "#24255d",
"YAML": "#cb171e",
"YARA": "#220000",
"YASnippet": "#32AB90",
"Yacc": "#4B6C4B",
"Yul": "#794932",
"ZAP": "#0d665e",
"ZIL": "#dc75e5",
"ZenScript": "#00BCD1",
"Zephir": "#118f9e",
"Zig": "#ec915c",
"Zimpl": "#d67711",
"crontab": "#ead7ac",
"eC": "#913960",
"fish": "#4aae47",
"hoon": "#00b171",
"jq": "#c7254e",
"kvlang": "#1da6e0",
"mIRC Script": "#3d57c3",
"mcfunction": "#E22837",
"mupad": "#244963",
"nanorc": "#2d004d",
"nesC": "#94B0C7",
"ooc": "#b0b77e",
"q": "#0040cd",
"reStructuredText": "#141414",
"sed": "#64b970",
"templ": "#66D0DD",
"wisp": "#7582D1",
"xBase": "#403a40",
}

View File

@ -0,0 +1,7 @@
// Code generated by github.com/go-enry/go-enry/v2/internal/code-generator DO NOT EDIT.
// Extracted from github/linguist commit: 42fd3c2514375fc6ad281552368edd3fb9f6ee7f
package data
// LinguistCommit is the github/linguist commit hash from which the
// generated files were extracted.
var LinguistCommit = "42fd3c2514375fc6ad281552368edd3fb9f6ee7f"

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,27 @@
// Code generated by github.com/go-enry/go-enry/v2/internal/code-generator DO NOT EDIT.
// Extracted from github/linguist commit: 42fd3c2514375fc6ad281552368edd3fb9f6ee7f
package data
import "github.com/go-enry/go-enry/v2/regex"
// DocumentationMatchers lists the compiled regular expressions that match
// paths regarded as documentation: doc/example/demo/sample directories and
// well-known files such as README, LICENSE, COPYING, INSTALL, CHANGELOG,
// CONTRIBUTING and CITATION.
//
// Patterns anchored with `^` only match at the start of the path, while
// those starting with `(^|/)` also match after any directory separator.
var DocumentationMatchers = []regex.EnryRegexp{
regex.MustCompile(`^[Dd]ocs?/`),
regex.MustCompile(`(^|/)[Dd]ocumentation/`),
regex.MustCompile(`(^|/)[Gg]roovydoc/`),
regex.MustCompile(`(^|/)[Jj]avadoc/`),
regex.MustCompile(`^[Mm]an/`),
regex.MustCompile(`^[Ee]xamples/`),
regex.MustCompile(`^[Dd]emos?/`),
regex.MustCompile(`(^|/)inst/doc/`),
regex.MustCompile(`(^|/)CITATION(\.cff|(S)?(\.(bib|md))?)$`),
regex.MustCompile(`(^|/)CHANGE(S|LOG)?(\.|$)`),
regex.MustCompile(`(^|/)CONTRIBUTING(\.|$)`),
regex.MustCompile(`(^|/)COPYING(\.|$)`),
regex.MustCompile(`(^|/)INSTALL(\.|$)`),
regex.MustCompile(`(^|/)LICEN[CS]E(\.|$)`),
regex.MustCompile(`(^|/)[Ll]icen[cs]e(\.|$)`),
regex.MustCompile(`(^|/)README(\.|$)`),
regex.MustCompile(`(^|/)[Rr]eadme(\.|$)`),
regex.MustCompile(`^[Ss]amples?/`),
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,376 @@
// Code generated by github.com/go-enry/go-enry/v2/internal/code-generator DO NOT EDIT.
// Extracted from github/linguist commit: 42fd3c2514375fc6ad281552368edd3fb9f6ee7f
package data
// LanguagesByFilename maps a file name (for example "Makefile" or ".bashrc")
// to the names of the candidate languages for a file with that name.
// Keys are case-sensitive: differently-cased spellings such as "Justfile",
// "JUSTFILE" and "justfile" are separate entries. Most names map to a single
// language; a few (e.g. "hosts") list several candidates.
var LanguagesByFilename = map[string][]string{
".JUSTFILE": {"Just"},
".Justfile": {"Just"},
".Rprofile": {"R"},
".XCompose": {"XCompose"},
".abbrev_defs": {"Emacs Lisp"},
".ackrc": {"Option List"},
".all-contributorsrc": {"JSON"},
".arcconfig": {"JSON"},
".atomignore": {"Ignore List"},
".auto-changelog": {"JSON"},
".babelignore": {"Ignore List"},
".babelrc": {"JSON with Comments"},
".bash_aliases": {"Shell"},
".bash_functions": {"Shell"},
".bash_history": {"Shell"},
".bash_logout": {"Shell"},
".bash_profile": {"Shell"},
".bashrc": {"Shell"},
".browserslistrc": {"Browserslist"},
".bzrignore": {"Ignore List"},
".c8rc": {"JSON"},
".clang-format": {"YAML"},
".clang-tidy": {"YAML"},
".classpath": {"XML"},
".coffeelintignore": {"Ignore List"},
".coveragerc": {"INI"},
".cproject": {"XML"},
".cshrc": {"Shell"},
".curlrc": {"cURL Config"},
".cvsignore": {"Ignore List"},
".devcontainer.json": {"JSON with Comments"},
".dir_colors": {"dircolors"},
".dircolors": {"dircolors"},
".dockerignore": {"Ignore List"},
".editorconfig": {"EditorConfig"},
".eleventyignore": {"Ignore List"},
".emacs": {"Emacs Lisp"},
".emacs.desktop": {"Emacs Lisp"},
".env": {"Dotenv"},
".env.ci": {"Dotenv"},
".env.dev": {"Dotenv"},
".env.development": {"Dotenv"},
".env.development.local": {"Dotenv"},
".env.example": {"Dotenv"},
".env.local": {"Dotenv"},
".env.prod": {"Dotenv"},
".env.production": {"Dotenv"},
".env.sample": {"Dotenv"},
".env.staging": {"Dotenv"},
".env.test": {"Dotenv"},
".env.testing": {"Dotenv"},
".envrc": {"Shell"},
".eslintignore": {"Ignore List"},
".eslintrc.json": {"JSON with Comments"},
".exrc": {"Vim Script"},
".factor-boot-rc": {"Factor"},
".factor-rc": {"Factor"},
".flake8": {"INI"},
".flaskenv": {"Shell"},
".gclient": {"Python"},
".gemrc": {"YAML"},
".git-blame-ignore-revs": {"Git Revision List"},
".gitattributes": {"Git Attributes"},
".gitconfig": {"Git Config"},
".gitignore": {"Ignore List"},
".gitmodules": {"Git Config"},
".gn": {"GN"},
".gnus": {"Emacs Lisp"},
".gvimrc": {"Vim Script"},
".htaccess": {"ApacheConf"},
".htmlhintrc": {"JSON"},
".imgbotconfig": {"JSON"},
".inputrc": {"Readline Config"},
".irbrc": {"Ruby"},
".jscsrc": {"JSON with Comments"},
".jshintrc": {"JSON with Comments"},
".jslintrc": {"JSON with Comments"},
".justfile": {"Just"},
".kshrc": {"Shell"},
".latexmkrc": {"Perl"},
".login": {"Shell"},
".luacheckrc": {"Lua"},
".markdownlintignore": {"Ignore List"},
".nanorc": {"nanorc"},
".nodemonignore": {"Ignore List"},
".npmignore": {"Ignore List"},
".npmrc": {"NPM Config"},
".nvimrc": {"Vim Script"},
".nycrc": {"JSON"},
".php": {"PHP"},
".php_cs": {"PHP"},
".php_cs.dist": {"PHP"},
".prettierignore": {"Ignore List"},
".profile": {"Shell"},
".project": {"XML"},
".pryrc": {"Ruby"},
".pylintrc": {"INI"},
".rspec": {"Option List"},
".scalafix.conf": {"HOCON"},
".scalafmt.conf": {"HOCON"},
".shellcheckrc": {"ShellCheck Config"},
".simplecov": {"Ruby"},
".spacemacs": {"Emacs Lisp"},
".stylelintignore": {"Ignore List"},
".swcrc": {"JSON with Comments"},
".tern-config": {"JSON"},
".tern-project": {"JSON"},
".tm_properties": {"TextMate Properties"},
".tmux.conf": {"Shell"},
".vercelignore": {"Ignore List"},
".vimrc": {"Vim Script"},
".viper": {"Emacs Lisp"},
".vscodeignore": {"Ignore List"},
".watchmanconfig": {"JSON"},
".wgetrc": {"Wget Config"},
".yardopts": {"Option List"},
".zlogin": {"Shell"},
".zlogout": {"Shell"},
".zprofile": {"Shell"},
".zshenv": {"Shell"},
".zshrc": {"Shell"},
"9fs": {"Shell"},
"APKBUILD": {"Alpine Abuild"},
"Android.bp": {"Soong"},
"App.config": {"XML"},
"Appraisals": {"Ruby"},
"BSDmakefile": {"Makefile"},
"BUCK": {"Starlark"},
"BUILD": {"Starlark"},
"BUILD.bazel": {"Starlark"},
"Berksfile": {"Ruby"},
"Brewfile": {"Ruby"},
"Buildfile": {"Ruby"},
"CITATION": {"Text"},
"CITATION.cff": {"YAML"},
"CITATIONS": {"Text"},
"CMakeLists.txt": {"CMake"},
"CODEOWNERS": {"CODEOWNERS"},
"COPYING": {"Text"},
"COPYING.regex": {"Text"},
"COPYRIGHT.regex": {"Text"},
"Caddyfile": {"Caddyfile"},
"Cakefile": {"CoffeeScript"},
"Capfile": {"Ruby"},
"Cargo.lock": {"TOML"},
"Cargo.toml.orig": {"TOML"},
"Cask": {"Emacs Lisp"},
"Containerfile": {"Dockerfile"},
"DEPS": {"Python"},
"DIR_COLORS": {"dircolors"},
"Dangerfile": {"Ruby"},
"Deliverfile": {"Ruby"},
"Dockerfile": {"Dockerfile"},
"Earthfile": {"Earthly"},
"Emakefile": {"Erlang"},
"FONTLOG": {"Text"},
"Fakefile": {"Fancy"},
"Fastfile": {"Ruby"},
"GNUmakefile": {"Makefile"},
"Gemfile": {"Ruby"},
"Gemfile.lock": {"Gemfile.lock"},
"Gopkg.lock": {"TOML"},
"Guardfile": {"Ruby"},
"HOSTS": {"Hosts File", "INI"},
"INSTALL": {"Text"},
"INSTALL.mysql": {"Text"},
"JUSTFILE": {"Just"},
"Jakefile": {"JavaScript"},
"Jarfile": {"Ruby"},
"Jenkinsfile": {"Groovy"},
"Justfile": {"Just"},
"Kbuild": {"Makefile"},
"LICENSE": {"Text"},
"LICENSE.mysql": {"Text"},
"Lexer.x": {"Lex"},
"MANIFEST.MF": {"JAR Manifest"},
"MD5SUMS": {"Checksums"},
"MODULE.bazel": {"Starlark"},
"MODULE.bazel.lock": {"JSON"},
"Makefile": {"Makefile"},
"Makefile.PL": {"Perl"},
"Makefile.am": {"Makefile"},
"Makefile.boot": {"Makefile"},
"Makefile.frag": {"Makefile"},
"Makefile.in": {"Makefile"},
"Makefile.inc": {"Makefile"},
"Makefile.wat": {"Makefile"},
"Mavenfile": {"Ruby"},
"Modulefile": {"Puppet"},
"NEWS": {"Text"},
"Notebook": {"Jupyter Notebook"},
"NuGet.config": {"XML"},
"Nukefile": {"Nu"},
"PKGBUILD": {"Shell"},
"Phakefile": {"PHP"},
"Pipfile": {"TOML"},
"Pipfile.lock": {"JSON"},
"Podfile": {"Ruby"},
"Procfile": {"Procfile"},
"Project.ede": {"Emacs Lisp"},
"Puppetfile": {"Ruby"},
"README.me": {"Text"},
"README.mysql": {"Text"},
"README.nss": {"Text"},
"ROOT": {"Isabelle ROOT"},
"Rakefile": {"Ruby"},
"Rexfile": {"Perl"},
"SConscript": {"Python"},
"SConstruct": {"Python"},
"SHA1SUMS": {"Checksums"},
"SHA256SUMS": {"Checksums"},
"SHA256SUMS.txt": {"Checksums"},
"SHA512SUMS": {"Checksums"},
"Settings.StyleCop": {"XML"},
"Singularity": {"Singularity"},
"Slakefile": {"LiveScript"},
"Snakefile": {"Snakemake"},
"Snapfile": {"Ruby"},
"Steepfile": {"Ruby"},
"Thorfile": {"Ruby"},
"Tiltfile": {"Starlark"},
"Vagrantfile": {"Ruby"},
"WORKSPACE": {"Starlark"},
"WORKSPACE.bazel": {"Starlark"},
"Web.Debug.config": {"XML"},
"Web.Release.config": {"XML"},
"Web.config": {"XML"},
"XCompose": {"XCompose"},
"_curlrc": {"cURL Config"},
"_dir_colors": {"dircolors"},
"_dircolors": {"dircolors"},
"_emacs": {"Emacs Lisp"},
"_redirects": {"Redirect Rules"},
"_vimrc": {"Vim Script"},
"abbrev_defs": {"Emacs Lisp"},
"ack": {"Perl"},
"ackrc": {"Option List"},
"ant.xml": {"Ant Build System"},
"apache2.conf": {"ApacheConf"},
"api-extractor.json": {"JSON with Comments"},
"bash_aliases": {"Shell"},
"bash_logout": {"Shell"},
"bash_profile": {"Shell"},
"bashrc": {"Shell"},
"browserslist": {"Browserslist"},
"build.xml": {"Ant Build System"},
"buildfile": {"Ruby"},
"buildozer.spec": {"INI"},
"cabal.config": {"Cabal Config"},
"cabal.project": {"Cabal Config"},
"checksums.txt": {"Checksums"},
"cksums": {"Checksums"},
"click.me": {"Text"},
"composer.lock": {"JSON"},
"configure.ac": {"M4Sugar"},
"contents.lr": {"Markdown"},
"cpanfile": {"Perl"},
"crontab": {"crontab"},
"cshrc": {"Shell"},
"delete.me": {"Text"},
"deno.lock": {"JSON"},
"descrip.mmk": {"Module Management System"},
"descrip.mms": {"Module Management System"},
"devcontainer.json": {"JSON with Comments"},
"dir_colors": {"dircolors"},
"encodings.dir": {"X Font Directory Index"},
"eqnrc": {"Roff"},
"expr-dist": {"R"},
"file_contexts": {"SELinux Policy"},
"firestore.rules": {"Cloud Firestore Security Rules"},
"flake.lock": {"JSON"},
"fonts.alias": {"X Font Directory Index"},
"fonts.dir": {"X Font Directory Index"},
"fonts.scale": {"X Font Directory Index"},
"fp-lib-table": {"KiCad Layout"},
"genfs_contexts": {"SELinux Policy"},
"gitignore-global": {"Ignore List"},
"gitignore_global": {"Ignore List"},
"glide.lock": {"YAML"},
"go.mod": {"Go Module"},
"go.sum": {"Go Checksums"},
"go.work": {"Go Workspace"},
"go.work.sum": {"Go Checksums"},
"gradlew": {"Shell"},
"gvimrc": {"Vim Script"},
"haproxy.cfg": {"HAProxy"},
"hosts": {"Hosts File", "INI"},
"httpd.conf": {"ApacheConf"},
"initial_sids": {"SELinux Policy"},
"inputrc": {"Readline Config"},
"installscript.qs": {"Qt Script"},
"jsconfig.json": {"JSON with Comments"},
"justfile": {"Just"},
"kakrc": {"KakouneScript"},
"keep.me": {"Text"},
"kshrc": {"Shell"},
"language-configuration.json": {"JSON with Comments"},
"language-subtag-registry.txt": {"Record Jar"},
"latexmkrc": {"Perl"},
"ld.script": {"Linker Script"},
"lexer.x": {"Lex"},
"login": {"Shell"},
"m3makefile": {"Quake"},
"m3overrides": {"Quake"},
"makefile": {"Makefile"},
"makefile.sco": {"Makefile"},
"man": {"Shell"},
"mcmod.info": {"JSON"},
"md5sum.txt": {"Checksums"},
"meson.build": {"Meson"},
"meson_options.txt": {"Meson"},
"mix.lock": {"Elixir"},
"mkfile": {"Makefile"},
"mmn": {"Roff"},
"mmt": {"Roff"},
"mocha.opts": {"Option List"},
"nanorc": {"nanorc"},
"nextflow.config": {"Nextflow"},
"nginx.conf": {"Nginx"},
"nim.cfg": {"Nim"},
"nvimrc": {"Vim Script"},
"owh": {"Tcl"},
"package.mask": {"Text"},
"package.use.mask": {"Text"},
"package.use.stable.mask": {"Text"},
"packages.config": {"XML"},
"pdm.lock": {"TOML"},
"poetry.lock": {"TOML"},
"pom.xml": {"Maven POM"},
"port_contexts": {"SELinux Policy"},
"profile": {"Shell"},
"project.godot": {"Godot Resource"},
"pylintrc": {"INI"},
"read.me": {"Text"},
"readme.1st": {"Text"},
"rebar.config": {"Erlang"},
"rebar.config.lock": {"Erlang"},
"rebar.lock": {"Erlang"},
"requirements-dev.txt": {"Pip Requirements"},
"requirements.txt": {"Pip Requirements"},
"riemann.config": {"Clojure"},
"robots.txt": {"robots.txt"},
"security_classes": {"SELinux Policy"},
"ssh-config": {"SSH Config"},
"ssh_config": {"SSH Config"},
"sshconfig": {"SSH Config"},
"sshconfig.snip": {"SSH Config"},
"sshd-config": {"SSH Config"},
"sshd_config": {"SSH Config"},
"starfield": {"Tcl"},
"test.me": {"Text"},
"tmux.conf": {"Shell"},
"toolchain_installscript.qs": {"Qt Script"},
"troffrc": {"Roff"},
"troffrc-end": {"Roff"},
"tsconfig.json": {"JSON with Comments"},
"tslint.json": {"JSON with Comments"},
"use.mask": {"Text"},
"use.stable.mask": {"Text"},
"vimrc": {"Vim Script"},
"vlcrc": {"INI"},
"wscript": {"Python"},
"xcompose": {"XCompose"},
"yarn.lock": {"YAML"},
"zlogin": {"Shell"},
"zlogout": {"Shell"},
"zprofile": {"Shell"},
"zshenv": {"Shell"},
"zshrc": {"Shell"},
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,87 @@
// Code generated by github.com/go-enry/go-enry/v2/internal/code-generator DO NOT EDIT.
// Extracted from github/linguist commit: 42fd3c2514375fc6ad281552368edd3fb9f6ee7f
package data
// LanguagesGroup maps a language name to the name of the language group it
// belongs to (for example "TSX" belongs to "TypeScript"). Languages without
// an entry have no group recorded here; a couple of entries ("Fortran",
// "LigoLANG") name themselves as their own group.
var LanguagesGroup = map[string]string{
"Alpine Abuild": "Shell",
"Apollo Guidance Computer": "Assembly",
"BibTeX": "TeX",
"Bison": "Yacc",
"Bluespec BH": "Bluespec",
"C2hs Haskell": "Haskell",
"CameLIGO": "LigoLANG",
"ColdFusion CFC": "ColdFusion",
"ECLiPSe": "Prolog",
"Easybuild": "Python",
"Ecere Projects": "JavaScript",
"Ecmarkup": "HTML",
"EditorConfig": "INI",
"Elvish Transcript": "Elvish",
"Filterscript": "RenderScript",
"Fortran": "Fortran",
"Fortran Free Form": "Fortran",
"Gentoo Ebuild": "Shell",
"Gentoo Eclass": "Shell",
"Git Config": "INI",
"Glimmer JS": "JavaScript",
"Glimmer TS": "TypeScript",
"Gradle Kotlin DSL": "Gradle",
"Groovy Server Pages": "Groovy",
"HTML+ECR": "HTML",
"HTML+EEX": "HTML",
"HTML+ERB": "HTML",
"HTML+PHP": "HTML",
"HTML+Razor": "HTML",
"Isabelle ROOT": "Isabelle",
"JFlex": "Lex",
"JSON with Comments": "JSON",
"Java Server Pages": "Java",
"JavaScript+ERB": "JavaScript",
"Jison": "Yacc",
"Jison Lex": "Lex",
"Julia REPL": "Julia",
"Lean 4": "Lean",
"LigoLANG": "LigoLANG",
"Literate Agda": "Agda",
"Literate CoffeeScript": "CoffeeScript",
"Literate Haskell": "Haskell",
"M4Sugar": "M4",
"MUF": "Forth",
"Maven POM": "XML",
"Motorola 68K Assembly": "Assembly",
"NPM Config": "INI",
"NumPy": "Python",
"OASv2-json": "OpenAPI Specification v2",
"OASv2-yaml": "OpenAPI Specification v2",
"OASv3-json": "OpenAPI Specification v3",
"OASv3-yaml": "OpenAPI Specification v3",
"OpenCL": "C",
"OpenRC runscript": "Shell",
"Parrot Assembly": "Parrot",
"Parrot Internal Representation": "Parrot",
"Pic": "Roff",
"PostCSS": "CSS",
"Python console": "Python",
"Python traceback": "Python",
"RBS": "Ruby",
"Readline Config": "INI",
"ReasonLIGO": "LigoLANG",
"Roff Manpage": "Roff",
"SSH Config": "INI",
"STON": "Smalltalk",
"Simple File Verification": "Checksums",
"Snakemake": "Python",
"TSX": "TypeScript",
"Tcsh": "Shell",
"Terraform Template": "HCL",
"Unified Parallel C": "C",
"Unix Assembly": "Assembly",
"Wget Config": "INI",
"X BitMap": "C",
"X PixMap": "C",
"XML Property List": "XML",
"cURL Config": "INI",
"fish": "Shell",
"nanorc": "INI",
}

View File

@ -0,0 +1,40 @@
# Tests care about number and order of heuristics in this fixture
#
# Each entry under `disambiguations` names a set of file extensions and a list
# of rules; a rule pairs a language (or a list of languages) with the
# pattern(s) attached to it.
disambiguations:
- extensions: ['.h', '.hh']
rules:
- language: Objective-C
pattern: 'objc'
- language: C++
named_pattern: cpp # refers to the `cpp` entry under named_patterns
- extensions: ['.f']
rules:
- language: Forth
pattern: # a list of patterns rather than a single string (as in .md)
- 'f'
- 'f1'
- language: Filebench WML
pattern: 'f2'
- language: Fortran
named_pattern: fortran # refers to the `fortran` entry under named_patterns
- extensions: ['.ms']
rules:
- language: Roff
pattern: 'rp'
- language: Unix Assembly
and: # combines a negative_pattern with a pattern
- negative_pattern: 'np'
- pattern: 'p'
- language: MAXScript # rule without any pattern — presumably a fallback; confirm
- extensions: ['.mod']
rules:
- language: [Linux Kernel Module, AMPL] # one rule naming several languages
# Reusable patterns, looked up by name from `named_pattern` in the rules above.
# A named pattern may be a list of regexes (cpp) or a single one (fortran).
named_patterns:
cpp:
- 'regex1'
- 'regex2'
fortran: 'regex3'

View File

@ -0,0 +1,160 @@
// Code generated by github.com/go-enry/go-enry/v2/internal/code-generator DO NOT EDIT.
// Extracted from github/linguist commit: 42fd3c2514375fc6ad281552368edd3fb9f6ee7f
package data
// LanguagesByInterpreter maps an interpreter name (for example "bash" or
// "python3") to the names of the candidate languages run by that
// interpreter. Most interpreters map to a single language; a few
// (e.g. "lua", "perl", "ocaml") list several candidates.
// NOTE(review): presumably the key is the program named on a shebang line —
// confirm against the callers.
var LanguagesByInterpreter = map[string][]string{
"M2": {"Macaulay2"},
"RouterOS": {"RouterOS Script"},
"Rscript": {"R"},
"aidl": {"AIDL"},
"apl": {"APL"},
"aplx": {"APL"},
"ash": {"Shell"},
"asy": {"Asymptote"},
"awk": {"Awk"},
"bash": {"Shell"},
"bb": {"Clojure"},
"bigloo": {"Scheme"},
"boogie": {"Boogie"},
"boolector": {"SMT"},
"ccl": {"Common Lisp"},
"chakra": {"JavaScript"},
"chicken": {"Scheme"},
"clisp": {"Common Lisp"},
"coffee": {"CoffeeScript"},
"cperl": {"Perl"},
"crystal": {"Crystal"},
"csh": {"Tcsh"},
"csi": {"Scheme"},
"cvc4": {"SMT"},
"cwl-runner": {"Common Workflow Language"},
"d8": {"JavaScript"},
"dafny": {"Dafny"},
"dart": {"Dart"},
"dash": {"Shell"},
"deno": {"TypeScript"},
"dtrace": {"DTrace"},
"dyalog": {"APL"},
"ecl": {"Common Lisp"},
"elixir": {"Elixir"},
"elvish": {"Elvish"},
"escript": {"Erlang"},
"eui": {"Euphoria"},
"euiw": {"Euphoria"},
"fennel": {"Fennel"},
"fish": {"fish"},
"gawk": {"Awk"},
"gerbv": {"Gerber Image"},
"gerbview": {"Gerber Image"},
"gjs": {"JavaScript"},
"gn": {"GN"},
"gnuplot": {"Gnuplot"},
"gojq": {"jq"},
"gosh": {"Scheme"},
"groovy": {"Groovy"},
"gsed": {"sed"},
"guile": {"Scheme"},
"hy": {"Hy"},
"instantfpc": {"Pascal"},
"io": {"Io"},
"ioke": {"Ioke"},
"janet": {"Janet"},
"jaq": {"jq"},
"jconsole": {"J"},
"jolie": {"Jolie"},
"jq": {"jq"},
"jqjq": {"jq"},
"jqq": {"jq"},
"jruby": {"Ruby"},
"js": {"JavaScript"},
"julia": {"Julia"},
"ksh": {"Shell"},
"lisp": {"Common Lisp"},
"lsl": {"LSL"},
"lua": {"Lua", "Terra"},
"luau": {"Luau"},
"macruby": {"Ruby"},
"make": {"Makefile"},
"makeinfo": {"Texinfo"},
"mathsat5": {"SMT"},
"mawk": {"Awk"},
"minised": {"sed"},
"mksh": {"Shell"},
"mmi": {"Mercury"},
"moon": {"MoonScript"},
"nawk": {"Awk"},
"newlisp": {"NewLisp"},
"nextflow": {"Nextflow"},
"node": {"JavaScript"},
"nodejs": {"JavaScript"},
"nu": {"Nushell"},
"nush": {"Nu"},
"ocaml": {"OCaml", "ReScript"},
"ocamlrun": {"OCaml"},
"ocamlscript": {"OCaml"},
"openrc-run": {"OpenRC runscript"},
"opensmt": {"SMT"},
"osascript": {"AppleScript"},
"parrot": {"Parrot Assembly", "Parrot Internal Representation"},
"pdksh": {"Shell"},
"perl": {"Perl", "Pod"},
"perl6": {"Pod 6", "Raku"},
"php": {"PHP"},
"picolisp": {"PicoLisp"},
"pike": {"Pike"},
"pil": {"PicoLisp"},
"pkl": {"Pkl"},
"pwsh": {"PowerShell"},
"py": {"Python"},
"pypy": {"Python"},
"pypy3": {"Python"},
"python": {"Python"},
"python2": {"Python"},
"python3": {"Python"},
"qjs": {"JavaScript"},
"qmake": {"QMake"},
"query-json": {"jq"},
"r6rs": {"Scheme"},
"racket": {"Racket"},
"rake": {"Ruby"},
"raku": {"Raku"},
"rakudo": {"Raku"},
"rbx": {"Ruby"},
"rc": {"Shell"},
"regina": {"REXX"},
"rexx": {"REXX"},
"rhino": {"JavaScript"},
"ruby": {"Ruby"},
"rune": {"E"},
"runghc": {"Haskell"},
"runhaskell": {"Haskell"},
"runhugs": {"Haskell"},
"rust-script": {"Rust"},
"sbcl": {"Common Lisp"},
"scala": {"Scala"},
"scenic": {"Scenic"},
"scheme": {"Scheme"},
"sclang": {"SuperCollider"},
"scsynth": {"SuperCollider"},
"sed": {"sed"},
"sh": {"Shell"},
"smt-rat": {"SMT"},
"smtinterpol": {"SMT"},
"ssed": {"sed"},
"stp": {"SMT"},
"swipl": {"Prolog"},
"tcc": {"C"},
"tclsh": {"Tcl"},
"tcsh": {"Tcsh"},
"ts-node": {"TypeScript"},
"tsx": {"TypeScript"},
"v8": {"JavaScript"},
"v8-shell": {"JavaScript"},
"verit": {"SMT"},
"wish": {"Tcl"},
"yap": {"Prolog"},
"yices2": {"SMT"},
"z3": {"SMT"},
"zsh": {"Shell"},
}

View File

@ -0,0 +1,282 @@
// Code generated by github.com/go-enry/go-enry/v2/internal/code-generator DO NOT EDIT.
// Extracted from github/linguist commit: 42fd3c2514375fc6ad281552368edd3fb9f6ee7f
package data
// LanguagesMime maps a language name to a MIME type string associated with
// that language (for example "Go" -> "text/x-go"). Several languages share a
// MIME type, and languages without an entry have no MIME type recorded here.
// NOTE(review): the values look like editor-mode MIME types taken from
// linguist's language definitions rather than registered media types —
// confirm before using them in HTTP headers.
var LanguagesMime = map[string]string{
"AGS Script": "text/x-c++src",
"APL": "text/apl",
"ASN.1": "text/x-ttcn-asn",
"ASP.NET": "application/x-aspx",
"Alpine Abuild": "text/x-sh",
"AngelScript": "text/x-c++src",
"Ant Build System": "application/xml",
"Apex": "text/x-java",
"Astro": "text/jsx",
"Asymptote": "text/x-kotlin",
"Beef": "text/x-csharp",
"BibTeX": "text/x-stex",
"Bikeshed": "text/html",
"Bluespec": "text/x-systemverilog",
"Bluespec BH": "text/x-haskell",
"Brainfuck": "text/x-brainfuck",
"C": "text/x-csrc",
"C#": "text/x-csharp",
"C++": "text/x-c++src",
"C2hs Haskell": "text/x-haskell",
"CMake": "text/x-cmake",
"COBOL": "text/x-cobol",
"COLLADA": "text/xml",
"CSON": "text/x-coffeescript",
"CSS": "text/css",
"Cabal Config": "text/x-haskell",
"CameLIGO": "text/x-ocaml",
"ChucK": "text/x-java",
"Clojure": "text/x-clojure",
"Closure Templates": "text/x-soy",
"Cloud Firestore Security Rules": "text/css",
"CoffeeScript": "text/x-coffeescript",
"Common Lisp": "text/x-common-lisp",
"Common Workflow Language": "text/x-yaml",
"Component Pascal": "text/x-pascal",
"Crystal": "text/x-crystal",
"Cuda": "text/x-c++src",
"Cycript": "text/javascript",
"Cython": "text/x-cython",
"D": "text/x-d",
"DTrace": "text/x-csrc",
"Dart": "application/dart",
"DenizenScript": "text/x-yaml",
"Dhall": "text/x-haskell",
"Diff": "text/x-diff",
"Dockerfile": "text/x-dockerfile",
"Dylan": "text/x-dylan",
"E-mail": "application/mbox",
"EBNF": "text/x-ebnf",
"ECL": "text/x-ecl",
"EQ": "text/x-csharp",
"Eagle": "text/xml",
"Easybuild": "text/x-python",
"Ecere Projects": "application/json",
"Ecmarkup": "text/html",
"EditorConfig": "text/x-properties",
"Edje Data Collection": "text/x-c++src",
"Eiffel": "text/x-eiffel",
"Elm": "text/x-elm",
"Emacs Lisp": "text/x-common-lisp",
"EmberScript": "text/x-coffeescript",
"Erlang": "text/x-erlang",
"F#": "text/x-fsharp",
"Factor": "text/x-factor",
"Forth": "text/x-forth",
"Fortran": "text/x-fortran",
"Fortran Free Form": "text/x-fortran",
"FreeBasic": "text/x-vb",
"GCC Machine Description": "text/x-common-lisp",
"GN": "text/x-python",
"GSC": "text/x-csrc",
"Game Maker Language": "text/x-c++src",
"Genshi": "text/xml",
"Gentoo Ebuild": "text/x-sh",
"Gentoo Eclass": "text/x-sh",
"Git Attributes": "text/x-sh",
"Git Config": "text/x-properties",
"Glyph": "text/x-tcl",
"Go": "text/x-go",
"Grammatical Framework": "text/x-haskell",
"Groovy": "text/x-groovy",
"Groovy Server Pages": "application/x-jsp",
"HCL": "text/x-ruby",
"HTML": "text/html",
"HTML+ECR": "text/html",
"HTML+EEX": "text/html",
"HTML+ERB": "application/x-erb",
"HTML+PHP": "application/x-httpd-php",
"HTML+Razor": "text/html",
"HTTP": "message/http",
"Hack": "application/x-httpd-php",
"Haml": "text/x-haml",
"Haskell": "text/x-haskell",
"Haxe": "text/x-haxe",
"HolyC": "text/x-csrc",
"IDL": "text/x-idl",
"INI": "text/x-properties",
"IRC log": "text/mirc",
"Ignore List": "text/x-sh",
"JSON": "application/json",
"JSON with Comments": "text/javascript",
"JSON5": "application/json",
"JSONLD": "application/json",
"JSONiq": "application/json",
"Janet": "text/x-scheme",
"Java": "text/x-java",
"Java Properties": "text/x-properties",
"Java Server Pages": "application/x-jsp",
"JavaScript": "text/javascript",
"JavaScript+ERB": "application/javascript",
"Jest Snapshot": "application/javascript",
"JetBrains MPS": "text/xml",
"Jinja": "text/x-django",
"Julia": "text/x-julia",
"Jupyter Notebook": "application/json",
"Kaitai Struct": "text/x-yaml",
"KiCad Layout": "text/x-common-lisp",
"Kit": "text/html",
"Kotlin": "text/x-kotlin",
"LFE": "text/x-common-lisp",
"LTspice Symbol": "text/x-spreadsheet",
"LabVIEW": "text/xml",
"Lark": "text/x-ebnf",
"Latte": "text/x-smarty",
"Less": "text/css",
"LigoLANG": "text/x-pascal",
"Literate Haskell": "text/x-literate-haskell",
"LiveScript": "text/x-livescript",
"LookML": "text/x-yaml",
"Lua": "text/x-lua",
"Luau": "text/x-lua",
"M": "text/x-mumps",
"MATLAB": "text/x-octave",
"MDX": "text/x-gfm",
"MTML": "text/html",
"MUF": "text/x-forth",
"Makefile": "text/x-cmake",
"Markdown": "text/x-gfm",
"Marko": "text/html",
"Mathematica": "text/x-mathematica",
"Maven POM": "text/xml",
"Max": "application/json",
"Metal": "text/x-c++src",
"MiniYAML": "text/x-yaml",
"Mirah": "text/x-ruby",
"Modelica": "text/x-modelica",
"Mojo": "text/x-python",
"Monkey C": "text/x-csrc",
"Mustache": "text/x-smarty",
"NSIS": "text/x-nsis",
"NWScript": "text/x-csrc",
"NetLogo": "text/x-common-lisp",
"NewLisp": "text/x-common-lisp",
"Nginx": "text/x-nginx-conf",
"Nu": "text/x-scheme",
"NumPy": "text/x-python",
"Nushell": "text/x-sh",
"OASv2-json": "application/json",
"OASv2-yaml": "text/x-yaml",
"OASv3-json": "application/json",
"OASv3-yaml": "text/x-yaml",
"OCaml": "text/x-ocaml",
"Objective-C": "text/x-objectivec",
"Objective-C++": "text/x-objectivec",
"OpenCL": "text/x-csrc",
"OpenRC runscript": "text/x-sh",
"Option List": "text/x-sh",
"Oz": "text/x-oz",
"PEG.js": "text/javascript",
"PHP": "application/x-httpd-php",
"PLSQL": "text/x-plsql",
"PLpgSQL": "text/x-sql",
"Pascal": "text/x-pascal",
"Perl": "text/x-perl",
"Pic": "text/troff",
"Pod": "text/x-perl",
"PowerShell": "application/x-powershell",
"Protocol Buffer": "text/x-protobuf",
"Public Key": "application/pgp",
"Pug": "text/x-pug",
"Puppet": "text/x-puppet",
"PureScript": "text/x-haskell",
"Python": "text/x-python",
"Qt Script": "text/javascript",
"R": "text/x-rsrc",
"RAML": "text/x-yaml",
"RBS": "text/x-ruby",
"RMarkdown": "text/x-gfm",
"RPM Spec": "text/x-rpm-spec",
"Raku": "text/x-perl",
"ReScript": "text/x-rustsrc",
"Reason": "text/x-rustsrc",
"ReasonLIGO": "text/x-rustsrc",
"Record Jar": "text/x-properties",
"Roff": "text/troff",
"Roff Manpage": "text/troff",
"Rouge": "text/x-clojure",
"Ruby": "text/x-ruby",
"Rust": "text/x-rustsrc",
"SAS": "text/x-sas",
"SCSS": "text/x-scss",
"SPARQL": "application/sparql-query",
"SQL": "text/x-sql",
"SQLPL": "text/x-sql",
"SRecode Template": "text/x-common-lisp",
"SVG": "text/xml",
"SWIG": "text/x-c++src",
"Sage": "text/x-python",
"SaltStack": "text/x-yaml",
"Sass": "text/x-sass",
"Scala": "text/x-scala",
"Scheme": "text/x-scheme",
"Shell": "text/x-sh",
"ShellCheck Config": "text/x-properties",
"ShellSession": "text/x-sh",
"Sieve": "application/sieve",
"Simple File Verification": "text/x-properties",
"Slim": "text/x-slim",
"Smalltalk": "text/x-stsrc",
"Smarty": "text/x-smarty",
"Smithy": "text/x-csrc",
"Snakemake": "text/x-python",
"Squirrel": "text/x-c++src",
"Standard ML": "text/x-ocaml",
"Starlark": "text/x-python",
"StringTemplate": "text/html",
"Svelte": "text/html",
"Sway": "text/x-rustsrc",
"Swift": "text/x-swift",
"SystemVerilog": "text/x-systemverilog",
"TOML": "text/x-toml",
"TSX": "text/jsx",
"Tcl": "text/x-tcl",
"Tcsh": "text/x-sh",
"TeX": "text/x-stex",
"Terra": "text/x-lua",
"Terraform Template": "text/x-ruby",
"TextMate Properties": "text/x-properties",
"Textile": "text/x-textile",
"Turtle": "text/turtle",
"Twig": "text/x-twig",
"TypeScript": "application/typescript",
"Unified Parallel C": "text/x-csrc",
"Unity3D Asset": "text/x-yaml",
"Uno": "text/x-csharp",
"UnrealScript": "text/x-java",
"V": "text/x-go",
"VBA": "text/x-vb",
"VBScript": "text/vbscript",
"VHDL": "text/x-vhdl",
"Velocity Template Language": "text/velocity",
"Verilog": "text/x-verilog",
"Visual Basic .NET": "text/x-vb",
"Visual Basic 6.0": "text/x-vb",
"Volt": "text/x-d",
"WebAssembly": "text/x-common-lisp",
"WebAssembly Interface Type": "text/x-webidl",
"WebIDL": "text/x-webidl",
"Win32 Message File": "text/x-properties",
"Windows Registry Entries": "text/x-properties",
"X BitMap": "text/x-csrc",
"X PixMap": "text/x-csrc",
"XC": "text/x-csrc",
"XML": "text/xml",
"XML Property List": "text/xml",
"XPages": "text/xml",
"XProc": "text/xml",
"XQuery": "application/xquery",
"XS": "text/x-csrc",
"XSLT": "text/xml",
"Xonsh": "text/x-python",
"YAML": "text/x-yaml",
"edn": "text/x-clojure",
"reStructuredText": "text/x-rst",
"wisp": "text/x-clojure",
}

View File

@ -0,0 +1,779 @@
// Code generated by github.com/go-enry/go-enry/v2/internal/code-generator DO NOT EDIT.
// Extracted from github/linguist commit: 42fd3c2514375fc6ad281552368edd3fb9f6ee7f
package data
// Type classifies a language as data, programming, markup or prose.
// The zero value, TypeUnknown, stands for a language of unknown kind.
type Type int

// The possible values of Type, numbered consecutively from TypeUnknown (0).
const (
	TypeUnknown Type = iota
	TypeData
	TypeProgramming
	TypeMarkup
	TypeProse
)

// String returns the lower-case name of t ("data", "programming", "markup"
// or "prose"). Any other value, TypeUnknown included, yields "unknown".
func (t Type) String() string {
	// Indexed by Type value; slot 0 (TypeUnknown) is deliberately left empty
	// and handled by the fallback below.
	names := [...]string{
		TypeData:        "data",
		TypeProgramming: "programming",
		TypeMarkup:      "markup",
		TypeProse:       "prose",
	}
	if t > TypeUnknown && int(t) < len(names) {
		return names[t]
	}
	return "unknown"
}

// TypeForString is the inverse of Type.String: it returns the Type whose
// name equals s exactly (the comparison is case-sensitive). Any string that
// does not name a known type maps to TypeUnknown.
func TypeForString(s string) Type {
	for _, candidate := range []Type{TypeData, TypeProgramming, TypeMarkup, TypeProse} {
		if candidate.String() == s {
			return candidate
		}
	}
	return TypeUnknown
}
var LanguagesType = map[string]int{
"1C Enterprise": 2,
"2-Dimensional Array": 1,
"4D": 2,
"ABAP": 2,
"ABAP CDS": 2,
"ABNF": 1,
"AGS Script": 2,
"AIDL": 2,
"AL": 2,
"AMPL": 2,
"ANTLR": 2,
"API Blueprint": 3,
"APL": 2,
"ASL": 2,
"ASN.1": 1,
"ASP.NET": 2,
"ATS": 2,
"ActionScript": 2,
"Ada": 2,
"Adblock Filter List": 1,
"Adobe Font Metrics": 1,
"Agda": 2,
"Alloy": 2,
"Alpine Abuild": 2,
"Altium Designer": 1,
"AngelScript": 2,
"Ant Build System": 1,
"Antlers": 3,
"ApacheConf": 1,
"Apex": 2,
"Apollo Guidance Computer": 2,
"AppleScript": 2,
"Arc": 2,
"AsciiDoc": 4,
"AspectJ": 2,
"Assembly": 2,
"Astro": 3,
"Asymptote": 2,
"Augeas": 2,
"AutoHotkey": 2,
"AutoIt": 2,
"Avro IDL": 1,
"Awk": 2,
"BASIC": 2,
"BQN": 2,
"Ballerina": 2,
"Batchfile": 2,
"Beef": 2,
"Befunge": 2,
"Berry": 2,
"BibTeX": 3,
"Bicep": 2,
"Bikeshed": 3,
"Bison": 2,
"BitBake": 2,
"Blade": 3,
"BlitzBasic": 2,
"BlitzMax": 2,
"Bluespec": 2,
"Bluespec BH": 2,
"Boo": 2,
"Boogie": 2,
"Brainfuck": 2,
"BrighterScript": 2,
"Brightscript": 2,
"Browserslist": 1,
"C": 2,
"C#": 2,
"C++": 2,
"C-ObjDump": 1,
"C2hs Haskell": 2,
"CAP CDS": 2,
"CIL": 1,
"CLIPS": 2,
"CMake": 2,
"COBOL": 2,
"CODEOWNERS": 1,
"COLLADA": 1,
"CSON": 1,
"CSS": 3,
"CSV": 1,
"CUE": 2,
"CWeb": 2,
"Cabal Config": 1,
"Caddyfile": 1,
"Cadence": 2,
"Cairo": 2,
"CameLIGO": 2,
"Cap'n Proto": 2,
"CartoCSS": 2,
"Ceylon": 2,
"Chapel": 2,
"Charity": 2,
"Checksums": 1,
"ChucK": 2,
"Circom": 2,
"Cirru": 2,
"Clarion": 2,
"Clarity": 2,
"Classic ASP": 2,
"Clean": 2,
"Click": 2,
"Clojure": 2,
"Closure Templates": 3,
"Cloud Firestore Security Rules": 1,
"CoNLL-U": 1,
"CodeQL": 2,
"CoffeeScript": 2,
"ColdFusion": 2,
"ColdFusion CFC": 2,
"Common Lisp": 2,
"Common Workflow Language": 2,
"Component Pascal": 2,
"Cool": 2,
"Coq": 2,
"Cpp-ObjDump": 1,
"Creole": 4,
"Crystal": 2,
"Csound": 2,
"Csound Document": 2,
"Csound Score": 2,
"Cuda": 2,
"Cue Sheet": 1,
"Curry": 2,
"Cycript": 2,
"Cypher": 2,
"Cython": 2,
"D": 2,
"D-ObjDump": 1,
"D2": 3,
"DIGITAL Command Language": 2,
"DM": 2,
"DNS Zone": 1,
"DTrace": 2,
"Dafny": 2,
"Darcs Patch": 1,
"Dart": 2,
"DataWeave": 2,
"Debian Package Control File": 1,
"DenizenScript": 2,
"Dhall": 2,
"Diff": 1,
"DirectX 3D File": 1,
"Dockerfile": 2,
"Dogescript": 2,
"Dotenv": 1,
"Dylan": 2,
"E": 2,
"E-mail": 1,
"EBNF": 1,
"ECL": 2,
"ECLiPSe": 2,
"EJS": 3,
"EQ": 2,
"Eagle": 1,
"Earthly": 2,
"Easybuild": 1,
"Ecere Projects": 1,
"Ecmarkup": 3,
"Edge": 3,
"EdgeQL": 2,
"EditorConfig": 1,
"Edje Data Collection": 1,
"Eiffel": 2,
"Elixir": 2,
"Elm": 2,
"Elvish": 2,
"Elvish Transcript": 2,
"Emacs Lisp": 2,
"EmberScript": 2,
"Erlang": 2,
"Euphoria": 2,
"F#": 2,
"F*": 2,
"FIGlet Font": 1,
"FIRRTL": 2,
"FLUX": 2,
"Factor": 2,
"Fancy": 2,
"Fantom": 2,
"Faust": 2,
"Fennel": 2,
"Filebench WML": 2,
"Filterscript": 2,
"Fluent": 2,
"Formatted": 1,
"Forth": 2,
"Fortran": 2,
"Fortran Free Form": 2,
"FreeBasic": 2,
"FreeMarker": 2,
"Frege": 2,
"Futhark": 2,
"G-code": 2,
"GAML": 2,
"GAMS": 2,
"GAP": 2,
"GCC Machine Description": 2,
"GDB": 2,
"GDScript": 2,
"GEDCOM": 1,
"GLSL": 2,
"GN": 1,
"GSC": 2,
"Game Maker Language": 2,
"Gemfile.lock": 1,
"Gemini": 4,
"Genero 4gl": 2,
"Genero per": 3,
"Genie": 2,
"Genshi": 2,
"Gentoo Ebuild": 2,
"Gentoo Eclass": 2,
"Gerber Image": 1,
"Gettext Catalog": 4,
"Gherkin": 2,
"Git Attributes": 1,
"Git Config": 1,
"Git Revision List": 1,
"Gleam": 2,
"Glimmer JS": 2,
"Glimmer TS": 2,
"Glyph": 2,
"Glyph Bitmap Distribution Format": 1,
"Gnuplot": 2,
"Go": 2,
"Go Checksums": 1,
"Go Module": 1,
"Go Workspace": 1,
"Godot Resource": 1,
"Golo": 2,
"Gosu": 2,
"Grace": 2,
"Gradle": 1,
"Gradle Kotlin DSL": 1,
"Grammatical Framework": 2,
"Graph Modeling Language": 1,
"GraphQL": 1,
"Graphviz (DOT)": 1,
"Groovy": 2,
"Groovy Server Pages": 2,
"HAProxy": 1,
"HCL": 2,
"HLSL": 2,
"HOCON": 1,
"HTML": 3,
"HTML+ECR": 3,
"HTML+EEX": 3,
"HTML+ERB": 3,
"HTML+PHP": 3,
"HTML+Razor": 3,
"HTTP": 1,
"HXML": 1,
"Hack": 2,
"Haml": 3,
"Handlebars": 3,
"Harbour": 2,
"Haskell": 2,
"Haxe": 2,
"HiveQL": 2,
"HolyC": 2,
"Hosts File": 1,
"Hy": 2,
"HyPhy": 2,
"IDL": 2,
"IGOR Pro": 2,
"INI": 1,
"IRC log": 1,
"Idris": 2,
"Ignore List": 1,
"ImageJ Macro": 2,
"Imba": 2,
"Inform 7": 2,
"Ink": 2,
"Inno Setup": 2,
"Io": 2,
"Ioke": 2,
"Isabelle": 2,
"Isabelle ROOT": 2,
"J": 2,
"JAR Manifest": 1,
"JCL": 2,
"JFlex": 2,
"JSON": 1,
"JSON with Comments": 1,
"JSON5": 1,
"JSONLD": 1,
"JSONiq": 2,
"Janet": 2,
"Jasmin": 2,
"Java": 2,
"Java Properties": 1,
"Java Server Pages": 2,
"JavaScript": 2,
"JavaScript+ERB": 2,
"Jest Snapshot": 1,
"JetBrains MPS": 2,
"Jinja": 3,
"Jison": 2,
"Jison Lex": 2,
"Jolie": 2,
"Jsonnet": 2,
"Julia": 2,
"Julia REPL": 2,
"Jupyter Notebook": 3,
"Just": 2,
"KRL": 2,
"Kaitai Struct": 2,
"KakouneScript": 2,
"KerboScript": 2,
"KiCad Layout": 1,
"KiCad Legacy Layout": 1,
"KiCad Schematic": 1,
"Kickstart": 1,
"Kit": 3,
"Kotlin": 2,
"Kusto": 1,
"LFE": 2,
"LLVM": 2,
"LOLCODE": 2,
"LSL": 2,
"LTspice Symbol": 1,
"LabVIEW": 2,
"Lark": 1,
"Lasso": 2,
"Latte": 3,
"Lean": 2,
"Lean 4": 2,
"Less": 3,
"Lex": 2,
"LigoLANG": 2,
"LilyPond": 2,
"Limbo": 2,
"Linker Script": 1,
"Linux Kernel Module": 1,
"Liquid": 3,
"Literate Agda": 2,
"Literate CoffeeScript": 2,
"Literate Haskell": 2,
"LiveScript": 2,
"Logos": 2,
"Logtalk": 2,
"LookML": 2,
"LoomScript": 2,
"Lua": 2,
"Luau": 2,
"M": 2,
"M4": 2,
"M4Sugar": 2,
"MATLAB": 2,
"MAXScript": 2,
"MDX": 3,
"MLIR": 2,
"MQL4": 2,
"MQL5": 2,
"MTML": 3,
"MUF": 2,
"Macaulay2": 2,
"Makefile": 2,
"Mako": 2,
"Markdown": 4,
"Marko": 3,
"Mask": 3,
"Mathematica": 2,
"Maven POM": 1,
"Max": 2,
"Mercury": 2,
"Mermaid": 3,
"Meson": 2,
"Metal": 2,
"Microsoft Developer Studio Project": 1,
"Microsoft Visual Studio Solution": 1,
"MiniD": 2,
"MiniYAML": 1,
"Mint": 2,
"Mirah": 2,
"Modelica": 2,
"Modula-2": 2,
"Modula-3": 2,
"Module Management System": 2,
"Mojo": 2,
"Monkey": 2,
"Monkey C": 2,
"Moocode": 2,
"MoonScript": 2,
"Motoko": 2,
"Motorola 68K Assembly": 2,
"Move": 2,
"Muse": 4,
"Mustache": 3,
"Myghty": 2,
"NASL": 2,
"NCL": 2,
"NEON": 1,
"NL": 1,
"NMODL": 2,
"NPM Config": 1,
"NSIS": 2,
"NWScript": 2,
"Nasal": 2,
"Nearley": 2,
"Nemerle": 2,
"NetLinx": 2,
"NetLinx+ERB": 2,
"NetLogo": 2,
"NewLisp": 2,
"Nextflow": 2,
"Nginx": 1,
"Nim": 2,
"Ninja": 1,
"Nit": 2,
"Nix": 2,
"Nu": 2,
"NumPy": 2,
"Nunjucks": 3,
"Nushell": 2,
"OASv2-json": 1,
"OASv2-yaml": 1,
"OASv3-json": 1,
"OASv3-yaml": 1,
"OCaml": 2,
"Oberon": 2,
"ObjDump": 1,
"Object Data Instance Notation": 1,
"ObjectScript": 2,
"Objective-C": 2,
"Objective-C++": 2,
"Objective-J": 2,
"Odin": 2,
"Omgrofl": 2,
"Opa": 2,
"Opal": 2,
"Open Policy Agent": 2,
"OpenAPI Specification v2": 1,
"OpenAPI Specification v3": 1,
"OpenCL": 2,
"OpenEdge ABL": 2,
"OpenQASM": 2,
"OpenRC runscript": 2,
"OpenSCAD": 2,
"OpenStep Property List": 1,
"OpenType Feature File": 1,
"Option List": 1,
"Org": 4,
"Ox": 2,
"Oxygene": 2,
"Oz": 2,
"P4": 2,
"PDDL": 2,
"PEG.js": 2,
"PHP": 2,
"PLSQL": 2,
"PLpgSQL": 2,
"POV-Ray SDL": 2,
"Pact": 2,
"Pan": 2,
"Papyrus": 2,
"Parrot": 2,
"Parrot Assembly": 2,
"Parrot Internal Representation": 2,
"Pascal": 2,
"Pawn": 2,
"Pep8": 2,
"Perl": 2,
"Pic": 3,
"Pickle": 1,
"PicoLisp": 2,
"PigLatin": 2,
"Pike": 2,
"Pip Requirements": 1,
"Pkl": 2,
"PlantUML": 1,
"Pod": 4,
"Pod 6": 4,
"PogoScript": 2,
"Polar": 2,
"Pony": 2,
"Portugol": 2,
"PostCSS": 3,
"PostScript": 3,
"PowerBuilder": 2,
"PowerShell": 2,
"Praat": 2,
"Prisma": 1,
"Processing": 2,
"Procfile": 2,
"Proguard": 1,
"Prolog": 2,
"Promela": 2,
"Propeller Spin": 2,
"Protocol Buffer": 1,
"Protocol Buffer Text Format": 1,
"Public Key": 1,
"Pug": 3,
"Puppet": 2,
"Pure Data": 1,
"PureBasic": 2,
"PureScript": 2,
"Pyret": 2,
"Python": 2,
"Python console": 2,
"Python traceback": 1,
"Q#": 2,
"QML": 2,
"QMake": 2,
"Qt Script": 2,
"Quake": 2,
"R": 2,
"RAML": 3,
"RBS": 1,
"RDoc": 4,
"REALbasic": 2,
"REXX": 2,
"RMarkdown": 4,
"RON": 1,
"RPC": 2,
"RPGLE": 2,
"RPM Spec": 1,
"RUNOFF": 3,
"Racket": 2,
"Ragel": 2,
"Raku": 2,
"Rascal": 2,
"Raw token data": 1,
"ReScript": 2,
"Readline Config": 1,
"Reason": 2,
"ReasonLIGO": 2,
"Rebol": 2,
"Record Jar": 1,
"Red": 2,
"Redcode": 2,
"Redirect Rules": 1,
"Regular Expression": 1,
"Ren'Py": 2,
"RenderScript": 2,
"Rez": 2,
"Rich Text Format": 3,
"Ring": 2,
"Riot": 3,
"RobotFramework": 2,
"Roc": 2,
"Roff": 3,
"Roff Manpage": 3,
"Rouge": 2,
"RouterOS Script": 2,
"Ruby": 2,
"Rust": 2,
"SAS": 2,
"SCSS": 3,
"SELinux Policy": 1,
"SMT": 2,
"SPARQL": 1,
"SQF": 2,
"SQL": 1,
"SQLPL": 2,
"SRecode Template": 3,
"SSH Config": 1,
"STAR": 1,
"STL": 1,
"STON": 1,
"SVG": 1,
"SWIG": 2,
"Sage": 2,
"SaltStack": 2,
"Sass": 3,
"Scala": 2,
"Scaml": 3,
"Scenic": 2,
"Scheme": 2,
"Scilab": 2,
"Self": 2,
"ShaderLab": 2,
"Shell": 2,
"ShellCheck Config": 1,
"ShellSession": 2,
"Shen": 2,
"Sieve": 2,
"Simple File Verification": 1,
"Singularity": 2,
"Slash": 2,
"Slice": 2,
"Slim": 3,
"Slint": 3,
"SmPL": 2,
"Smali": 2,
"Smalltalk": 2,
"Smarty": 2,
"Smithy": 2,
"Snakemake": 2,
"Solidity": 2,
"Soong": 1,
"SourcePawn": 2,
"Spline Font Database": 1,
"Squirrel": 2,
"Stan": 2,
"Standard ML": 2,
"Starlark": 2,
"Stata": 2,
"StringTemplate": 3,
"Stylus": 3,
"SubRip Text": 1,
"SugarSS": 3,
"SuperCollider": 2,
"Svelte": 3,
"Sway": 2,
"Sweave": 4,
"Swift": 2,
"SystemVerilog": 2,
"TI Program": 2,
"TL-Verilog": 2,
"TLA": 2,
"TOML": 1,
"TSQL": 2,
"TSV": 1,
"TSX": 2,
"TXL": 2,
"Talon": 2,
"Tcl": 2,
"Tcsh": 2,
"TeX": 3,
"Tea": 3,
"Terra": 2,
"Terraform Template": 3,
"Texinfo": 4,
"Text": 4,
"TextGrid": 1,
"TextMate Properties": 1,
"Textile": 4,
"Thrift": 2,
"Toit": 2,
"Turing": 2,
"Turtle": 1,
"Twig": 3,
"Type Language": 1,
"TypeScript": 2,
"Typst": 2,
"Unified Parallel C": 2,
"Unity3D Asset": 1,
"Unix Assembly": 2,
"Uno": 2,
"UnrealScript": 2,
"UrWeb": 2,
"V": 2,
"VBA": 2,
"VBScript": 2,
"VCL": 2,
"VHDL": 2,
"Vala": 2,
"Valve Data Format": 1,
"Velocity Template Language": 3,
"Verilog": 2,
"Vim Help File": 4,
"Vim Script": 2,
"Vim Snippet": 3,
"Visual Basic .NET": 2,
"Visual Basic 6.0": 2,
"Volt": 2,
"Vue": 3,
"Vyper": 2,
"WDL": 2,
"WGSL": 2,
"Wavefront Material": 1,
"Wavefront Object": 1,
"Web Ontology Language": 1,
"WebAssembly": 2,
"WebAssembly Interface Type": 1,
"WebIDL": 2,
"WebVTT": 1,
"Wget Config": 1,
"Whiley": 2,
"Wikitext": 4,
"Win32 Message File": 1,
"Windows Registry Entries": 1,
"Witcher Script": 2,
"Wollok": 2,
"World of Warcraft Addon Data": 1,
"Wren": 2,
"X BitMap": 1,
"X Font Directory Index": 1,
"X PixMap": 1,
"X10": 2,
"XC": 2,
"XCompose": 1,
"XML": 1,
"XML Property List": 1,
"XPages": 1,
"XProc": 2,
"XQuery": 2,
"XS": 2,
"XSLT": 2,
"Xojo": 2,
"Xonsh": 2,
"Xtend": 2,
"YAML": 1,
"YANG": 1,
"YARA": 2,
"YASnippet": 3,
"Yacc": 2,
"Yul": 2,
"ZAP": 2,
"ZIL": 2,
"Zeek": 2,
"ZenScript": 2,
"Zephir": 2,
"Zig": 2,
"Zimpl": 2,
"cURL Config": 1,
"crontab": 1,
"desktop": 1,
"dircolors": 1,
"eC": 2,
"edn": 1,
"fish": 2,
"hoon": 2,
"jq": 2,
"kvlang": 3,
"mIRC Script": 2,
"mcfunction": 2,
"mupad": 2,
"nanorc": 1,
"nesC": 2,
"ooc": 2,
"q": 2,
"reStructuredText": 4,
"robots.txt": 1,
"sed": 2,
"templ": 3,
"wisp": 2,
"xBase": 2,
}

View File

@ -0,0 +1,179 @@
// Code generated by github.com/go-enry/go-enry/v2/internal/code-generator DO NOT EDIT.
// Extracted from github/linguist commit: 42fd3c2514375fc6ad281552368edd3fb9f6ee7f
package data
import "github.com/go-enry/go-enry/v2/regex"
var VendorMatchers = []regex.EnryRegexp{
regex.MustCompile(`(^|/)cache/`),
regex.MustCompile(`^[Dd]ependencies/`),
regex.MustCompile(`(^|/)dist/`),
regex.MustCompile(`^deps/`),
regex.MustCompile(`(^|/)configure$`),
regex.MustCompile(`(^|/)config\.guess$`),
regex.MustCompile(`(^|/)config\.sub$`),
regex.MustCompile(`(^|/)aclocal\.m4`),
regex.MustCompile(`(^|/)libtool\.m4`),
regex.MustCompile(`(^|/)ltoptions\.m4`),
regex.MustCompile(`(^|/)ltsugar\.m4`),
regex.MustCompile(`(^|/)ltversion\.m4`),
regex.MustCompile(`(^|/)lt~obsolete\.m4`),
regex.MustCompile(`(^|/)dotnet-install\.(ps1|sh)$`),
regex.MustCompile(`(^|/)cpplint\.py`),
regex.MustCompile(`(^|/)node_modules/`),
regex.MustCompile(`(^|/)\.yarn/releases/`),
regex.MustCompile(`(^|/)\.yarn/plugins/`),
regex.MustCompile(`(^|/)\.yarn/sdks/`),
regex.MustCompile(`(^|/)\.yarn/versions/`),
regex.MustCompile(`(^|/)\.yarn/unplugged/`),
regex.MustCompile(`(^|/)_esy$`),
regex.MustCompile(`(^|/)bower_components/`),
regex.MustCompile(`^rebar$`),
regex.MustCompile(`(^|/)erlang\.mk`),
regex.MustCompile(`(^|/)Godeps/_workspace/`),
regex.MustCompile(`(^|/)testdata/`),
regex.MustCompile(`(^|/)\.indent\.pro`),
regex.MustCompile(`(\.|-)min\.(js|css)$`),
regex.MustCompile(`([^\s]*)import\.(css|less|scss|styl)$`),
regex.MustCompile(`(^|/)bootstrap([^/.]*)(\..*)?\.(js|css|less|scss|styl)$`),
regex.MustCompile(`(^|/)custom\.bootstrap([^\s]*)(js|css|less|scss|styl)$`),
regex.MustCompile(`(^|/)font-?awesome\.(css|less|scss|styl)$`),
regex.MustCompile(`(^|/)font-?awesome/.*\.(css|less|scss|styl)$`),
regex.MustCompile(`(^|/)foundation\.(css|less|scss|styl)$`),
regex.MustCompile(`(^|/)normalize\.(css|less|scss|styl)$`),
regex.MustCompile(`(^|/)skeleton\.(css|less|scss|styl)$`),
regex.MustCompile(`(^|/)[Bb]ourbon/.*\.(css|less|scss|styl)$`),
regex.MustCompile(`(^|/)animate\.(css|less|scss|styl)$`),
regex.MustCompile(`(^|/)materialize\.(css|less|scss|styl|js)$`),
regex.MustCompile(`(^|/)select2/.*\.(css|scss|js)$`),
regex.MustCompile(`(^|/)bulma\.(css|sass|scss)$`),
regex.MustCompile(`(3rd|[Tt]hird)[-_]?[Pp]arty/`),
regex.MustCompile(`(^|/)vendors?/`),
regex.MustCompile(`(^|/)[Ee]xtern(als?)?/`),
regex.MustCompile(`(^|/)[Vv]+endor/`),
regex.MustCompile(`^debian/`),
regex.MustCompile(`(^|/)run\.n$`),
regex.MustCompile(`(^|/)bootstrap-datepicker/`),
regex.MustCompile(`(^|/)jquery([^.]*)\.js$`),
regex.MustCompile(`(^|/)jquery\-\d\.\d+(\.\d+)?\.js$`),
regex.MustCompile(`(^|/)jquery\-ui(\-\d\.\d+(\.\d+)?)?(\.\w+)?\.(js|css)$`),
regex.MustCompile(`(^|/)jquery\.(ui|effects)\.([^.]*)\.(js|css)$`),
regex.MustCompile(`(^|/)jquery\.fn\.gantt\.js`),
regex.MustCompile(`(^|/)jquery\.fancybox\.(js|css)`),
regex.MustCompile(`(^|/)fuelux\.js`),
regex.MustCompile(`(^|/)jquery\.fileupload(-\w+)?\.js$`),
regex.MustCompile(`(^|/)jquery\.dataTables\.js`),
regex.MustCompile(`(^|/)bootbox\.js`),
regex.MustCompile(`(^|/)pdf\.worker\.js`),
regex.MustCompile(`(^|/)slick\.\w+.js$`),
regex.MustCompile(`(^|/)Leaflet\.Coordinates-\d+\.\d+\.\d+\.src\.js$`),
regex.MustCompile(`(^|/)leaflet\.draw-src\.js`),
regex.MustCompile(`(^|/)leaflet\.draw\.css`),
regex.MustCompile(`(^|/)Control\.FullScreen\.css`),
regex.MustCompile(`(^|/)Control\.FullScreen\.js`),
regex.MustCompile(`(^|/)leaflet\.spin\.js`),
regex.MustCompile(`(^|/)wicket-leaflet\.js`),
regex.MustCompile(`(^|/)\.sublime-project`),
regex.MustCompile(`(^|/)\.sublime-workspace`),
regex.MustCompile(`(^|/)\.vscode/`),
regex.MustCompile(`(^|/)prototype(.*)\.js$`),
regex.MustCompile(`(^|/)effects\.js$`),
regex.MustCompile(`(^|/)controls\.js$`),
regex.MustCompile(`(^|/)dragdrop\.js$`),
regex.MustCompile(`(.*?)\.d\.ts$`),
regex.MustCompile(`(^|/)mootools([^.]*)\d+\.\d+.\d+([^.]*)\.js$`),
regex.MustCompile(`(^|/)dojo\.js$`),
regex.MustCompile(`(^|/)MochiKit\.js$`),
regex.MustCompile(`(^|/)yahoo-([^.]*)\.js$`),
regex.MustCompile(`(^|/)yui([^.]*)\.js$`),
regex.MustCompile(`(^|/)ckeditor\.js$`),
regex.MustCompile(`(^|/)tiny_mce([^.]*)\.js$`),
regex.MustCompile(`(^|/)tiny_mce/(langs|plugins|themes|utils)`),
regex.MustCompile(`(^|/)ace-builds/`),
regex.MustCompile(`(^|/)fontello(.*?)\.css$`),
regex.MustCompile(`(^|/)MathJax/`),
regex.MustCompile(`(^|/)Chart\.js$`),
regex.MustCompile(`(^|/)[Cc]ode[Mm]irror/(\d+\.\d+/)?(lib|mode|theme|addon|keymap|demo)`),
regex.MustCompile(`(^|/)shBrush([^.]*)\.js$`),
regex.MustCompile(`(^|/)shCore\.js$`),
regex.MustCompile(`(^|/)shLegacy\.js$`),
regex.MustCompile(`(^|/)angular([^.]*)\.js$`),
regex.MustCompile(`(^|\/)d3(\.v\d+)?([^.]*)\.js$`),
regex.MustCompile(`(^|/)react(-[^.]*)?\.js$`),
regex.MustCompile(`(^|/)flow-typed/.*\.js$`),
regex.MustCompile(`(^|/)modernizr\-\d\.\d+(\.\d+)?\.js$`),
regex.MustCompile(`(^|/)modernizr\.custom\.\d+\.js$`),
regex.MustCompile(`(^|/)knockout-(\d+\.){3}(debug\.)?js$`),
regex.MustCompile(`(^|/)docs?/_?(build|themes?|templates?|static)/`),
regex.MustCompile(`(^|/)admin_media/`),
regex.MustCompile(`(^|/)env/`),
regex.MustCompile(`(^|/)fabfile\.py$`),
regex.MustCompile(`(^|/)waf$`),
regex.MustCompile(`(^|/)\.osx$`),
regex.MustCompile(`\.xctemplate/`),
regex.MustCompile(`\.imageset/`),
regex.MustCompile(`(^|/)Carthage/`),
regex.MustCompile(`(^|/)Sparkle/`),
regex.MustCompile(`(^|/)Crashlytics\.framework/`),
regex.MustCompile(`(^|/)Fabric\.framework/`),
regex.MustCompile(`(^|/)BuddyBuildSDK\.framework/`),
regex.MustCompile(`(^|/)Realm\.framework`),
regex.MustCompile(`(^|/)RealmSwift\.framework`),
regex.MustCompile(`(^|/)\.gitattributes$`),
regex.MustCompile(`(^|/)\.gitignore$`),
regex.MustCompile(`(^|/)\.gitmodules$`),
regex.MustCompile(`(^|/)gradlew$`),
regex.MustCompile(`(^|/)gradlew\.bat$`),
regex.MustCompile(`(^|/)gradle/wrapper/`),
regex.MustCompile(`(^|/)mvnw$`),
regex.MustCompile(`(^|/)mvnw\.cmd$`),
regex.MustCompile(`(^|/)\.mvn/wrapper/`),
regex.MustCompile(`-vsdoc\.js$`),
regex.MustCompile(`\.intellisense\.js$`),
regex.MustCompile(`(^|/)jquery([^.]*)\.validate(\.unobtrusive)?\.js$`),
regex.MustCompile(`(^|/)jquery([^.]*)\.unobtrusive\-ajax\.js$`),
regex.MustCompile(`(^|/)[Mm]icrosoft([Mm]vc)?([Aa]jax|[Vv]alidation)(\.debug)?\.js$`),
regex.MustCompile(`(^|/)[Pp]ackages\/.+\.\d+\/`),
regex.MustCompile(`(^|/)extjs/.*?\.js$`),
regex.MustCompile(`(^|/)extjs/.*?\.xml$`),
regex.MustCompile(`(^|/)extjs/.*?\.txt$`),
regex.MustCompile(`(^|/)extjs/.*?\.html$`),
regex.MustCompile(`(^|/)extjs/.*?\.properties$`),
regex.MustCompile(`(^|/)extjs/\.sencha/`),
regex.MustCompile(`(^|/)extjs/docs/`),
regex.MustCompile(`(^|/)extjs/builds/`),
regex.MustCompile(`(^|/)extjs/cmd/`),
regex.MustCompile(`(^|/)extjs/examples/`),
regex.MustCompile(`(^|/)extjs/locale/`),
regex.MustCompile(`(^|/)extjs/packages/`),
regex.MustCompile(`(^|/)extjs/plugins/`),
regex.MustCompile(`(^|/)extjs/resources/`),
regex.MustCompile(`(^|/)extjs/src/`),
regex.MustCompile(`(^|/)extjs/welcome/`),
regex.MustCompile(`(^|/)html5shiv\.js$`),
regex.MustCompile(`(^|/)[Tt]ests?/fixtures/`),
regex.MustCompile(`(^|/)[Ss]pecs?/fixtures/`),
regex.MustCompile(`(^|/)cordova([^.]*)\.js$`),
regex.MustCompile(`(^|/)cordova\-\d\.\d(\.\d)?\.js$`),
regex.MustCompile(`(^|/)foundation(\..*)?\.js$`),
regex.MustCompile(`(^|/)Vagrantfile$`),
regex.MustCompile(`(^|/)\.[Dd][Ss]_[Ss]tore$`),
regex.MustCompile(`(^|/)inst/extdata/`),
regex.MustCompile(`(^|/)octicons\.css`),
regex.MustCompile(`(^|/)sprockets-octicons\.scss`),
regex.MustCompile(`(^|/)activator$`),
regex.MustCompile(`(^|/)activator\.bat$`),
regex.MustCompile(`(^|/)proguard\.pro$`),
regex.MustCompile(`(^|/)proguard-rules\.pro$`),
regex.MustCompile(`(^|/)puphpet/`),
regex.MustCompile(`(^|/)\.google_apis/`),
regex.MustCompile(`(^|/)Jenkinsfile$`),
regex.MustCompile(`(^|/)\.gitpod\.Dockerfile$`),
regex.MustCompile(`(^|/)\.github/`),
regex.MustCompile(`(^|/)\.obsidian/`),
regex.MustCompile(`(^|/)\.teamcity/`),
}
// FastVendorMatcher is equivalent to matching any of the VendorMatchers.
var FastVendorMatcher = regex.MustCompile(`(?:^(?:(?:[Dd]ependencies/)|(?:debian/)|(?:deps/)|(?:rebar$)))|(?:(?:^|/)(?:(?:BuddyBuildSDK\.framework/)|(?:Carthage/)|(?:Chart\.js$)|(?:Control\.FullScreen\.css)|(?:Control\.FullScreen\.js)|(?:Crashlytics\.framework/)|(?:Fabric\.framework/)|(?:Godeps/_workspace/)|(?:Jenkinsfile$)|(?:Leaflet\.Coordinates-\d+\.\d+\.\d+\.src\.js$)|(?:MathJax/)|(?:MochiKit\.js$)|(?:RealmSwift\.framework)|(?:Realm\.framework)|(?:Sparkle/)|(?:Vagrantfile$)|(?:[Bb]ourbon/.*\.(css|less|scss|styl)$)|(?:[Cc]ode[Mm]irror/(\d+\.\d+/)?(lib|mode|theme|addon|keymap|demo))|(?:[Ee]xtern(als?)?/)|(?:[Mm]icrosoft([Mm]vc)?([Aa]jax|[Vv]alidation)(\.debug)?\.js$)|(?:[Pp]ackages\/.+\.\d+\/)|(?:[Ss]pecs?/fixtures/)|(?:[Tt]ests?/fixtures/)|(?:[Vv]+endor/)|(?:\.[Dd][Ss]_[Ss]tore$)|(?:\.gitattributes$)|(?:\.github/)|(?:\.gitignore$)|(?:\.gitmodules$)|(?:\.gitpod\.Dockerfile$)|(?:\.google_apis/)|(?:\.indent\.pro)|(?:\.mvn/wrapper/)|(?:\.obsidian/)|(?:\.osx$)|(?:\.sublime-project)|(?:\.sublime-workspace)|(?:\.teamcity/)|(?:\.vscode/)|(?:\.yarn/plugins/)|(?:\.yarn/releases/)|(?:\.yarn/sdks/)|(?:\.yarn/unplugged/)|(?:\.yarn/versions/)|(?:_esy$)|(?:ace-builds/)|(?:aclocal\.m4)|(?:activator$)|(?:activator\.bat$)|(?:admin_media/)|(?:angular([^.]*)\.js$)|(?:animate\.(css|less|scss|styl)$)|(?:bootbox\.js)|(?:bootstrap([^/.]*)(\..*)?\.(js|css|less|scss|styl)$)|(?:bootstrap-datepicker/)|(?:bower_components/)|(?:bulma\.(css|sass|scss)$)|(?:cache/)|(?:ckeditor\.js$)|(?:config\.guess$)|(?:config\.sub$)|(?:configure$)|(?:controls\.js$)|(?:cordova([^.]*)\.js$)|(?:cordova\-\d\.\d(\.\d)?\.js$)|(?:cpplint\.py)|(?:custom\.bootstrap([^\s]*)(js|css|less|scss|styl)$)|(?:dist/)|(?:docs?/_?(build|themes?|templates?|static)/)|(?:dojo\.js$)|(?:dotnet-install\.(ps1|sh)$)|(?:dragdrop\.js$)|(?:effects\.js$)|(?:env/)|(?:erlang\.mk)|(?:extjs/.*?\.html$)|(?:extjs/.*?\.js$)|(?:extjs/.*?\.properties$)|(?:extjs/.*?\.txt$)|(?:extjs/.*?\.xml$)|(?:extjs/\.sencha/)|(?:extjs/builds/)|(?:extjs/cmd/)|(?:ext
js/docs/)|(?:extjs/examples/)|(?:extjs/locale/)|(?:extjs/packages/)|(?:extjs/plugins/)|(?:extjs/resources/)|(?:extjs/src/)|(?:extjs/welcome/)|(?:fabfile\.py$)|(?:flow-typed/.*\.js$)|(?:font-?awesome/.*\.(css|less|scss|styl)$)|(?:font-?awesome\.(css|less|scss|styl)$)|(?:fontello(.*?)\.css$)|(?:foundation(\..*)?\.js$)|(?:foundation\.(css|less|scss|styl)$)|(?:fuelux\.js)|(?:gradle/wrapper/)|(?:gradlew$)|(?:gradlew\.bat$)|(?:html5shiv\.js$)|(?:inst/extdata/)|(?:jquery([^.]*)\.js$)|(?:jquery([^.]*)\.unobtrusive\-ajax\.js$)|(?:jquery([^.]*)\.validate(\.unobtrusive)?\.js$)|(?:jquery\-\d\.\d+(\.\d+)?\.js$)|(?:jquery\-ui(\-\d\.\d+(\.\d+)?)?(\.\w+)?\.(js|css)$)|(?:jquery\.(ui|effects)\.([^.]*)\.(js|css)$)|(?:jquery\.dataTables\.js)|(?:jquery\.fancybox\.(js|css))|(?:jquery\.fileupload(-\w+)?\.js$)|(?:jquery\.fn\.gantt\.js)|(?:knockout-(\d+\.){3}(debug\.)?js$)|(?:leaflet\.draw-src\.js)|(?:leaflet\.draw\.css)|(?:leaflet\.spin\.js)|(?:libtool\.m4)|(?:ltoptions\.m4)|(?:ltsugar\.m4)|(?:ltversion\.m4)|(?:lt~obsolete\.m4)|(?:materialize\.(css|less|scss|styl|js)$)|(?:modernizr\-\d\.\d+(\.\d+)?\.js$)|(?:modernizr\.custom\.\d+\.js$)|(?:mootools([^.]*)\d+\.\d+.\d+([^.]*)\.js$)|(?:mvnw$)|(?:mvnw\.cmd$)|(?:node_modules/)|(?:normalize\.(css|less|scss|styl)$)|(?:octicons\.css)|(?:pdf\.worker\.js)|(?:proguard-rules\.pro$)|(?:proguard\.pro$)|(?:prototype(.*)\.js$)|(?:puphpet/)|(?:react(-[^.]*)?\.js$)|(?:run\.n$)|(?:select2/.*\.(css|scss|js)$)|(?:shBrush([^.]*)\.js$)|(?:shCore\.js$)|(?:shLegacy\.js$)|(?:skeleton\.(css|less|scss|styl)$)|(?:slick\.\w+.js$)|(?:sprockets-octicons\.scss)|(?:testdata/)|(?:tiny_mce([^.]*)\.js$)|(?:tiny_mce/(langs|plugins|themes|utils))|(?:vendors?/)|(?:waf$)|(?:wicket-leaflet\.js)|(?:yahoo-([^.]*)\.js$)|(?:yui([^.]*)\.js$)))|(?:(.*?)\.d\.ts$)|(?:(3rd|[Tt]hird)[-_]?[Pp]arty/)|(?:([^\s]*)import\.(css|less|scss|styl)$)|(?:(\.|-)min\.(js|css)$)|(?:(^|\/)d3(\.v\d+)?([^.]*)\.js$)|(?:-vsdoc\.js$)|(?:\.imageset/)|(?:\.intellisense\.js$)|(?:\.xctemplate/)`)

View File

@ -0,0 +1,51 @@
package generator
import (
"bytes"
"io"
"io/ioutil"
"gopkg.in/yaml.v2"
)
// typeToTypeConst maps the type string of a language entry to the integer
// stored for that language in the generated type map. Looking up a string
// that is not a key here yields 0, the map's zero value.
// NOTE(review): the integers presumably mirror enry's language Type
// constants — confirm against the type.go template.
var typeToTypeConst = map[string]int{
"data": 1,
"programming": 2,
"markup": 3,
"prose": 4,
}
// Types parses the YAML language definitions in fileToParse, reduces them to
// a language-name -> numeric-type map and renders that map with the template
// tmplName found at tmplPath. The rendered bytes are handed to formatedWrite
// together with outPath. samplesDir is not used; the parameter exists so the
// function complies with the File signature.
func Types(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error {
	raw, err := ioutil.ReadFile(fileToParse)
	if err != nil {
		return err
	}

	parsed := map[string]*languageInfo{}
	err = yaml.Unmarshal(raw, &parsed)
	if err != nil {
		return err
	}

	var rendered bytes.Buffer
	err = executeTypesTemplate(&rendered, buildLanguageTypeMap(parsed), tmplPath, tmplName, commit)
	if err != nil {
		return err
	}

	return formatedWrite(outPath, rendered.Bytes())
}
// buildLanguageTypeMap returns, for every language name in languages, the
// numeric type obtained by looking up the entry's Type in typeToTypeConst.
//
// Two kinds of entries are recorded as 0: a nil entry (for example a YAML key
// whose value is null), which previously caused a nil-pointer panic, and an
// entry whose Type is not a key of typeToTypeConst.
func buildLanguageTypeMap(languages map[string]*languageInfo) map[string]int {
	// The result has exactly one entry per input language.
	langTypeMap := make(map[string]int, len(languages))
	for lang, info := range languages {
		if info == nil {
			langTypeMap[lang] = 0
			continue
		}
		langTypeMap[lang] = typeToTypeConst[info.Type]
	}
	return langTypeMap
}
// executeTypesTemplate writes langTypeMap to out by delegating to
// executeTemplate with the given template name, template path and commit.
func executeTypesTemplate(out io.Writer, langTypeMap map[string]int, tmplPath, tmplName, commit string) error {
	return executeTemplate(
		out,
		tmplName, tmplPath, commit,
		nil, // no extra template functions are registered
		langTypeMap,
	)
}

View File

@ -0,0 +1,104 @@
package generator
import (
"bytes"
"fmt"
"io"
"io/ioutil"
"log"
"sort"
"strings"
"text/template"
"gopkg.in/yaml.v2"
)
// Vendor generates regex matchers in Go for vendoring files/dirs.
// It is of generator.File type.
//
// The YAML document in fileToParse must be a list of regular expressions.
// Every expression rejected by isRE2 is logged but still passed on to the
// template. The rendered output is handed to formatedWrite together with
// outPath. samplesDir is not used.
//
// A YAML parse failure is returned wrapped (%w), so callers can still inspect
// the underlying error with errors.Is / errors.As.
func Vendor(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error {
	data, err := ioutil.ReadFile(fileToParse)
	if err != nil {
		return err
	}

	var regexps []string
	if err := yaml.Unmarshal(data, &regexps); err != nil {
		return fmt.Errorf("failed to parse YAML %s: %w", fileToParse, err)
	}

	// Incompatible expressions are reported only; generation goes on.
	for _, re := range regexps {
		if !isRE2(re) {
			log.Printf("RE2 incompatible syntax for vendor:'%s'\n", re)
		}
	}

	buf := &bytes.Buffer{}
	if err := executeVendorTemplate(buf, regexps, tmplPath, tmplName, commit); err != nil {
		return err
	}

	return formatedWrite(outPath, buf.Bytes())
}
func executeVendorTemplate(out io.Writer, regexps []string, tmplPath, tmplName, commit string) error {
funcs := template.FuncMap{"collateAllRegexps": collateAllRegexps}
return executeTemplate(out, tmplName, tmplPath, commit, funcs, regexps)
}
// collateAllRegexps joins all regexps into a single large regexp, which is at
// least twice as fast to test than simply iterating & matching.
//
// Empirical observation: by looking at the regexps, we only have 3 types.
//  1. Those that start with `^`
//  2. Those that start with `(^|/)`
//  3. All the rest
//
// If we collate our regexps into these 3 groups - that will significantly
// reduce the likelihood of backtracking within the regexp trie matcher.
//
// A further improvement is to use non-capturing groups (?:) as otherwise
// the regexp parser, whilst matching, will have to allocate slices for
// matching positions. (A future improvement left out could be to
// enforce non-capturing groups within the sub-regexps.)
//
// The expressions are sorted before grouping so the output is deterministic;
// sorting is done on a copy, the caller's slice is never reordered. A group
// without members is left out of the result, because an empty alternative
// such as `^(?:(?:))` matches every input and would make the whole expression
// match everything. For an empty input the result is the empty string.
func collateAllRegexps(regexps []string) string {
	const (
		caret        = "^"
		caretOrSlash = "(^|/)"
	)

	sorted := append([]string(nil), regexps...)
	sort.Strings(sorted)

	// Check prefix, group expressions (the shared prefix is stripped here and
	// re-added once per group below).
	var caretPrefixed, caretOrSlashPrefixed, theRest []string
	for _, re := range sorted {
		switch {
		case strings.HasPrefix(re, caret):
			caretPrefixed = append(caretPrefixed, strings.TrimPrefix(re, caret))
		case strings.HasPrefix(re, caretOrSlash):
			caretOrSlashPrefixed = append(caretOrSlashPrefixed, strings.TrimPrefix(re, caretOrSlash))
		default:
			theRest = append(theRest, re)
		}
	}

	groups := []struct {
		prefix string
		res    []string
	}{
		{"^", caretPrefixed},
		{"(?:^|/)", caretOrSlashPrefixed},
		{"", theRest},
	}

	var sb strings.Builder
	for _, g := range groups {
		if len(g.res) == 0 {
			continue
		}
		if sb.Len() > 0 {
			sb.WriteString("|")
		}
		appendGroupWithCommonPrefix(&sb, g.prefix, g.res)
	}
	return sb.String()
}

// appendGroupWithCommonPrefix writes the alternatives in res to sb, each one
// inside its own non-capturing group. With a non-empty commonPrefix the output
// has the form `(?:PREFIX(?:(?:a)|(?:b)))`; with an empty one it is
// `(?:a)|(?:b)`.
func appendGroupWithCommonPrefix(sb *strings.Builder, commonPrefix string, res []string) {
	sb.WriteString("(?:")
	if commonPrefix != "" {
		sb.WriteString(commonPrefix)
		sb.WriteString("(?:(?:")
	}
	sb.WriteString(strings.Join(res, ")|(?:"))
	if commonPrefix != "" {
		sb.WriteString("))")
	}
	sb.WriteString(")")
}

View File

@ -0,0 +1,157 @@
package main
import (
	"fmt"
	"io/ioutil"
	"log"
	"path/filepath"
	"strings"

	"github.com/go-enry/go-enry/v2/internal/code-generator/generator"
)
// Locations used by the generator. For every generated file there is the
// output path (…File), the template path (…TmplPath) and the template name
// (…Tmpl); some generators additionally read a linguist YAML file (…YAML).
// All paths are relative and built with filepath.Join so they use the
// separator of the host OS.
var (
	// directories
	samplesDir = filepath.Join(".linguist", "samples")
	libDir     = filepath.Join(".linguist", "lib", "linguist")
	assetsDir  = filepath.Join("internal", "code-generator", "assets")

	// languages info file
	languagesYAML = filepath.Join(libDir, "languages.yml")

	// extension.go generation
	extensionsFile     = filepath.Join("data", "extension.go")
	extensionsTmplPath = filepath.Join(assetsDir, "extension.go.tmpl")
	extensionsTmpl     = "extension.go.tmpl"

	// content.go generation
	heuristicsYAML  = filepath.Join(libDir, "heuristics.yml")
	contentFile     = filepath.Join("data", "content.go")
	contentTmplPath = filepath.Join(assetsDir, "content.go.tmpl")
	contentTmpl     = "content.go.tmpl"

	// vendor.go generation
	vendorYAML     = filepath.Join(libDir, "vendor.yml")
	vendorFile     = filepath.Join("data", "vendor.go")
	vendorTmplPath = filepath.Join(assetsDir, "vendor.go.tmpl")
	vendorTmpl     = "vendor.go.tmpl"

	// documentation.go generation
	documentationYAML     = filepath.Join(libDir, "documentation.yml")
	documentationFile     = filepath.Join("data", "documentation.go")
	documentationTmplPath = filepath.Join(assetsDir, "documentation.go.tmpl")
	documentationTmpl     = "documentation.go.tmpl"

	// type.go generation
	typeFile     = filepath.Join("data", "type.go")
	typeTmplPath = filepath.Join(assetsDir, "type.go.tmpl")
	typeTmpl     = "type.go.tmpl"

	// interpreter.go generation
	interpretersFile     = filepath.Join("data", "interpreter.go")
	interpretersTmplPath = filepath.Join(assetsDir, "interpreter.go.tmpl")
	interpretersTmpl     = "interpreter.go.tmpl"

	// filename.go generation
	filenamesFile     = filepath.Join("data", "filename.go")
	filenamesTmplPath = filepath.Join(assetsDir, "filename.go.tmpl")
	filenamesTmpl     = "filename.go.tmpl"

	// alias.go generation
	aliasesFile     = filepath.Join("data", "alias.go")
	aliasesTmplPath = filepath.Join(assetsDir, "alias.go.tmpl")
	aliasesTmpl     = "alias.go.tmpl"

	// frequencies.go generation
	frequenciesFile     = filepath.Join("data", "frequencies.go")
	frequenciesTmplPath = filepath.Join(assetsDir, "frequencies.go.tmpl")
	frequenciesTmpl     = "frequencies.go.tmpl"

	// commit.go generation
	commitFile     = filepath.Join("data", "commit.go")
	commitTmplPath = filepath.Join(assetsDir, "commit.go.tmpl")
	commitTmpl     = "commit.go.tmpl"

	// mimeType.go generation
	mimeTypeFile     = filepath.Join("data", "mimeType.go")
	mimeTypeTmplPath = filepath.Join(assetsDir, "mimeType.go.tmpl")
	mimeTypeTmpl     = "mimeType.go.tmpl"

	// colors.go generation
	colorsFile     = filepath.Join("data", "colors.go")
	colorsTmplPath = filepath.Join(assetsDir, "colors.go.tmpl")
	colorsTmpl     = "colors.go.tmpl"

	// groups.go generation
	groupsFile     = filepath.Join("data", "groups.go")
	groupsTmplPath = filepath.Join(assetsDir, "groups.go.tmpl")
	groupsTmpl     = "groups.go.tmpl"

	// id.go generation
	// Built with filepath.Join like every other output path (it used to be
	// the hard-coded, slash-separated "data/id.go").
	idFile     = filepath.Join("data", "id.go")
	idTmplPath = filepath.Join(assetsDir, "id.go.tmpl")
	idTmpl     = "id.go.tmpl"

	// languageInfo.go generation
	// The "langauge" spelling is kept: other code refers to these names.
	languageInfoFile     = filepath.Join("data", "languageInfo.go")
	langaugeInfoTmplPath = filepath.Join(assetsDir, "languageInfo.go.tmpl")
	langaugeInfoTmpl     = "languageInfo.go.tmpl"

	// git HEAD file of the linguist checkout, read to find the commit
	commitPath = filepath.Join(".linguist", ".git", "HEAD")
)
// generatorFiles bundles one generator function with the arguments it is
// invoked with. The field order matters: values of this type are written as
// unkeyed composite literals.
type generatorFiles struct {
	// generate is the generator function to run.
	generate generator.File
	// Inputs: the file to parse and the samples directory.
	fileToParse, samplesDir string
	// Output path, template path and template name.
	outPath, tmplPath, tmplName string
	// commit is passed through to the generator unchanged.
	commit string
}
// main runs every generator in turn. When the commit cannot be determined the
// problem is logged and generation continues with whatever getCommit
// returned; the first generator that fails aborts the program via log.Fatalf.
func main() {
	commit, err := getCommit(commitPath)
	if err != nil {
		log.Printf("couldn't find commit: %v", err)
	}

	// One entry per generated file: generator, input file, samples dir,
	// output path, template path, template name, commit.
	jobs := []*generatorFiles{
		{generator.Extensions, languagesYAML, "", extensionsFile, extensionsTmplPath, extensionsTmpl, commit},
		{generator.GenHeuristics, heuristicsYAML, "", contentFile, contentTmplPath, contentTmpl, commit},
		{generator.Vendor, vendorYAML, "", vendorFile, vendorTmplPath, vendorTmpl, commit},
		{generator.Documentation, documentationYAML, "", documentationFile, documentationTmplPath, documentationTmpl, commit},
		{generator.Types, languagesYAML, "", typeFile, typeTmplPath, typeTmpl, commit},
		{generator.Interpreters, languagesYAML, "", interpretersFile, interpretersTmplPath, interpretersTmpl, commit},
		{generator.Filenames, languagesYAML, samplesDir, filenamesFile, filenamesTmplPath, filenamesTmpl, commit},
		{generator.Aliases, languagesYAML, "", aliasesFile, aliasesTmplPath, aliasesTmpl, commit},
		{generator.Frequencies, "", samplesDir, frequenciesFile, frequenciesTmplPath, frequenciesTmpl, commit},
		{generator.Commit, "", "", commitFile, commitTmplPath, commitTmpl, commit},
		{generator.MimeType, languagesYAML, "", mimeTypeFile, mimeTypeTmplPath, mimeTypeTmpl, commit},
		{generator.Colors, languagesYAML, "", colorsFile, colorsTmplPath, colorsTmpl, commit},
		{generator.Groups, languagesYAML, "", groupsFile, groupsTmplPath, groupsTmpl, commit},
		{generator.ID, languagesYAML, "", idFile, idTmplPath, idTmpl, commit},
		{generator.LanguageInfo, languagesYAML, "", languageInfoFile, langaugeInfoTmplPath, langaugeInfoTmpl, commit},
	}

	for _, job := range jobs {
		genErr := job.generate(job.fileToParse, job.samplesDir, job.outPath, job.tmplPath, job.tmplName, job.commit)
		if genErr != nil {
			log.Fatalf("failed to generate %q from %q - %+v", job.outPath, job.tmplPath, genErr)
		}
	}
}
// getCommit returns the commit hash recorded by the git HEAD file at path.
//
// When HEAD is detached the file holds the hash itself. Otherwise it holds a
// symbolic reference of the form "ref: refs/heads/<branch>"; the referenced
// file is then read relative to the directory that contains path (the git
// directory). Any branch name is accepted, not only master.
//
// Surrounding whitespace, including the trailing newline, is stripped from
// whatever is read. An error is returned when a file cannot be read or
// contains nothing but whitespace. Only loose ref files are consulted: a
// branch that exists solely in packed-refs results in a read error.
func getCommit(path string) (string, error) {
	const symrefPrefix = "ref: "

	head, err := readTrimmed(path)
	if err != nil {
		return "", err
	}
	if !strings.HasPrefix(head, symrefPrefix) {
		// Detached HEAD: the content already is the commit hash.
		return head, nil
	}

	// Refs are stored with forward slashes; convert for the host OS.
	ref := strings.TrimSpace(strings.TrimPrefix(head, symrefPrefix))
	return readTrimmed(filepath.Join(filepath.Dir(path), filepath.FromSlash(ref)))
}

// readTrimmed returns the content of the file at path with leading and
// trailing whitespace removed; it fails when nothing remains.
func readTrimmed(path string) (string, error) {
	raw, err := ioutil.ReadFile(path)
	if err != nil {
		return "", err
	}
	content := strings.TrimSpace(string(raw))
	if content == "" {
		return "", fmt.Errorf("%s: no commit information found", path)
	}
	return content, nil
}

52
internal/tests/utils.go Normal file
View File

@ -0,0 +1,52 @@
package tests
import (
"fmt"
"io/ioutil"
"os"
"os/exec"
)
// MaybeCloneLinguist makes a linguist checkout at the given commit available
// and returns the directory it lives in.
//
// If the environment variable named envVar is non-empty, its value is used as
// the directory of an existing checkout. Otherwise url is cloned (with
// --depth 150) into a fresh temporary directory and the second result is
// true, telling the caller that the directory needs to be cleaned up. In both
// cases `git checkout commit` is then run in that directory.
//
// Re-used by the packages: enry (test), enry (benchmark) and code-generator (test).
// Does not rely on testify, panics on errors so that there always is a trace
// to the caller; the returned error is consequently always nil.
//
// The checkout runs with exec.Cmd.Dir pointing at the directory, so the
// working directory of the calling process is never changed, not even when
// the function panics.
func MaybeCloneLinguist(envVar, url, commit string) (string, bool, error) {
	linguistTmpDir := os.Getenv(envVar)
	isCleanupNeeded := false

	if linguistTmpDir == "" {
		var err error
		linguistTmpDir, err = ioutil.TempDir("", "linguist-")
		if err != nil {
			panic(err)
		}
		isCleanupNeeded = true

		clone := exec.Command("git", "clone", "--depth", "150", url, linguistTmpDir)
		if err := clone.Run(); err != nil {
			panicOn(clone.String(), err)
		}
	}

	checkout := exec.Command("git", "checkout", commit)
	checkout.Dir = linguistTmpDir
	if err := checkout.Run(); err != nil {
		panicOn(checkout.String(), err)
	}

	return linguistTmpDir, isCleanupNeeded, nil
}

// panicOn panics with an error that names the failed command cmd and wraps err.
func panicOn(cmd string, err error) {
	panic(fmt.Errorf("%q returned %w", cmd, err))
}

View File

@ -0,0 +1,7 @@
// Package tokenizer implements file tokenization used by the enry content
// classifier. This package is an implementation detail of enry and should not
// be imported by other packages.
package tokenizer
// ByteLimit defines the maximum prefix of an input text that will be tokenized.
const ByteLimit = 100000

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,336 @@
#ifndef linguist_yyHEADER_H
#define linguist_yyHEADER_H 1
#define linguist_yyIN_HEADER 1
#line 6 "lex.linguist_yy.h"
#define YY_INT_ALIGNED short int
/* A lexical scanner generated by flex */
#define FLEX_SCANNER
#define YY_FLEX_MAJOR_VERSION 2
#define YY_FLEX_MINOR_VERSION 5
#define YY_FLEX_SUBMINOR_VERSION 35
#if YY_FLEX_SUBMINOR_VERSION > 0
#define FLEX_BETA
#endif
/* First, we deal with platform-specific or compiler-specific issues. */
/* begin standard C headers. */
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
/* end standard C headers. */
/* flex integer type definitions */
#ifndef FLEXINT_H
#define FLEXINT_H
/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
* if you want the limit (max/min) macros for int types.
*/
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS 1
#endif
#include <inttypes.h>
typedef int8_t flex_int8_t;
typedef uint8_t flex_uint8_t;
typedef int16_t flex_int16_t;
typedef uint16_t flex_uint16_t;
typedef int32_t flex_int32_t;
typedef uint32_t flex_uint32_t;
typedef uint64_t flex_uint64_t;
#else
typedef signed char flex_int8_t;
typedef short int flex_int16_t;
typedef int flex_int32_t;
typedef unsigned char flex_uint8_t;
typedef unsigned short int flex_uint16_t;
typedef unsigned int flex_uint32_t;
#endif /* ! C99 */
/* Limits of integral types. */
#ifndef INT8_MIN
#define INT8_MIN (-128)
#endif
#ifndef INT16_MIN
#define INT16_MIN (-32767-1)
#endif
#ifndef INT32_MIN
#define INT32_MIN (-2147483647-1)
#endif
#ifndef INT8_MAX
#define INT8_MAX (127)
#endif
#ifndef INT16_MAX
#define INT16_MAX (32767)
#endif
#ifndef INT32_MAX
#define INT32_MAX (2147483647)
#endif
#ifndef UINT8_MAX
#define UINT8_MAX (255U)
#endif
#ifndef UINT16_MAX
#define UINT16_MAX (65535U)
#endif
#ifndef UINT32_MAX
#define UINT32_MAX (4294967295U)
#endif
#endif /* ! FLEXINT_H */
#ifdef __cplusplus
/* The "const" storage-class-modifier is valid. */
#define YY_USE_CONST
#else /* ! __cplusplus */
/* C99 requires __STDC__ to be defined as 1. */
#if defined (__STDC__)
#define YY_USE_CONST
#endif /* defined (__STDC__) */
#endif /* ! __cplusplus */
#ifdef YY_USE_CONST
#define yyconst const
#else
#define yyconst
#endif
/* An opaque pointer. */
#ifndef YY_TYPEDEF_YY_SCANNER_T
#define YY_TYPEDEF_YY_SCANNER_T
typedef void* yyscan_t;
#endif
/* For convenience, these vars (plus the bison vars far below)
are macros in the reentrant scanner. */
#define yyin yyg->yyin_r
#define yyout yyg->yyout_r
#define yyextra yyg->yyextra_r
#define yyleng yyg->yyleng_r
#define yytext yyg->yytext_r
#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno)
#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column)
#define yy_flex_debug yyg->yy_flex_debug_r
/* Size of default input buffer. */
#ifndef YY_BUF_SIZE
#define YY_BUF_SIZE 16384
#endif
#ifndef YY_TYPEDEF_YY_BUFFER_STATE
#define YY_TYPEDEF_YY_BUFFER_STATE
typedef struct yy_buffer_state *YY_BUFFER_STATE;
#endif
#ifndef YY_TYPEDEF_YY_SIZE_T
#define YY_TYPEDEF_YY_SIZE_T
typedef size_t yy_size_t;
#endif
#ifndef YY_STRUCT_YY_BUFFER_STATE
#define YY_STRUCT_YY_BUFFER_STATE
/* State of one scanner input buffer. YY_BUFFER_STATE is a pointer to this
 * struct; the YY_STRUCT_YY_BUFFER_STATE guard keeps it from being declared twice.
 */
struct yy_buffer_state
{
/* NOTE(review): presumably the stdio stream this buffer reads from - confirm
 * against the generated scanner source.
 */
FILE *yy_input_file;
char *yy_ch_buf; /* input buffer */
char *yy_buf_pos; /* current position in input buffer */
/* Size of input buffer in bytes, not including room for EOB
* characters.
*/
yy_size_t yy_buf_size;
/* Number of characters read into yy_ch_buf, not including EOB
* characters.
*/
yy_size_t yy_n_chars;
/* Whether we "own" the buffer - i.e., we know we created it,
* and can realloc() it to grow it, and should free() it to
* delete it.
*/
int yy_is_our_buffer;
/* Whether this is an "interactive" input source; if so, and
* if we're using stdio for input, then we want to use getc()
* instead of fread(), to make sure we stop fetching input after
* each newline.
*/
int yy_is_interactive;
/* Whether we're considered to be at the beginning of a line.
* If so, '^' rules will be active on the next match, otherwise
* not.
*/
int yy_at_bol;
int yy_bs_lineno; /**< The line count. */
int yy_bs_column; /**< The column count. */
/* Whether to try to fill the input buffer when we reach the
* end of it.
*/
int yy_fill_buffer;
/* Buffer status value; its possible values are not defined in the visible
 * part of this header.
 */
int yy_buffer_status;
};
#endif /* !YY_STRUCT_YY_BUFFER_STATE */
void linguist_yyrestart (FILE *input_file ,yyscan_t yyscanner );
void linguist_yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
YY_BUFFER_STATE linguist_yy_create_buffer (FILE *file,int size ,yyscan_t yyscanner );
void linguist_yy_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
void linguist_yy_flush_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
void linguist_yypush_buffer_state (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
void linguist_yypop_buffer_state (yyscan_t yyscanner );
YY_BUFFER_STATE linguist_yy_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner );
YY_BUFFER_STATE linguist_yy_scan_string (yyconst char *yy_str ,yyscan_t yyscanner );
YY_BUFFER_STATE linguist_yy_scan_bytes (yyconst char *bytes,yy_size_t len ,yyscan_t yyscanner );
void *linguist_yyalloc (yy_size_t ,yyscan_t yyscanner );
void *linguist_yyrealloc (void *,yy_size_t ,yyscan_t yyscanner );
void linguist_yyfree (void * ,yyscan_t yyscanner );
/* Begin user sect3 */
#define yytext_ptr yytext_r
#ifdef YY_HEADER_EXPORT_START_CONDITIONS
#define INITIAL 0
#define sgml 1
#define c_comment 2
#define xml_comment 3
#define haskell_comment 4
#define ocaml_comment 5
#define python_dcomment 6
#define python_scomment 7
#endif
#ifndef YY_NO_UNISTD_H
/* Special case for "unistd.h", since it is non-ANSI. We include it way
* down here because we want the user's section 1 to have been scanned first.
* The user has a chance to override it with an option.
*/
#include <unistd.h>
#endif
#define YY_EXTRA_TYPE struct tokenizer_extra *
int linguist_yylex_init (yyscan_t* scanner);
int linguist_yylex_init_extra (YY_EXTRA_TYPE user_defined,yyscan_t* scanner);
/* Accessor methods to globals.
These are made visible to non-reentrant scanners for convenience. */
int linguist_yylex_destroy (yyscan_t yyscanner );
int linguist_yyget_debug (yyscan_t yyscanner );
void linguist_yyset_debug (int debug_flag ,yyscan_t yyscanner );
YY_EXTRA_TYPE linguist_yyget_extra (yyscan_t yyscanner );
void linguist_yyset_extra (YY_EXTRA_TYPE user_defined ,yyscan_t yyscanner );
FILE *linguist_yyget_in (yyscan_t yyscanner );
void linguist_yyset_in (FILE * in_str ,yyscan_t yyscanner );
FILE *linguist_yyget_out (yyscan_t yyscanner );
void linguist_yyset_out (FILE * out_str ,yyscan_t yyscanner );
yy_size_t linguist_yyget_leng (yyscan_t yyscanner );
char *linguist_yyget_text (yyscan_t yyscanner );
int linguist_yyget_lineno (yyscan_t yyscanner );
void linguist_yyset_lineno (int line_number ,yyscan_t yyscanner );
/* Macros after this point can all be overridden by user definitions in
* section 1.
*/
#ifndef YY_SKIP_YYWRAP
#ifdef __cplusplus
extern "C" int linguist_yywrap (yyscan_t yyscanner );
#else
extern int linguist_yywrap (yyscan_t yyscanner );
#endif
#endif
#ifndef yytext_ptr
static void yy_flex_strncpy (char *,yyconst char *,int ,yyscan_t yyscanner);
#endif
#ifdef YY_NEED_STRLEN
static int yy_flex_strlen (yyconst char * ,yyscan_t yyscanner);
#endif
#ifndef YY_NO_INPUT
#endif
/* Amount of stuff to slurp up with each read. */
#ifndef YY_READ_BUF_SIZE
#define YY_READ_BUF_SIZE 8192
#endif
/* Number of entries by which start-condition stack grows. */
#ifndef YY_START_STACK_INCR
#define YY_START_STACK_INCR 25
#endif
/* Default declaration of generated scanner - a define so the user can
* easily add parameters.
*/
#ifndef YY_DECL
#define YY_DECL_IS_OURS 1
extern int linguist_yylex (yyscan_t yyscanner);
#define YY_DECL int linguist_yylex (yyscan_t yyscanner)
#endif /* !YY_DECL */
/* yy_get_previous_state - get the state just before the EOB char was reached */
#undef YY_NEW_FILE
#undef YY_FLUSH_BUFFER
#undef yy_set_bol
#undef yy_new_buffer
#undef yy_set_interactive
#undef YY_DO_BEFORE_ACTION
#ifdef YY_DECL_IS_OURS
#undef YY_DECL_IS_OURS
#undef YY_DECL
#endif
#line 118 "tokenizer.l"
#line 335 "lex.linguist_yy.h"
#undef linguist_yyIN_HEADER
#endif /* linguist_yyHEADER_H */

View File

@ -0,0 +1,15 @@
// https://github.com/github/linguist/blob/f72f2a21dfe80ebd16af3bc6216da75cd983a4f6/ext/linguist/linguist.h#L1
// tokenizer_type is the kind of result reported through tokenizer_extra.type.
// The Go caller (TokenizeFlex) resets the field to NO_ACTION before every
// linguist_yylex call and treats the other three values as tokens.
enum tokenizer_type {
// Nothing to report; TokenizeFlex emits no token for it.
NO_ACTION,
// Token that TokenizeFlex emits unchanged.
REGULAR_TOKEN,
// Shebang token; TokenizeFlex emits it with a "SHEBANG#!" prefix.
SHEBANG_TOKEN,
// SGML token; TokenizeFlex emits it with a ">" suffix.
SGML_TOKEN,
};
// tokenizer_extra is the scanner's user data: the generated header defines
// YY_EXTRA_TYPE as a pointer to this struct.
struct tokenizer_extra {
// Text of the current token. TokenizeFlex measures it with strlen() and
// releases it with free() after each token.
char *token;
// Kind of the current token (see enum tokenizer_type).
enum tokenizer_type type;
};
// TODO(bzz) port Win support from
// https://github.com/github/linguist/commit/8e912b4d8bf2aef7948de59eba48b75cfcbc97e0

View File

@ -0,0 +1,73 @@
// +build flex
package flex
// #include <stdlib.h>
// #include "linguist.h"
// #include "lex.linguist_yy.h"
// int linguist_yywrap(yyscan_t yyscanner) {
// return 1;
// }
import "C"
import "unsafe"
const maxTokenLen = 32 // bytes
// TokenizeFlex tokenizes content by driving the Flex-generated linguist scanner through cgo.
// It is a transliteration of the C code at https://github.com/github/linguist/blob/master/ext/linguist/linguist.c#L12
//
// Tokens longer than maxTokenLen bytes are dropped. Shebang tokens are
// returned with a "SHEBANG#!" prefix and SGML tokens with a ">" suffix.
func TokenizeFlex(content []byte) []string {
	var (
		scanner C.yyscan_t
		extra   C.struct_tokenizer_extra
		tokLen  C.ulong
	)

	// The scanner reads from a C-allocated copy of the input.
	cs := C.CBytes(content)
	defer C.free(unsafe.Pointer(cs))

	C.linguist_yylex_init_extra(&extra, &scanner)
	buf := C.linguist_yy_scan_bytes((*C.char)(cs), C.ulong(len(content)), scanner)

	ary := []string{}
	for {
		// Reset the shared state so a call that reports nothing reads as NO_ACTION.
		extra._type = C.NO_ACTION
		extra.token = nil
		r := C.linguist_yylex(scanner)

		switch extra._type {
		case C.REGULAR_TOKEN, C.SHEBANG_TOKEN, C.SGML_TOKEN:
			tokLen = C.strlen(extra.token)
			if tokLen <= maxTokenLen {
				tok := C.GoStringN(extra.token, (C.int)(tokLen))
				switch extra._type {
				case C.SHEBANG_TOKEN:
					tok = "SHEBANG#!" + tok
				case C.SGML_TOKEN:
					tok = tok + ">"
				}
				ary = append(ary, tok)
			}
			// The token text is released here whether or not it was kept.
			C.free(unsafe.Pointer(extra.token))
		}

		// A zero return from the scanner ends the loop.
		if r == 0 {
			break
		}
	}

	C.linguist_yy_delete_buffer(buf, scanner)
	C.linguist_yylex_destroy(scanner)
	return ary
}

View File

@ -0,0 +1,210 @@
// +build !flex
package tokenizer
import (
"bytes"
"github.com/go-enry/go-enry/v2/regex"
)
// Tokenize splits content into lexical tokens, matching what the Linguist
// library returns. Only the first ByteLimit bytes of content are considered.
//
// The extractors in extractTokens run in order; each one receives the content
// returned by the previous one and contributes its own tokens.
//
// BUG: Until https://github.com/src-d/enry/issues/193 is resolved, there are some
// differences between this function and the Linguist output.
func Tokenize(content []byte) []string {
	remaining := content
	if len(remaining) > ByteLimit {
		remaining = remaining[:ByteLimit]
	}

	collected := make([][]byte, 0, 50)
	for i := range extractTokens {
		rest, found := extractTokens[i](remaining)
		remaining = rest
		collected = append(collected, found...)
	}

	return toString(collected)
}
// toString converts every byte-slice token into a string, preserving order.
// The result is never nil, even for empty input.
func toString(tokens [][]byte) []string {
	converted := make([]string, len(tokens))
	for i := range tokens {
		converted[i] = string(tokens[i])
	}
	return converted
}
var (
// extractTokens is the pipeline Tokenize runs over the content.
extractTokens = []func(content []byte) (replacedContent []byte, tokens [][]byte){
// The order must be this: each extractor receives the content returned by the previous one.
extractAndReplaceShebang,
extractAndReplaceSGML,
skipCommentsAndLiterals,
extractAndReplacePunctuation,
extractAndReplaceRegular,
extractAndReplaceOperator,
extractRemainders,
}
// Differences between golang regexp and oniguruma:
// 1. no (?s) in oniguruma - makes dot match \n
// 2. no (?U) in oniguruma - ungreedy *
// 3. (?m) implies dot matches \n in oniguruma
// 4. oniguruma handles \w differently - impossible, but true
//
// Workarounds:
// 1. (.|\n)
// 2. replace * with *?
// 3. replace . with [^\n]
// 4. replace \w with [0-9A-Za-z_]
//
// Original golang regexps:
//
// reLiteralStringQuotes = regexp.MustCompile(`(?sU)(".*"|'.*')`)
// reSingleLineComment = regexp.MustCompile(`(?m)(//|--|#|%|")\s(.*$)`)
// reMultilineComment = regexp.MustCompile(`(?sU)(/\*.*\*/|<!--.*-->|\{-.*-\}|\(\*.*\*\)|""".*"""|'''.*''')`)
// reLiteralNumber = regexp.MustCompile(`(0x[0-9A-Fa-f]([0-9A-Fa-f]|\.)*|\d(\d|\.)*)([uU][lL]{0,2}|([eE][-+]\d*)?[fFlL]*)`)
// reShebang = regexp.MustCompile(`(?m)^#!(?:/\w+)*/(?:(\w+)|\w+(?:\s*\w+=\w+\s*)*\s*(\w+))(?:\s*-\w+\s*)*$`)
// rePunctuation = regexp.MustCompile(`;|\{|\}|\(|\)|\[|\]`)
// reSGML = regexp.MustCompile(`(?sU)(<\/?[^\s<>=\d"']+)(?:\s.*\/?>|>)`)
// reSGMLComment = regexp.MustCompile(`(?sU)(<!--.*-->)`)
// reSGMLAttributes = regexp.MustCompile(`\s+(\w+=)|\s+([^\s>]+)`)
// reSGMLLoneAttribute = regexp.MustCompile(`(\w+)`)
// reRegularToken = regexp.MustCompile(`[\w\.@#\/\*]+`)
// reOperators = regexp.MustCompile(`<<?|\+|\-|\*|\/|%|&&?|\|\|?`)
//
// These regexps were converted to work in the same way for both engines:
//
reLiteralStringQuotes = regex.MustCompile(`("(.|\n)*?"|'(.|\n)*?')`)
reSingleLineComment = regex.MustCompile(`(?m)(//|--|#|%|")\s([^\n]*$)`)
reMultilineComment = regex.MustCompile(`(/\*(.|\n)*?\*/|<!--(.|\n)*?-->|\{-(.|\n)*?-\}|\(\*(.|\n)*?\*\)|"""(.|\n)*?"""|'''(.|\n)*?''')`)
reLiteralNumber = regex.MustCompile(`(0x[0-9A-Fa-f]([0-9A-Fa-f]|\.)*|\d(\d|\.)*)([uU][lL]{0,2}|([eE][-+]\d*)?[fFlL]*)`)
reShebang = regex.MustCompile(`(?m)^#!(?:/[0-9A-Za-z_]+)*/(?:([0-9A-Za-z_]+)|[0-9A-Za-z_]+(?:\s*[0-9A-Za-z_]+=[0-9A-Za-z_]+\s*)*\s*([0-9A-Za-z_]+))(?:\s*-[0-9A-Za-z_]+\s*)*$`)
rePunctuation = regex.MustCompile(`;|\{|\}|\(|\)|\[|\]`)
reSGML = regex.MustCompile(`(<\/?[^\s<>=\d"']+)(?:\s(.|\n)*?\/?>|>)`)
reSGMLComment = regex.MustCompile(`(<!--(.|\n)*?-->)`)
reSGMLAttributes = regex.MustCompile(`\s+([0-9A-Za-z_]+=)|\s+([^\s>]+)`)
reSGMLLoneAttribute = regex.MustCompile(`([0-9A-Za-z_]+)`)
reRegularToken = regex.MustCompile(`[0-9A-Za-z_\.@#\/\*]+`)
reOperators = regex.MustCompile(`<<?|\+|\-|\*|\/|%|&&?|\|\|?`)
// regexToSkip lists the patterns skipCommentsAndLiterals blanks out, in application order.
regexToSkip = []regex.EnryRegexp{
// The order must be this
reLiteralStringQuotes,
reMultilineComment,
reSingleLineComment,
reLiteralNumber,
}
)
// extractAndReplaceShebang returns one "SHEBANG#!..." token for every shebang
// line matched by reShebang, together with the content it was given.
//
// NOTE(review): despite the name, the content is returned unmodified because
// the ReplaceAll result below is discarded. The shebang lines are therefore
// tokenized again by the later extractors, and the expected tokens in the
// package tests include those pieces. Assigning the result would change the
// tokenizer output, so confirm the intended behavior before "fixing" it.
func extractAndReplaceShebang(content []byte) ([]byte, [][]byte) {
	matches := reShebang.FindAllSubmatch(content, -1)
	if matches == nil {
		return content, nil
	}

	shebangTokens := make([][]byte, 0, 2)
	for i := range matches {
		shebangTokens = append(shebangTokens, getShebangToken(matches[i]))
	}

	// Result deliberately left unused to preserve current behavior (see NOTE above).
	reShebang.ReplaceAll(content, []byte(` `))

	return content, shebangTokens
}
// getShebangToken builds the token for a single reShebang match: the prefix
// "SHEBANG#!" followed by the first non-empty capture group. Element 0 (the
// whole match) is ignored; if no group captured anything, only the prefix is
// returned.
func getShebangToken(matchedShebang [][]byte) []byte {
	tokenShebang := []byte(`SHEBANG#!`)
	for idx, group := range matchedShebang {
		if idx == 0 || len(group) == 0 {
			continue
		}
		return append(tokenShebang, group...)
	}
	return tokenShebang
}
// commonExtractAndReplace returns every match of re in content as a token and
// a copy of content in which each match has been replaced by a single space.
func commonExtractAndReplace(content []byte, re regex.EnryRegexp) ([]byte, [][]byte) {
	found := re.FindAll(content, -1)
	blanked := re.ReplaceAll(content, []byte(` `))
	return blanked, found
}
// extractAndReplacePunctuation extracts the punctuation matched by
// rePunctuation (; { } ( ) [ ]) and blanks it out of content.
func extractAndReplacePunctuation(content []byte) ([]byte, [][]byte) {
	rest, punctuation := commonExtractAndReplace(content, rePunctuation)
	return rest, punctuation
}
// extractAndReplaceRegular extracts the runs matched by reRegularToken
// (ASCII letters, digits, '_', '.', '@', '#', '/', '*') and blanks them out of content.
func extractAndReplaceRegular(content []byte) ([]byte, [][]byte) {
	rest, regular := commonExtractAndReplace(content, reRegularToken)
	return rest, regular
}
// extractAndReplaceOperator extracts the operators matched by reOperators
// and blanks them out of content.
func extractAndReplaceOperator(content []byte) ([]byte, [][]byte) {
	rest, operators := commonExtractAndReplace(content, reOperators)
	return rest, operators
}
// extractAndReplaceSGML extracts tokens from SGML-like tags matched by reSGML.
// For every tag that is not a comment it emits the tag opening followed by
// '>' (e.g. "<title>"), then the tag's attribute tokens. All reSGML matches,
// comments included, are replaced by a single space in the returned content.
func extractAndReplaceSGML(content []byte) ([]byte, [][]byte) {
	matches := reSGML.FindAllSubmatch(content, -1)
	if matches == nil {
		return content, nil
	}

	SGMLTokens := make([][]byte, 0, 2)
	for _, match := range matches {
		tag := match[0]
		if !reSGMLComment.Match(tag) {
			// Copy the capture so appending '>' cannot write into content.
			name := append([]byte(nil), match[1]...)
			name = append(name, '>')
			SGMLTokens = append(SGMLTokens, name)
			SGMLTokens = append(SGMLTokens, getSGMLAttributes(tag)...)
		}
	}

	return reSGML.ReplaceAll(content, []byte(` `)), SGMLTokens
}
// getSGMLAttributes returns the attribute tokens found inside one SGML tag.
// A whitespace-preceded `name=` is emitted as is; any other whitespace-preceded
// chunk is broken into its word runs via reSGMLLoneAttribute. The result is
// nil when reSGMLAttributes does not match at all.
func getSGMLAttributes(SGMLTag []byte) [][]byte {
	matches := reSGMLAttributes.FindAllSubmatch(SGMLTag, -1)
	if matches == nil {
		return nil
	}

	attributes := make([][]byte, 0, 5)
	for _, match := range matches {
		if named := match[1]; len(named) != 0 {
			attributes = append(attributes, named)
		}
		if chunk := match[2]; len(chunk) != 0 {
			attributes = append(attributes, reSGMLLoneAttribute.FindAll(chunk, -1)...)
		}
	}
	return attributes
}
// skipCommentsAndLiterals replaces every match of the regexToSkip patterns,
// applied in slice order, with a single space. It never produces tokens.
func skipCommentsAndLiterals(content []byte) ([]byte, [][]byte) {
	for i := 0; i < len(regexToSkip); i++ {
		content = regexToSkip[i].ReplaceAll(content, []byte(` `))
	}
	return content, nil
}
// extractRemainders turns whatever is left in content into tokens: the content
// is split on whitespace and every field is then split into its individual
// UTF-8 sequences. The content itself is returned unchanged.
func extractRemainders(content []byte) ([]byte, [][]byte) {
	fields := bytes.Fields(content)
	remainderTokens := make([][]byte, 0, 3*len(fields))
	for i := range fields {
		// An empty separator makes bytes.Split break the field up after each UTF-8 sequence.
		remainderTokens = append(remainderTokens, bytes.Split(fields[i], nil)...)
	}
	return content, remainderTokens
}

View File

@ -0,0 +1,15 @@
// +build flex
package tokenizer
import "github.com/go-enry/go-enry/v2/internal/tokenizer/flex"
// Tokenize splits content into lexical tokens using the Flex-based tokenizer,
// matching what the Linguist library returns. Only the first ByteLimit bytes
// of content are passed to it.
func Tokenize(content []byte) []string {
	limited := content
	if len(limited) > ByteLimit {
		limited = limited[:ByteLimit]
	}
	return flex.TokenizeFlex(limited)
}

View File

@ -0,0 +1,179 @@
package tokenizer
import (
"fmt"
"testing"
"github.com/go-enry/go-enry/v2/regex"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
const (
// testContent is the fixture fed to Tokenize by TestTokenize and the
// benchmarks (through the tests table). It mixes shebang lines, Go-like code,
// several comment styles, literals, preprocessor-like lines and XHTML.
testContent = `#!/usr/bin/ruby
#!/usr/bin/env node
aaa
#!/usr/bin/env A=B foo=bar awk -f
#!python
func Tokenize(content []byte) []string {
splitted := bytes.Fields(content)
tokens := /* make([]string, 0, len(splitted))
no comment -- comment
for _, tokenByte := range splitted {
token64 := base64.StdEncoding.EncodeToString(tokenByte)
tokens = append(tokens, token64)
notcatchasanumber3.5
}*/
othercode
/* testing multiple
multiline comments*/
<!-- com
ment -->
<!-- comment 2-->
ppp no comment # comment
"literal1"
abb (tokenByte, 0xAF02) | ,3.2L
'literal2' notcatchasanumber3.5
5 += number * anotherNumber
if isTrue && isToo {
0b00001000 >> 1
}
return tokens
oneBool = 3 <= 2
varBool = 3<=2>
#ifndef
#i'm not a comment if the single line comment symbol is not followed by a white
PyErr_SetString(PyExc_RuntimeError, "Relative import is not supported for Python <=2.4.");
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title id="hola" class="">This is a XHTML sample file</title>
<style type="text/css"><![CDATA[
#example {
background-color: yellow;
}
]]></style>
</head>
<body>
<div id="example">
Just a simple <strong>XHTML</strong> test page.
</div>
</body>
</html>`
)
var (
// tokensFromTestContent is the exact token sequence TestTokenize expects
// Tokenize to return for testContent, compared position by position.
tokensFromTestContent = []string{"SHEBANG#!ruby", "SHEBANG#!node", "SHEBANG#!awk", "<!DOCTYPE>", "html", "PUBLIC",
"W3C", "DTD", "XHTML", "1", "0", "Strict", "EN", "http", "www", "w3", "org", "TR", "xhtml1", "DTD", "xhtml1",
"strict", "dtd", "<html>", "xmlns=", "<head>", "<title>", "id=", "class=", "</title>", "<style>", "type=",
"<![CDATA[>", "example", "background", "color", "yellow", "</style>", "</head>", "<body>", "<div>", "id=",
"<strong>", "</strong>", "</div>", "</body>", "</html>", "(", "[", "]", ")", "[", "]", "{", "(", ")", "(", ")",
"{", "}", "(", ")", ";", "#", "/usr/bin/ruby", "#", "/usr/bin/env", "node", "aaa", "#", "/usr/bin/env", "A",
"B", "foo", "bar", "awk", "f", "#", "python", "func", "Tokenize", "content", "byte", "string", "splitted",
"bytes.Fields", "content", "tokens", "othercode", "ppp", "no", "comment", "abb", "tokenByte",
"notcatchasanumber", "number", "*", "anotherNumber", "if", "isTrue", "isToo", "b", "return", "tokens",
"oneBool", "varBool", "#ifndef", "#i", "m", "not", "a", "comment", "if", "the", "single", "line", "comment",
"symbol", "is", "not", "followed", "by", "a", "white", "PyErr_SetString", "PyExc_RuntimeError", "This", "is",
"a", "XHTML", "sample", "file", "Just", "a", "simple", "XHTML", "test", "page.", "-", "|", "+", "&&", "<", "<",
"!", "!", "!", "=", "=", "!", ":", "=", ":", "=", ",", ",", "=", ">", ">", "=", "=", "=", "=", ">", "'", ","}
// tests is the table shared by TestTokenize and the benchmarks: each entry
// pairs an input with the tokens Tokenize is expected to produce for it.
tests = []struct {
name string
content []byte
expected []string
}{
{name: "content", content: []byte(testContent), expected: tokensFromTestContent},
}
)
// TestTokenize checks, for every entry of the tests table, that Tokenize
// leaves its input slice untouched and returns exactly the expected tokens,
// in order.
func TestTokenize(t *testing.T) {
	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			// Snapshot the input as a string so any in-place mutation is detected.
			before := string(test.content)
			tokens := Tokenize(test.content)
			after := string(test.content)
			require.Equal(t, before, after, "the input slice was modified")

			// The message reports the actual length first and the expected one second.
			require.Equal(t, len(test.expected), len(tokens), fmt.Sprintf("tokens slice length = %v, want %v", len(tokens), len(test.expected)))
			for i, expectedToken := range test.expected {
				assert.Equal(t, expectedToken, tokens[i], fmt.Sprintf("token = %v, want %v", tokens[i], expectedToken))
			}
		})
	}
}
// TestTokenizerLatin1AsUtf8 feeds Tokenize a Latin-1 encoded string, which is
// not valid UTF-8, and expects three tokens back.
func TestTokenizerLatin1AsUtf8(t *testing.T) {
	content := []byte("th\xe5 filling") // Latin-1 bytes of `thå filling`
	t.Logf("%v - %q", content, string(content))

	tokens := Tokenize(content)
	for i := range tokens {
		t.Logf("token %d, %s", i+1, tokens[i])
	}
	require.Equal(t, 3, len(tokens))
}
// TestRegexpOnInvalidUtf8 checks that the regular-token pattern finds only the
// ASCII runs in inputs that contain invalid UTF-8 or non-ASCII characters.
func TestRegexpOnInvalidUtf8(t *testing.T) {
	cases := []struct {
		text   string
		tokens []string
	}{
		{"th\xe0 filling", []string{"th", "filling"}},   // Latin-1 bytes of `thà filling`
		{"th\u0100 filling", []string{"th", "filling"}}, // `thĀ filling`
		{"привет, как дела?", []string{}},               // empty, no ASCII tokens
	}
	re := regex.MustCompile(`[0-9A-Za-z_\.@#\/\*]+`) // a reRegularToken from tokenizer.go

	for _, tc := range cases {
		t.Run("", func(t *testing.T) {
			t.Logf("%v - %q", tc, tc.text)
			input := []byte(tc.text)

			found := re.FindAll(input, -1)
			require.Equal(t, len(tc.tokens), len(found))

			replaced := re.ReplaceAll(input, []byte(` `))
			t.Logf("content:%q, tokens:[", replaced)
			for i := range found {
				got := string(found[i])
				t.Logf("\t%q,", got)
				require.Equal(t, tc.tokens[i], got)
			}
			t.Logf(" ]\n")
		})
	}
}
// BenchmarkTokenizer_BaselineCopy measures the cost of merely copying each
// test input (truncated to ByteLimit), as a baseline for BenchmarkTokenizer.
func BenchmarkTokenizer_BaselineCopy(b *testing.B) {
	b.ReportAllocs()
	for n := 0; n < b.N; n++ {
		for _, test := range tests {
			src := test.content
			if len(src) > ByteLimit {
				src = src[:ByteLimit]
			}
			_ = append([]byte(nil), src...)
		}
	}
}
// BenchmarkTokenizer measures Tokenize over every input of the tests table.
func BenchmarkTokenizer(b *testing.B) {
	b.ReportAllocs()
	for n := 0; n < b.N; n++ {
		for idx := range tests {
			Tokenize(tests[idx].content)
		}
	}
}