mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-06-19 06:33:06 -03:00
Rearranged code
This commit is contained in:
149
common.go
149
common.go
@ -1,40 +1,15 @@
|
||||
package slinguist
|
||||
|
||||
const OtherLanguage = "Other"
|
||||
|
||||
var (
|
||||
ExtensionsByLanguage map[string][]string
|
||||
ignoredExtensions = map[string]bool{
|
||||
".asc": true, ".cgi": true, ".fcgi": true, ".gml": true, ".fx": true,
|
||||
".vhost": true,
|
||||
}
|
||||
auxiliaryLanguages = map[string]bool{
|
||||
"Other": true, "XML": true, "YAML": true, "TOML": true, "INI": true,
|
||||
"JSON": true, "TeX": true, "Public Key": true, "AsciiDoc": true,
|
||||
"AGS Script": true, "VimL": true, "Diff": true, "CMake": true, "fish": true,
|
||||
"Awk": true, "Graphviz (DOT)": true, "Markdown": true, "desktop": true,
|
||||
"XSLT": true, "SQL": true, "RMarkdown": true, "IRC log": true,
|
||||
"reStructuredText": true, "Twig": true, "CSS": true, "Batchfile": true,
|
||||
"Text": true, "HTML+ERB": true, "HTML": true, "Gettext Catalog": true,
|
||||
"Smarty": true, "Raw token data": true,
|
||||
}
|
||||
import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func init() {
|
||||
for l, _ := range ignoredExtensions {
|
||||
languagesByExtension[l] = []string{OtherLanguage}
|
||||
}
|
||||
// OtherLanguage is used as a zero value when a function can not return a specific language.
|
||||
const OtherLanguage = "Other"
|
||||
|
||||
ExtensionsByLanguage = reverseStringListMap(languagesByExtension)
|
||||
}
|
||||
|
||||
// GetLanguageExtensions returns the different extensions being used by the
|
||||
// language.
|
||||
func GetLanguageExtensions(language string) []string {
|
||||
return ExtensionsByLanguage[language]
|
||||
}
|
||||
|
||||
// GetLanguage return the Language for a given filename and file content.
|
||||
// GetLanguage applies a sequence of strategies based on the given filename and content
|
||||
// to find out the most probably language to return.
|
||||
func GetLanguage(filename string, content []byte) string {
|
||||
if lang, safe := GetLanguageByModeline(content); safe {
|
||||
return lang
|
||||
@ -60,12 +35,110 @@ func GetLanguage(filename string, content []byte) string {
|
||||
return lang
|
||||
}
|
||||
|
||||
func reverseStringListMap(i map[string][]string) (o map[string][]string) {
|
||||
o = map[string][]string{}
|
||||
for key, set := range i {
|
||||
for _, value := range set {
|
||||
o[value] = append(o[value], key)
|
||||
}
|
||||
// GetLanguageByModeline returns the language of the given content looking for the modeline,
|
||||
// and safe to indicate the sureness of returned language.
|
||||
func GetLanguageByModeline(content []byte) (lang string, safe bool) {
|
||||
return getLanguageByModeline(content)
|
||||
}
|
||||
|
||||
// GetLanguageByFilename returns a language based on the given filename, and safe to indicate
|
||||
// the sureness of returned language.
|
||||
func GetLanguageByFilename(filename string) (lang string, safe bool) {
|
||||
return getLanguageByFilename(filename)
|
||||
}
|
||||
|
||||
func getLanguageByFilename(filename string) (lang string, safe bool) {
|
||||
lang, safe = languagesByFilename[filename]
|
||||
if lang == "" {
|
||||
lang = OtherLanguage
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// GetLanguageByShebang returns the language of the given content looking for the shebang line,
|
||||
// and safe to indicate the sureness of returned language.
|
||||
func GetLanguageByShebang(content []byte) (lang string, safe bool) {
|
||||
return getLanguageByShebang(content)
|
||||
}
|
||||
|
||||
// GetLanguageByExtension returns a language based on the given filename, and safe to indicate
|
||||
// the sureness of returned language.
|
||||
func GetLanguageByExtension(filename string) (lang string, safe bool) {
|
||||
return getLanguageByExtension(filename)
|
||||
}
|
||||
|
||||
func getLanguageByExtension(filename string) (lang string, safe bool) {
|
||||
ext := strings.ToLower(filepath.Ext(filename))
|
||||
lang = OtherLanguage
|
||||
langs, ok := languagesByExtension[ext]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
|
||||
lang = langs[0]
|
||||
safe = len(langs) == 1
|
||||
return
|
||||
}
|
||||
|
||||
// GetLanguageByContent returns a language based on the filename and heuristics applies to the content,
|
||||
// and safe to indicate the sureness of returned language.
|
||||
func GetLanguageByContent(filename string, content []byte) (lang string, safe bool) {
|
||||
return getLanguageByContent(filename, content)
|
||||
}
|
||||
|
||||
func getLanguageByContent(filename string, content []byte) (lang string, safe bool) {
|
||||
ext := strings.ToLower(filepath.Ext(filename))
|
||||
if fnMatcher, ok := contentMatchers[ext]; ok {
|
||||
lang, safe = fnMatcher(content)
|
||||
} else {
|
||||
lang = OtherLanguage
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// GetLanguageByClassifier takes in a content and a list of candidates, and apply the classifier's Classify method to
|
||||
// get the most probably language. If classifier is null then DefaultClassfier will be used.
|
||||
func GetLanguageByClassifier(content []byte, candidates []string, classifier Classifier) string {
|
||||
return getLanguageByClassifier(content, candidates, classifier)
|
||||
}
|
||||
|
||||
// GetLanguageExtensions returns the different extensions being used by the language.
|
||||
func GetLanguageExtensions(language string) []string {
|
||||
return extensionsByLanguage[language]
|
||||
}
|
||||
|
||||
// Type represent language's type. Either data, programming, markup, prose, or unknown.
|
||||
type Type int
|
||||
|
||||
// Type's values.
|
||||
const (
|
||||
Unknown Type = iota
|
||||
Data
|
||||
Programming
|
||||
Markup
|
||||
Prose
|
||||
)
|
||||
|
||||
// GetLanguageType returns the given language's type.
|
||||
func GetLanguageType(language string) (langType Type) {
|
||||
langType, ok := languagesType[language]
|
||||
if !ok {
|
||||
langType = Unknown
|
||||
}
|
||||
|
||||
return langType
|
||||
}
|
||||
|
||||
// GetLanguageByAlias returns either the language related to the given alias and ok set to true
|
||||
// or Otherlanguage and ok set to false if the alias is not recognized.
|
||||
func GetLanguageByAlias(alias string) (lang string, ok bool) {
|
||||
a := strings.Split(alias, `,`)[0]
|
||||
a = strings.ToLower(a)
|
||||
lang, ok = languagesByAlias[a]
|
||||
if !ok {
|
||||
lang = OtherLanguage
|
||||
}
|
||||
|
||||
return
|
||||
|
Reference in New Issue
Block a user