2017-06-13 11:56:07 +00:00
|
|
|
package enry
|
2016-07-13 17:05:09 +00:00
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
2022-10-14 11:32:06 +00:00
|
|
|
"fmt"
|
2016-07-13 17:05:09 +00:00
|
|
|
"path/filepath"
|
2021-03-31 19:34:43 +00:00
|
|
|
"sort"
|
2016-07-13 17:05:09 +00:00
|
|
|
"strings"
|
2017-06-08 10:28:36 +00:00
|
|
|
|
2020-03-19 16:31:29 +00:00
|
|
|
"github.com/go-enry/go-enry/v2/data"
|
2020-04-15 15:27:48 +00:00
|
|
|
"github.com/go-enry/go-enry/v2/regex"
|
2016-07-13 17:05:09 +00:00
|
|
|
)
|
|
|
|
|
2019-02-05 21:54:14 +00:00
|
|
|
// binSniffLen is the maximum number of leading bytes that IsBinary inspects
// when looking for a NUL byte.
const binSniffLen = 8000
|
2017-05-29 08:05:16 +00:00
|
|
|
|
2020-05-27 13:07:57 +00:00
|
|
|
// configurationLanguages is the set of language names that IsConfiguration
// treats as configuration languages. Only the keys matter; the values are
// empty structs.
var configurationLanguages = map[string]struct{}{
	"INI":  {},
	"JSON": {},
	"SQL":  {},
	"TOML": {},
	"XML":  {},
	"YAML": {},
}
|
|
|
|
|
2019-02-05 21:54:14 +00:00
|
|
|
// IsConfiguration tells if filename is in one of the configuration languages.
|
2016-07-13 20:21:18 +00:00
|
|
|
func IsConfiguration(path string) bool {
|
2017-06-12 11:42:20 +00:00
|
|
|
language, _ := GetLanguageByExtension(path)
|
|
|
|
_, is := configurationLanguages[language]
|
2016-07-13 20:21:18 +00:00
|
|
|
return is
|
|
|
|
}
|
|
|
|
|
2019-02-05 21:54:14 +00:00
|
|
|
// IsImage tells if a given file is an image (PNG, JPEG or GIF format).
//
// The decision is based solely on the extension of path; the file content is
// never read. The extension is compared case-insensitively, so "photo.PNG"
// and "photo.png" are both recognized.
func IsImage(path string) bool {
	switch strings.ToLower(filepath.Ext(path)) {
	case ".png", ".jpg", ".jpeg", ".gif":
		return true
	default:
		return false
	}
}
|
|
|
|
|
2019-02-05 21:54:14 +00:00
|
|
|
// GetMIMEType returns a MIME type of a given file based on its languages.
|
|
|
|
func GetMIMEType(path string, language string) string {
|
2017-07-10 10:59:39 +00:00
|
|
|
if mime, ok := data.LanguagesMime[language]; ok {
|
|
|
|
return mime
|
|
|
|
}
|
|
|
|
|
2017-07-11 10:27:48 +00:00
|
|
|
if IsImage(path) {
|
|
|
|
return "image/" + filepath.Ext(path)[1:]
|
|
|
|
}
|
|
|
|
|
2017-07-10 10:59:39 +00:00
|
|
|
return "text/plain"
|
2017-07-10 10:50:52 +00:00
|
|
|
}
|
|
|
|
|
2019-02-05 21:54:14 +00:00
|
|
|
// IsDocumentation returns whether or not path is a documentation path.
|
|
|
|
func IsDocumentation(path string) bool {
|
2020-04-15 15:27:48 +00:00
|
|
|
return matchRegexSlice(data.DocumentationMatchers, path)
|
2019-02-05 21:54:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// IsDotFile returns whether or not the last element of path has a dot as a
// prefix.
//
// The path is cleaned first, so trailing separators and "." elements are
// ignored. The special directory names "." and ".." are not dot files.
func IsDotFile(path string) bool {
	base := filepath.Base(filepath.Clean(path))
	if base == "." || base == ".." {
		// Current/parent directory references, not hidden files.
		return false
	}
	return strings.HasPrefix(base, ".")
}
|
|
|
|
|
2022-10-14 11:32:06 +00:00
|
|
|
// allVendorRegExp is the single combined regexp that init builds from
// data.VendorMatchers; IsVendor matches paths against it.
var allVendorRegExp regex.EnryRegexp
|
2021-03-31 19:34:43 +00:00
|
|
|
|
2019-02-05 21:54:14 +00:00
|
|
|
// IsVendor returns whether or not path is a vendor path.
|
|
|
|
func IsVendor(path string) bool {
|
2022-10-14 11:32:06 +00:00
|
|
|
return allVendorRegExp.MatchString(path)
|
|
|
|
}
|
|
|
|
|
|
|
|
func init() {
|
|
|
|
// We now collate all regexps from VendorMatchers to a single large regexp
|
|
|
|
// which is at least twice as fast to test than simply iterating & matching.
|
|
|
|
//
|
|
|
|
// ---
|
|
|
|
//
|
|
|
|
// We could test each matcher from VendorMatchers in turn i.e.
|
|
|
|
//
|
|
|
|
// func IsVendor(filename string) bool {
|
|
|
|
// for _, matcher := range data.VendorMatchers {
|
|
|
|
// if matcher.MatchString(filename) {
|
|
|
|
// return true
|
|
|
|
// }
|
|
|
|
// }
|
|
|
|
// return false
|
|
|
|
// }
|
|
|
|
//
|
|
|
|
// Or naïvely concatentate all these regexps using groups i.e.
|
|
|
|
//
|
|
|
|
// `(regexp1)|(regexp2)|(regexp3)|...`
|
|
|
|
//
|
|
|
|
// However, both of these are relatively slow and don't take advantage
|
|
|
|
// of the inherent structure within our regexps.
|
|
|
|
//
|
|
|
|
// Imperical observation: by looking at the regexps, we only have 3 types.
|
|
|
|
// 1. Those that start with `^`
|
|
|
|
// 2. Those that start with `(^|/)`
|
|
|
|
// 3. All the rest
|
|
|
|
//
|
|
|
|
// If we collate our regexps into these 3 groups - that will significantly
|
|
|
|
// reduce the likelihood of backtracking within the regexp trie matcher.
|
|
|
|
//
|
|
|
|
// A further improvement is to use non-capturing groups (?:) as otherwise
|
|
|
|
// the regexp parser, whilst matching, will have to allocate slices for
|
|
|
|
// matching positions. (A future improvement left out could be to
|
|
|
|
// enforce non-capturing groups within the sub-regexps.)
|
|
|
|
|
|
|
|
matchers := data.VendorMatchers
|
|
|
|
sort.SliceStable(matchers, func(i, j int) bool {
|
|
|
|
return matchers[i].String() < matchers[j].String()
|
|
|
|
})
|
|
|
|
|
|
|
|
var caretPrefixed, caretOrSlashPrefixed, theRest []string
|
|
|
|
// Check prefix, add to the respective group slices
|
|
|
|
for _, matcher := range matchers {
|
|
|
|
str := matcher.String()
|
|
|
|
if strings.HasPrefix(str, "^") {
|
|
|
|
caretPrefixed = append(caretPrefixed, str[1:])
|
|
|
|
} else if strings.HasPrefix(str, "(^|/)") {
|
|
|
|
caretOrSlashPrefixed = append(caretOrSlashPrefixed, str[5:])
|
|
|
|
} else {
|
|
|
|
theRest = append(theRest, str)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
var sb strings.Builder
|
|
|
|
// group 1 - start with `^`
|
|
|
|
appendGroupWithCommonPrefix(&sb, "^", caretPrefixed)
|
|
|
|
sb.WriteString("|")
|
|
|
|
// group 2 - start with `(^|/)`
|
|
|
|
appendGroupWithCommonPrefix(&sb, "(?:^|/)", caretOrSlashPrefixed)
|
|
|
|
sb.WriteString("|")
|
|
|
|
// grou 3, all rest.
|
|
|
|
appendGroupWithCommonPrefix(&sb, "", theRest)
|
|
|
|
allVendorRegExp = regex.MustCompile(sb.String())
|
|
|
|
}
|
|
|
|
|
|
|
|
// appendGroupWithCommonPrefix writes to sb one non-capturing group that
// alternates over the expressions in res.
//
// With an empty commonPrefix the output is `(?:r1)|(?:r2)|...`. With a
// non-empty commonPrefix the prefix is emitted once, ahead of the
// alternation: `(?:PREFIX(?:(?:r1)|(?:r2)|...))`.
func appendGroupWithCommonPrefix(sb *strings.Builder, commonPrefix string, res []string) {
	alternatives := strings.Join(res, ")|(?:")
	if commonPrefix == "" {
		fmt.Fprintf(sb, "(?:%s)", alternatives)
		return
	}
	fmt.Fprintf(sb, "(?:%s(?:(?:%s)))", commonPrefix, alternatives)
}
|
2016-07-13 20:21:18 +00:00
|
|
|
|
2020-04-06 14:23:48 +00:00
|
|
|
// IsTest returns whether or not path is a test path.
|
|
|
|
func IsTest(path string) bool {
|
2020-04-15 15:27:48 +00:00
|
|
|
return matchRegexSlice(data.TestMatchers, path)
|
2020-04-06 14:23:48 +00:00
|
|
|
}
|
|
|
|
|
2022-10-14 11:32:06 +00:00
|
|
|
func matchRegexSlice(exprs []regex.EnryRegexp, str string) bool {
|
|
|
|
for _, expr := range exprs {
|
|
|
|
if expr.MatchString(str) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2017-05-29 08:05:16 +00:00
|
|
|
// IsBinary detects if data is a binary value based on:
|
|
|
|
// http://git.kernel.org/cgit/git/git.git/tree/xdiff-interface.c?id=HEAD#n198
|
2016-07-13 20:21:18 +00:00
|
|
|
func IsBinary(data []byte) bool {
|
2019-02-05 21:54:14 +00:00
|
|
|
if len(data) > binSniffLen {
|
|
|
|
data = data[:binSniffLen]
|
2016-07-13 20:21:18 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if bytes.IndexByte(data, byte(0)) == -1 {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
return true
|
|
|
|
}
|
2019-07-19 20:28:57 +00:00
|
|
|
|
|
|
|
// GetColor returns a HTML color code of a given language.
|
|
|
|
func GetColor(language string) string {
|
|
|
|
if color, ok := data.LanguagesColor[language]; ok {
|
|
|
|
return color
|
|
|
|
}
|
|
|
|
|
2020-03-21 13:37:39 +00:00
|
|
|
if color, ok := data.LanguagesColor[GetLanguageGroup(language)]; ok {
|
|
|
|
return color
|
|
|
|
}
|
|
|
|
|
2019-07-19 20:28:57 +00:00
|
|
|
return "#cccccc"
|
|
|
|
}
|
2020-04-15 15:27:48 +00:00
|
|
|
|
2020-05-27 13:07:57 +00:00
|
|
|
// IsGenerated returns whether the file with the given path and content is a
|
|
|
|
// generated file.
|
|
|
|
func IsGenerated(path string, content []byte) bool {
|
|
|
|
ext := strings.ToLower(filepath.Ext(path))
|
|
|
|
if _, ok := data.GeneratedCodeExtensions[ext]; ok {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, m := range data.GeneratedCodeNameMatchers {
|
|
|
|
if m(path) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
path = strings.ToLower(path)
|
|
|
|
for _, m := range data.GeneratedCodeMatchers {
|
|
|
|
if m(path, ext, content) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false
|
|
|
|
}
|