mirror of
https://github.com/ralsina/tartrazine.git
synced 2024-11-12 22:42:23 +00:00
commit
669ec497ef
4
.github/workflows/sync-linguist.yml
vendored
4
.github/workflows/sync-linguist.yml
vendored
@ -84,7 +84,7 @@ jobs:
|
||||
|
||||
branch_name="feature/sync-linguist-${{ steps.previous_linguist.outputs.short_commit }}"
|
||||
if git rev-parse --quiet --verify $branch_name; then
|
||||
echo "Linuist update branch $branch_name already exists"
|
||||
echo "Linguist update branch $branch_name already exists"
|
||||
echo "::set-output name=needs_pr::true"
|
||||
exit 0
|
||||
fi
|
||||
@ -104,7 +104,7 @@ jobs:
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "Linguist update unncessary"
|
||||
echo "Linguist update unnecessary"
|
||||
echo "::set-output name=needs_pr::false"
|
||||
- name: Create Pull Request
|
||||
id: open-pr
|
||||
|
@ -255,7 +255,7 @@ All benchmark scripts are in [_benchmarks_](https://github.com/go-enry/go-enry/b
|
||||
|
||||
#### Dependencies
|
||||
|
||||
As benchmarks depend on Ruby and Github-Linguist gem make sure you have:
|
||||
As benchmarks depend on Ruby and GitHub-Linguist gem make sure you have:
|
||||
|
||||
- Ruby (e.g. using [`rbenv`](https://github.com/rbenv/rbenv)), [`bundler`](https://bundler.io/) installed
|
||||
- Docker
|
||||
|
@ -54,7 +54,7 @@ var (
|
||||
)
|
||||
|
||||
func main() {
|
||||
flag.BoolVar(&distribution, "distribution", false, "generate enry-distribuition.csv and linguist-distribution.csv")
|
||||
flag.BoolVar(&distribution, "distribution", false, "generate enry-distribution.csv and linguist-distribution.csv")
|
||||
flag.StringVar(&outDir, "outdir", "", "path to leave csv files")
|
||||
flag.Parse()
|
||||
|
||||
|
@ -29,7 +29,7 @@ func main() {
|
||||
breakdownFlag := flag.Bool("breakdown", false, "")
|
||||
jsonFlag := flag.Bool("json", false, "")
|
||||
showVersion := flag.Bool("version", false, "Show the enry version information")
|
||||
allLangs := flag.Bool("all", false, "Show all files, including those identifed as non-programming languages")
|
||||
allLangs := flag.Bool("all", false, "Show all files, including those identified as non-programming languages")
|
||||
countMode := flag.String("mode", "byte", "the method used to count file size. Available options are: file, line and byte")
|
||||
limitKB := flag.Int64("limit", 16*1024, "Analyse first N KB of the file (-1 means no limit)")
|
||||
flag.Parse()
|
||||
@ -97,7 +97,7 @@ func main() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// TODO(bzz): provide API that mimics lingust CLI output for
|
||||
// TODO(bzz): provide API that mimics linguist CLI output for
|
||||
// - running ByExtension & ByFilename
|
||||
// - reading the file, if that did not work
|
||||
// - GetLanguage([]Strategy)
|
||||
|
@ -232,7 +232,7 @@ func isCompiledCoffeeScript(path, ext string, content []byte) bool {
|
||||
}
|
||||
})
|
||||
|
||||
// Require a score of 3. This is fairly abritrary. Consider tweaking later.
|
||||
// Require a score of 3. This is fairly arbitrary. Consider tweaking later.
|
||||
// See: https://github.com/github/linguist/blob/master/lib/linguist/generated.rb#L176-L213
|
||||
return score >= 3
|
||||
}
|
||||
|
@ -4,8 +4,8 @@ import "github.com/go-enry/go-enry/v2/data/rule"
|
||||
|
||||
// Heuristics implements a rule-based content matching engine.
|
||||
|
||||
// Heuristics is a number of sequntially applied rule.Heuristic where a
|
||||
// matching one disambiguages language(s) for a single file extension.
|
||||
// Heuristics is a number of sequentially applied rule.Heuristic where a
|
||||
// matching one disambiguates language(s) for a single file extension.
|
||||
type Heuristics []rule.Heuristic
|
||||
|
||||
// Match returns languages identified by the matching rule of the heuristic.
|
||||
|
@ -4,7 +4,7 @@
|
||||
package rule
|
||||
|
||||
// Heuristic consists of (a number of) rules where each, if it matches,
|
||||
// identifes content as belonging to a programming language(s).
|
||||
// identifies content as belonging to a programming language(s).
|
||||
type Heuristic interface {
|
||||
Matcher
|
||||
Languages() []string
|
||||
@ -40,7 +40,7 @@ type or struct {
|
||||
}
|
||||
|
||||
// Or rule matches, if a single matching pattern exists.
|
||||
// It recives only one pattern as it relies on compile-time optimization that
|
||||
// It receives only one pattern as it relies on compile-time optimization that
|
||||
// represents union with | inside a single regexp.
|
||||
func Or(l languages, r Matcher) Heuristic {
|
||||
return or{l, r}
|
||||
|
2
enry.go
2
enry.go
@ -1,7 +1,7 @@
|
||||
/*
|
||||
Package enry implements multiple strategies for programming language identification.
|
||||
|
||||
Identification is made based on file name and file content using a seriece
|
||||
Identification is made based on file name and file content using a series
|
||||
of strategies to narrow down possible options.
|
||||
Each strategy is available as a separate API call, as well as a main entry point
|
||||
|
||||
|
@ -108,7 +108,7 @@ func getFrequencies(samplesDir string) (*samplesFrequencies, error) {
|
||||
}, nil
|
||||
}
|
||||
|
||||
// readSamples collects ./samples/ filenames from the Linguist codebase, skiping symlinks.
|
||||
// readSamples collects ./samples/ filenames from the Linguist codebase, skipping symlinks.
|
||||
func readSamples(samplesLangDir string) ([]string, error) {
|
||||
const specialSubDir = "filenames"
|
||||
var samples []string
|
||||
@ -141,7 +141,7 @@ func readSamples(samplesLangDir string) ([]string, error) {
|
||||
|
||||
// isKnownSymlinkInLinguist checks if the file name is on the list of known symlinks.
|
||||
// On Windows, there is no symlink support in Git [1] and those become regular text files,
|
||||
// so we have to skip these files manually, maintaing a list here :/
|
||||
// so we have to skip these files manually, maintaining a list here :/
|
||||
// 1. https://github.com/git-for-windows/git/wiki/Symbolic-Links
|
||||
//
|
||||
// $ find -L .linguist/samples -xtype l
|
||||
|
@ -129,7 +129,7 @@ type Patterns struct {
|
||||
// See https://github.com/go-yaml/yaml/issues/100
|
||||
type StringArray []string
|
||||
|
||||
// UnmarshalYAML allowes to parse element always as a []string
|
||||
// UnmarshalYAML allows an element to always be parsed as a []string
|
||||
func (sa *StringArray) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
var multi []string
|
||||
if err := unmarshal(&multi); err != nil {
|
||||
@ -165,7 +165,7 @@ func parseYaml(file string) (*Heuristics, error) {
|
||||
// - named & numbered capturing group/after text matching
|
||||
// - backreference
|
||||
// - possessive quantifier
|
||||
// For referece on supported syntax see https://github.com/google/re2/wiki/Syntax
|
||||
// For reference on supported syntax see https://github.com/google/re2/wiki/Syntax
|
||||
func isUnsupportedRegexpSyntax(reg string) bool {
|
||||
return strings.Contains(reg, `(?<`) || strings.Contains(reg, `(?=`) || strings.Contains(reg, `(?!`) ||
|
||||
strings.Contains(reg, `(?>`) || strings.Contains(reg, `\1`) || strings.Contains(reg, `*+`) ||
|
||||
@ -173,7 +173,7 @@ func isUnsupportedRegexpSyntax(reg string) bool {
|
||||
(strings.HasPrefix(reg, multilinePrefix+`/`) && strings.HasSuffix(reg, `/`))
|
||||
}
|
||||
|
||||
// convertToValidRegexp converts Ruby regexp syntaxt to RE2 equivalent.
|
||||
// convertToValidRegexp converts Ruby regexp syntax to RE2 equivalent.
|
||||
// Does not work with Ruby regexp literals.
|
||||
func convertToValidRegexp(rubyRegexp string) string {
|
||||
return multilinePrefix + rubyRegexp
|
||||
|
@ -119,7 +119,7 @@ func TestTemplateMatcherVars(t *testing.T) {
|
||||
require.NotEmpty(t, buf)
|
||||
|
||||
// TODO(bzz) add more advanced test using go/ast package, to verify the
|
||||
// strucutre of generated code:
|
||||
// structure of generated code:
|
||||
// - check key literal exists in map for each extension:
|
||||
|
||||
src, err := format.Source(buf.Bytes())
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
## Usage
|
||||
|
||||
`enry-java` package is available thorugh [maven central](http://search.maven.org/#search%7Cga%7C1%7Ca%3A%22enry-java%22),
|
||||
`enry-java` package is available through [maven central](http://search.maven.org/#search%7Cga%7C1%7Ca%3A%22enry-java%22),
|
||||
so it can easily be added as a dependency in various package management systems.
|
||||
Examples of how to handle it for most common systems are included below,
|
||||
for other systems just look at maven central's dependency information.
|
||||
|
2
java/sbt
2
java/sbt
@ -6,7 +6,7 @@
|
||||
# Copyright (c) 2011, Paul Phillips. All rights reserved.
|
||||
# Generated from http://www.opensource.org/licenses/bsd-license.php
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or withou
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
|
Loading…
Reference in New Issue
Block a user