mirror of
https://github.com/ralsina/tartrazine.git
synced 2024-11-13 23:12:24 +00:00
commit
669ec497ef
4
.github/workflows/sync-linguist.yml
vendored
4
.github/workflows/sync-linguist.yml
vendored
@ -84,7 +84,7 @@ jobs:
|
|||||||
|
|
||||||
branch_name="feature/sync-linguist-${{ steps.previous_linguist.outputs.short_commit }}"
|
branch_name="feature/sync-linguist-${{ steps.previous_linguist.outputs.short_commit }}"
|
||||||
if git rev-parse --quiet --verify $branch_name; then
|
if git rev-parse --quiet --verify $branch_name; then
|
||||||
echo "Linuist update branch $branch_name already exists"
|
echo "Linguist update branch $branch_name already exists"
|
||||||
echo "::set-output name=needs_pr::true"
|
echo "::set-output name=needs_pr::true"
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
@ -104,7 +104,7 @@ jobs:
|
|||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "Linguist update unncessary"
|
echo "Linguist update unnecessary"
|
||||||
echo "::set-output name=needs_pr::false"
|
echo "::set-output name=needs_pr::false"
|
||||||
- name: Create Pull Request
|
- name: Create Pull Request
|
||||||
id: open-pr
|
id: open-pr
|
||||||
|
@ -255,7 +255,7 @@ All benchmark scripts are in [_benchmarks_](https://github.com/go-enry/go-enry/b
|
|||||||
|
|
||||||
#### Dependencies
|
#### Dependencies
|
||||||
|
|
||||||
As benchmarks depend on Ruby and Github-Linguist gem make sure you have:
|
As benchmarks depend on Ruby and GitHub-Linguist gem make sure you have:
|
||||||
|
|
||||||
- Ruby (e.g. using [`rbenv`](https://github.com/rbenv/rbenv)), [`bundler`](https://bundler.io/) installed
|
- Ruby (e.g. using [`rbenv`](https://github.com/rbenv/rbenv)), [`bundler`](https://bundler.io/) installed
|
||||||
- Docker
|
- Docker
|
||||||
|
@ -54,7 +54,7 @@ var (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
flag.BoolVar(&distribution, "distribution", false, "generate enry-distribuition.csv and linguist-distribution.csv")
|
flag.BoolVar(&distribution, "distribution", false, "generate enry-distribution.csv and linguist-distribution.csv")
|
||||||
flag.StringVar(&outDir, "outdir", "", "path to leave csv files")
|
flag.StringVar(&outDir, "outdir", "", "path to leave csv files")
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ func main() {
|
|||||||
breakdownFlag := flag.Bool("breakdown", false, "")
|
breakdownFlag := flag.Bool("breakdown", false, "")
|
||||||
jsonFlag := flag.Bool("json", false, "")
|
jsonFlag := flag.Bool("json", false, "")
|
||||||
showVersion := flag.Bool("version", false, "Show the enry version information")
|
showVersion := flag.Bool("version", false, "Show the enry version information")
|
||||||
allLangs := flag.Bool("all", false, "Show all files, including those identifed as non-programming languages")
|
allLangs := flag.Bool("all", false, "Show all files, including those identified as non-programming languages")
|
||||||
countMode := flag.String("mode", "byte", "the method used to count file size. Available options are: file, line and byte")
|
countMode := flag.String("mode", "byte", "the method used to count file size. Available options are: file, line and byte")
|
||||||
limitKB := flag.Int64("limit", 16*1024, "Analyse first N KB of the file (-1 means no limit)")
|
limitKB := flag.Int64("limit", 16*1024, "Analyse first N KB of the file (-1 means no limit)")
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
@ -97,7 +97,7 @@ func main() {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(bzz): provide API that mimics lingust CLI output for
|
// TODO(bzz): provide API that mimics linguist CLI output for
|
||||||
// - running ByExtension & ByFilename
|
// - running ByExtension & ByFilename
|
||||||
// - reading the file, if that did not work
|
// - reading the file, if that did not work
|
||||||
// - GetLanguage([]Strategy)
|
// - GetLanguage([]Strategy)
|
||||||
|
@ -232,7 +232,7 @@ func isCompiledCoffeeScript(path, ext string, content []byte) bool {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
// Require a score of 3. This is fairly abritrary. Consider tweaking later.
|
// Require a score of 3. This is fairly arbitrary. Consider tweaking later.
|
||||||
// See: https://github.com/github/linguist/blob/master/lib/linguist/generated.rb#L176-L213
|
// See: https://github.com/github/linguist/blob/master/lib/linguist/generated.rb#L176-L213
|
||||||
return score >= 3
|
return score >= 3
|
||||||
}
|
}
|
||||||
|
@ -4,8 +4,8 @@ import "github.com/go-enry/go-enry/v2/data/rule"
|
|||||||
|
|
||||||
// Heuristics implements a rule-based content matching engine.
|
// Heuristics implements a rule-based content matching engine.
|
||||||
|
|
||||||
// Heuristics is a number of sequntially applied rule.Heuristic where a
|
// Heuristics is a number of sequentially applied rule.Heuristic where a
|
||||||
// matching one disambiguages language(s) for a single file extension.
|
// matching one disambiguates language(s) for a single file extension.
|
||||||
type Heuristics []rule.Heuristic
|
type Heuristics []rule.Heuristic
|
||||||
|
|
||||||
// Match returns languages identified by the matching rule of the heuristic.
|
// Match returns languages identified by the matching rule of the heuristic.
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
package rule
|
package rule
|
||||||
|
|
||||||
// Heuristic consists of (a number of) rules where each, if matches,
|
// Heuristic consists of (a number of) rules where each, if matches,
|
||||||
// identifes content as belonging to a programming language(s).
|
// identifies content as belonging to a programming language(s).
|
||||||
type Heuristic interface {
|
type Heuristic interface {
|
||||||
Matcher
|
Matcher
|
||||||
Languages() []string
|
Languages() []string
|
||||||
@ -40,7 +40,7 @@ type or struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Or rule matches, if a single matching pattern exists.
|
// Or rule matches, if a single matching pattern exists.
|
||||||
// It recives only one pattern as it relies on compile-time optimization that
|
// It receives only one pattern as it relies on compile-time optimization that
|
||||||
// represents union with | inside a single regexp.
|
// represents union with | inside a single regexp.
|
||||||
func Or(l languages, r Matcher) Heuristic {
|
func Or(l languages, r Matcher) Heuristic {
|
||||||
return or{l, r}
|
return or{l, r}
|
||||||
|
2
enry.go
2
enry.go
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
Package enry implements multiple strategies for programming language identification.
|
Package enry implements multiple strategies for programming language identification.
|
||||||
|
|
||||||
Identification is made based on file name and file content using a seriece
|
Identification is made based on file name and file content using a series
|
||||||
of strategies to narrow down possible option.
|
of strategies to narrow down possible option.
|
||||||
Each strategy is available as a separate API call, as well as a main entry point
|
Each strategy is available as a separate API call, as well as a main entry point
|
||||||
|
|
||||||
|
@ -108,7 +108,7 @@ func getFrequencies(samplesDir string) (*samplesFrequencies, error) {
|
|||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// readSamples collects ./samples/ filenames from the Linguist codebase, skiping symlinks.
|
// readSamples collects ./samples/ filenames from the Linguist codebase, skipping symlinks.
|
||||||
func readSamples(samplesLangDir string) ([]string, error) {
|
func readSamples(samplesLangDir string) ([]string, error) {
|
||||||
const specialSubDir = "filenames"
|
const specialSubDir = "filenames"
|
||||||
var samples []string
|
var samples []string
|
||||||
@ -141,7 +141,7 @@ func readSamples(samplesLangDir string) ([]string, error) {
|
|||||||
|
|
||||||
// isKnownSymlinkInLinguist checks if the file name is on the list of known symlinks.
|
// isKnownSymlinkInLinguist checks if the file name is on the list of known symlinks.
|
||||||
// On Windows, there is no symlink support in Git [1] and those become regular text files,
|
// On Windows, there is no symlink support in Git [1] and those become regular text files,
|
||||||
// so we have to skip these files manually, maintaing a list here :/
|
// so we have to skip these files manually, maintaining a list here :/
|
||||||
// 1. https://github.com/git-for-windows/git/wiki/Symbolic-Links
|
// 1. https://github.com/git-for-windows/git/wiki/Symbolic-Links
|
||||||
//
|
//
|
||||||
// $ find -L .linguist/samples -xtype l
|
// $ find -L .linguist/samples -xtype l
|
||||||
|
@ -129,7 +129,7 @@ type Patterns struct {
|
|||||||
// See https://github.com/go-yaml/yaml/issues/100
|
// See https://github.com/go-yaml/yaml/issues/100
|
||||||
type StringArray []string
|
type StringArray []string
|
||||||
|
|
||||||
// UnmarshalYAML allowes to parse element always as a []string
|
// UnmarshalYAML allows parsing an element always as a []string
|
||||||
func (sa *StringArray) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
func (sa *StringArray) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||||
var multi []string
|
var multi []string
|
||||||
if err := unmarshal(&multi); err != nil {
|
if err := unmarshal(&multi); err != nil {
|
||||||
@ -165,7 +165,7 @@ func parseYaml(file string) (*Heuristics, error) {
|
|||||||
// - named & numbered capturing group/after text matching
|
// - named & numbered capturing group/after text matching
|
||||||
// - backreference
|
// - backreference
|
||||||
// - possessive quantifier
|
// - possessive quantifier
|
||||||
// For referece on supported syntax see https://github.com/google/re2/wiki/Syntax
|
// For reference on supported syntax see https://github.com/google/re2/wiki/Syntax
|
||||||
func isUnsupportedRegexpSyntax(reg string) bool {
|
func isUnsupportedRegexpSyntax(reg string) bool {
|
||||||
return strings.Contains(reg, `(?<`) || strings.Contains(reg, `(?=`) || strings.Contains(reg, `(?!`) ||
|
return strings.Contains(reg, `(?<`) || strings.Contains(reg, `(?=`) || strings.Contains(reg, `(?!`) ||
|
||||||
strings.Contains(reg, `(?>`) || strings.Contains(reg, `\1`) || strings.Contains(reg, `*+`) ||
|
strings.Contains(reg, `(?>`) || strings.Contains(reg, `\1`) || strings.Contains(reg, `*+`) ||
|
||||||
@ -173,7 +173,7 @@ func isUnsupportedRegexpSyntax(reg string) bool {
|
|||||||
(strings.HasPrefix(reg, multilinePrefix+`/`) && strings.HasSuffix(reg, `/`))
|
(strings.HasPrefix(reg, multilinePrefix+`/`) && strings.HasSuffix(reg, `/`))
|
||||||
}
|
}
|
||||||
|
|
||||||
// convertToValidRegexp converts Ruby regexp syntaxt to RE2 equivalent.
|
// convertToValidRegexp converts Ruby regexp syntax to RE2 equivalent.
|
||||||
// Does not work with Ruby regexp literals.
|
// Does not work with Ruby regexp literals.
|
||||||
func convertToValidRegexp(rubyRegexp string) string {
|
func convertToValidRegexp(rubyRegexp string) string {
|
||||||
return multilinePrefix + rubyRegexp
|
return multilinePrefix + rubyRegexp
|
||||||
|
@ -119,7 +119,7 @@ func TestTemplateMatcherVars(t *testing.T) {
|
|||||||
require.NotEmpty(t, buf)
|
require.NotEmpty(t, buf)
|
||||||
|
|
||||||
// TODO(bzz) add more advanced test using go/ast package, to verify the
|
// TODO(bzz) add more advanced test using go/ast package, to verify the
|
||||||
// strucutre of generated code:
|
// structure of generated code:
|
||||||
// - check key literal exists in map for each extension:
|
// - check key literal exists in map for each extension:
|
||||||
|
|
||||||
src, err := format.Source(buf.Bytes())
|
src, err := format.Source(buf.Bytes())
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
`enry-java` package is available thorugh [maven central](http://search.maven.org/#search%7Cga%7C1%7Ca%3A%22enry-java%22),
|
`enry-java` package is available through [maven central](http://search.maven.org/#search%7Cga%7C1%7Ca%3A%22enry-java%22),
|
||||||
so it can be easily added as a dependency in various package management systems.
|
so it can be easily added as a dependency in various package management systems.
|
||||||
Examples of how to handle it for most common systems are included below,
|
Examples of how to handle it for most common systems are included below,
|
||||||
for other systems just look at maven central's dependency information.
|
for other systems just look at maven central's dependency information.
|
||||||
|
2
java/sbt
2
java/sbt
@ -6,7 +6,7 @@
|
|||||||
# Copyright (c) 2011, Paul Phillips. All rights reserved.
|
# Copyright (c) 2011, Paul Phillips. All rights reserved.
|
||||||
# Generated from http://www.opensource.org/licenses/bsd-license.php
|
# Generated from http://www.opensource.org/licenses/bsd-license.php
|
||||||
#
|
#
|
||||||
# Redistribution and use in source and binary forms, with or withou
|
# Redistribution and use in source and binary forms, with or without
|
||||||
# modification, are permitted provided that the following conditions are
|
# modification, are permitted provided that the following conditions are
|
||||||
# met:
|
# met:
|
||||||
#
|
#
|
||||||
|
Loading…
Reference in New Issue
Block a user