mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-05-23 16:40:08 -03:00
refactoring: remove un-used code, add go doc, fix ci (#199)
Refactoring, consisting of - remove the unused function `isAuxiliaryLanguage` and type `FileCountList` in order to reduce the public API surface (go/java) - add GoDoc to public APIs - ci: java profile uses the latest go src It also now mimics https://docs.travis-ci.com/user/languages/go/#go-import-path for the non-go build image, as the code relies on internal imports. TEST PLAN: - make test
This commit is contained in:
parent
fe18dc0830
commit
13d3d66d37
10
.travis.yml
10
.travis.yml
@ -45,11 +45,19 @@ jobs:
|
|||||||
stage: test
|
stage: test
|
||||||
language: scala
|
language: scala
|
||||||
jdk: oraclejdk8
|
jdk: oraclejdk8
|
||||||
|
before_install:
|
||||||
|
# mimics exact behavior of 'go_import_path' for non-go build image
|
||||||
|
- export GOPATH=${TRAVIS_HOME}/gopath
|
||||||
|
- mkdir -p ${GOPATH}/src/gopkg.in/src-d/enry.v1
|
||||||
|
- tar -Pczf ${TRAVIS_TMPDIR}/src_archive.tar.gz -C ${TRAVIS_BUILD_DIR} . && tar -Pxzf ${TRAVIS_TMPDIR}/src_archive.tar.gz -C ${TRAVIS_HOME}/gopath/src/gopkg.in/src-d/enry.v1
|
||||||
|
- export TRAVIS_BUILD_DIR=${TRAVIS_HOME}/gopath/src/gopkg.in/src-d/enry.v1
|
||||||
|
- cd ${TRAVIS_HOME}/gopath/src/gopkg.in/src-d/enry.v1
|
||||||
install:
|
install:
|
||||||
- gimme version
|
- gimme version
|
||||||
- eval "$(curl -sL https://raw.githubusercontent.com/travis-ci/gimme/master/gimme | GIMME_GO_VERSION=$GO_VERSION bash)"
|
- eval "$(curl -sL https://raw.githubusercontent.com/travis-ci/gimme/master/gimme | GIMME_GO_VERSION=$GO_VERSION bash)"
|
||||||
- go version
|
- go version
|
||||||
- go get -v gopkg.in/src-d/enry.v1/...
|
- echo $PWD; echo $GOPATH
|
||||||
|
- go get -v ./...
|
||||||
before_script:
|
before_script:
|
||||||
- cd java
|
- cd java
|
||||||
- make
|
- make
|
||||||
|
@ -278,7 +278,7 @@ func printFileAnalysis(file string, limit int64, isJSON bool) error {
|
|||||||
// functions below can work on a sample
|
// functions below can work on a sample
|
||||||
fileType := getFileType(file, data)
|
fileType := getFileType(file, data)
|
||||||
language := enry.GetLanguage(file, data)
|
language := enry.GetLanguage(file, data)
|
||||||
mimeType := enry.GetMimeType(file, language)
|
mimeType := enry.GetMIMEType(file, language)
|
||||||
|
|
||||||
if isJSON {
|
if isJSON {
|
||||||
return json.NewEncoder(os.Stdout).Encode(map[string]interface{}{
|
return json.NewEncoder(os.Stdout).Encode(map[string]interface{}{
|
||||||
|
@ -26,6 +26,7 @@ var DefaultStrategies = []Strategy{
|
|||||||
GetLanguagesByClassifier,
|
GetLanguagesByClassifier,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DefaultClassifier is a naive Bayes classifier based on Linguist samples.
|
||||||
var DefaultClassifier Classifier = &classifier{
|
var DefaultClassifier Classifier = &classifier{
|
||||||
languagesLogProbabilities: data.LanguagesLogProbabilities,
|
languagesLogProbabilities: data.LanguagesLogProbabilities,
|
||||||
tokensLogProbabilities: data.TokensLogProbabilities,
|
tokensLogProbabilities: data.TokensLogProbabilities,
|
||||||
|
13
enry.go
13
enry.go
@ -1,3 +1,16 @@
|
|||||||
|
/*
|
||||||
|
Package enry implements multiple strategies for programming language identification.
|
||||||
|
|
||||||
|
Identification is made based on file name and file content using a series
|
||||||
|
of strategies to narrow down the possible options.
|
||||||
|
Each strategy is available as a separate API call, as well as a main entry point
|
||||||
|
|
||||||
|
GetLanguage(filename string, content []byte) (language string)
|
||||||
|
|
||||||
|
It is a port of https://github.com/github/linguist from Ruby.
|
||||||
|
Upstream Linguist YAML files are used to generate data structures for the data
|
||||||
|
package.
|
||||||
|
*/
|
||||||
package enry // import "gopkg.in/src-d/enry.v1"
|
package enry // import "gopkg.in/src-d/enry.v1"
|
||||||
|
|
||||||
//go:generate make code-generate
|
//go:generate make code-generate
|
||||||
|
@ -28,7 +28,7 @@ $(RESOURCES_DIR): os-shared-lib
|
|||||||
cp -R $(RESOURCES_SRC) $(RESOURCES_DIR)
|
cp -R $(RESOURCES_SRC) $(RESOURCES_DIR)
|
||||||
|
|
||||||
$(JNAERATOR_JAR): $(RESOURCES_DIR)
|
$(JNAERATOR_JAR): $(RESOURCES_DIR)
|
||||||
mkdir $(JNAERATOR_DIR) && \
|
mkdir -p $(JNAERATOR_DIR) && \
|
||||||
wget $(JNAERATOR_JAR_URL) -O $(JNAERATOR_JAR)
|
wget $(JNAERATOR_JAR_URL) -O $(JNAERATOR_JAR)
|
||||||
|
|
||||||
os-shared-lib:
|
os-shared-lib:
|
||||||
|
@ -9,16 +9,6 @@ public class Enry {
|
|||||||
|
|
||||||
private static final EnryLibrary nativeLib = EnryLibrary.INSTANCE;
|
private static final EnryLibrary nativeLib = EnryLibrary.INSTANCE;
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns whether the given language is auxiliary or not.
|
|
||||||
*
|
|
||||||
* @param language name of the language, e.g. PHP, HTML, ...
|
|
||||||
* @return if it's an auxiliary language
|
|
||||||
*/
|
|
||||||
public static synchronized boolean isAuxiliaryLanguage(String language) {
|
|
||||||
return toJavaBool(nativeLib.IsAuxiliaryLanguage(toGoString(language)));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the language of the given file based on the filename and its
|
* Returns the language of the given file based on the filename and its
|
||||||
* contents.
|
* contents.
|
||||||
|
@ -6,12 +6,6 @@ import static org.junit.Assert.*;
|
|||||||
|
|
||||||
public class EnryTest {
|
public class EnryTest {
|
||||||
|
|
||||||
@Test
|
|
||||||
public void isAuxiliaryLanguage() {
|
|
||||||
assertTrue(Enry.isAuxiliaryLanguage("HTML"));
|
|
||||||
assertFalse(Enry.isAuxiliaryLanguage("Go"));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void getLanguage() {
|
public void getLanguage() {
|
||||||
String code = "<?php $foo = bar();";
|
String code = "<?php $foo = bar();";
|
||||||
|
@ -93,12 +93,7 @@ func GetLanguagesByVimModeline(filename string, content []byte, candidates []str
|
|||||||
|
|
||||||
//export GetMimeType
|
//export GetMimeType
|
||||||
func GetMimeType(path string, language string) string {
|
func GetMimeType(path string, language string) string {
|
||||||
return enry.GetMimeType(path, language)
|
return enry.GetMIMEType(path, language)
|
||||||
}
|
|
||||||
|
|
||||||
//export IsAuxiliaryLanguage
|
|
||||||
func IsAuxiliaryLanguage(lang string) bool {
|
|
||||||
return enry.IsAuxiliaryLanguage(lang)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//export IsBinary
|
//export IsBinary
|
||||||
|
80
utils.go
80
utils.go
@ -8,53 +8,20 @@ import (
|
|||||||
"gopkg.in/src-d/enry.v1/data"
|
"gopkg.in/src-d/enry.v1/data"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
const binSniffLen = 8000
|
||||||
auxiliaryLanguages = map[string]bool{
|
|
||||||
"Other": true, "XML": true, "YAML": true, "TOML": true, "INI": true,
|
|
||||||
"JSON": true, "TeX": true, "Public Key": true, "AsciiDoc": true,
|
|
||||||
"AGS Script": true, "VimL": true, "Diff": true, "CMake": true, "fish": true,
|
|
||||||
"Awk": true, "Graphviz (DOT)": true, "Markdown": true, "desktop": true,
|
|
||||||
"XSLT": true, "SQL": true, "RMarkdown": true, "IRC log": true,
|
|
||||||
"reStructuredText": true, "Twig": true, "CSS": true, "Batchfile": true,
|
|
||||||
"Text": true, "HTML+ERB": true, "HTML": true, "Gettext Catalog": true,
|
|
||||||
"Smarty": true, "Raw token data": true,
|
|
||||||
}
|
|
||||||
|
|
||||||
configurationLanguages = map[string]bool{
|
var configurationLanguages = map[string]bool{
|
||||||
"XML": true, "JSON": true, "TOML": true, "YAML": true, "INI": true, "SQL": true,
|
"XML": true, "JSON": true, "TOML": true, "YAML": true, "INI": true, "SQL": true,
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
// IsAuxiliaryLanguage returns whether or not lang is an auxiliary language.
|
|
||||||
func IsAuxiliaryLanguage(lang string) bool {
|
|
||||||
_, ok := auxiliaryLanguages[lang]
|
|
||||||
return ok
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsConfiguration returns whether or not path is using a configuration language.
|
// IsConfiguration tells if path is in one of the configuration languages.
|
||||||
func IsConfiguration(path string) bool {
|
func IsConfiguration(path string) bool {
|
||||||
language, _ := GetLanguageByExtension(path)
|
language, _ := GetLanguageByExtension(path)
|
||||||
_, is := configurationLanguages[language]
|
_, is := configurationLanguages[language]
|
||||||
return is
|
return is
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsDotFile returns whether or not path has dot as a prefix.
|
// IsImage tells if a given file is an image (PNG, JPEG or GIF format).
|
||||||
func IsDotFile(path string) bool {
|
|
||||||
path = filepath.Clean(path)
|
|
||||||
base := filepath.Base(path)
|
|
||||||
return strings.HasPrefix(base, ".") && base != "." && base != ".."
|
|
||||||
}
|
|
||||||
|
|
||||||
// IsVendor returns whether or not path is a vendor path.
|
|
||||||
func IsVendor(path string) bool {
|
|
||||||
return data.VendorMatchers.Match(path)
|
|
||||||
}
|
|
||||||
|
|
||||||
// IsDocumentation returns whether or not path is a documentation path.
|
|
||||||
func IsDocumentation(path string) bool {
|
|
||||||
return data.DocumentationMatchers.Match(path)
|
|
||||||
}
|
|
||||||
|
|
||||||
func IsImage(path string) bool {
|
func IsImage(path string) bool {
|
||||||
extension := filepath.Ext(path)
|
extension := filepath.Ext(path)
|
||||||
if extension == ".png" || extension == ".jpg" || extension == ".jpeg" || extension == ".gif" {
|
if extension == ".png" || extension == ".jpg" || extension == ".jpeg" || extension == ".gif" {
|
||||||
@ -64,7 +31,8 @@ func IsImage(path string) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetMimeType(path string, language string) string {
|
// GetMIMEType returns the MIME type of a given file based on its language.
|
||||||
|
func GetMIMEType(path string, language string) string {
|
||||||
if mime, ok := data.LanguagesMime[language]; ok {
|
if mime, ok := data.LanguagesMime[language]; ok {
|
||||||
return mime
|
return mime
|
||||||
}
|
}
|
||||||
@ -76,13 +44,27 @@ func GetMimeType(path string, language string) string {
|
|||||||
return "text/plain"
|
return "text/plain"
|
||||||
}
|
}
|
||||||
|
|
||||||
const sniffLen = 8000
|
// IsDocumentation returns whether or not path is a documentation path.
|
||||||
|
func IsDocumentation(path string) bool {
|
||||||
|
return data.DocumentationMatchers.Match(path)
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsDotFile returns whether or not the base name of path starts with a dot, excluding "." itself.
|
||||||
|
func IsDotFile(path string) bool {
|
||||||
|
base := filepath.Base(filepath.Clean(path))
|
||||||
|
return strings.HasPrefix(base, ".") && base != "."
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsVendor returns whether or not path is a vendor path.
|
||||||
|
func IsVendor(path string) bool {
|
||||||
|
return data.VendorMatchers.Match(path)
|
||||||
|
}
|
||||||
|
|
||||||
// IsBinary detects if data is a binary value based on:
|
// IsBinary detects if data is a binary value based on:
|
||||||
// http://git.kernel.org/cgit/git/git.git/tree/xdiff-interface.c?id=HEAD#n198
|
// http://git.kernel.org/cgit/git/git.git/tree/xdiff-interface.c?id=HEAD#n198
|
||||||
func IsBinary(data []byte) bool {
|
func IsBinary(data []byte) bool {
|
||||||
if len(data) > sniffLen {
|
if len(data) > binSniffLen {
|
||||||
data = data[:sniffLen]
|
data = data[:binSniffLen]
|
||||||
}
|
}
|
||||||
|
|
||||||
if bytes.IndexByte(data, byte(0)) == -1 {
|
if bytes.IndexByte(data, byte(0)) == -1 {
|
||||||
@ -91,17 +73,3 @@ func IsBinary(data []byte) bool {
|
|||||||
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
// FileCount type stores language name and count of files belonging to the
|
|
||||||
// language.
|
|
||||||
type FileCount struct {
|
|
||||||
Name string
|
|
||||||
Count int
|
|
||||||
}
|
|
||||||
|
|
||||||
// FileCountList type is a list of FileCounts.
|
|
||||||
type FileCountList []FileCount
|
|
||||||
|
|
||||||
func (fcl FileCountList) Len() int { return len(fcl) }
|
|
||||||
func (fcl FileCountList) Less(i, j int) bool { return fcl[i].Count < fcl[j].Count }
|
|
||||||
func (fcl FileCountList) Swap(i, j int) { fcl[i], fcl[j] = fcl[j], fcl[i] }
|
|
||||||
|
@ -3,38 +3,11 @@ package enry
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
"sort"
|
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestIsAuxiliaryLanguage(t *testing.T) {
|
|
||||||
type testType struct {
|
|
||||||
name string
|
|
||||||
lang string
|
|
||||||
expected bool
|
|
||||||
}
|
|
||||||
|
|
||||||
tests := []testType{
|
|
||||||
{name: "TestIsAuxiliaryLanguage_Invalid", lang: "invalid", expected: false},
|
|
||||||
}
|
|
||||||
for k := range auxiliaryLanguages {
|
|
||||||
t := testType{
|
|
||||||
name: fmt.Sprintf("TestIsAuxiliaryLanguage_%s", k),
|
|
||||||
lang: k,
|
|
||||||
expected: true,
|
|
||||||
}
|
|
||||||
tests = append(tests, t)
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, test := range tests {
|
|
||||||
is := IsAuxiliaryLanguage(test.lang)
|
|
||||||
assert.Equal(t, is, test.expected,
|
|
||||||
fmt.Sprintf("%v: is = %v, expected: %v", test.name, is, test.expected))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestIsVendor(t *testing.T) {
|
func TestIsVendor(t *testing.T) {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
@ -106,7 +79,7 @@ func TestGetMimeType(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
is := GetMimeType(test.path, test.lang)
|
is := GetMIMEType(test.path, test.lang)
|
||||||
assert.Equal(t, is, test.expected, fmt.Sprintf("%v: is = %v, expected: %v", test.name, is, test.expected))
|
assert.Equal(t, is, test.expected, fmt.Sprintf("%v: is = %v, expected: %v", test.name, is, test.expected))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -160,43 +133,3 @@ func TestIsDotFile(t *testing.T) {
|
|||||||
assert.Equal(t, test.expected, is, fmt.Sprintf("%v: is = %v, expected: %v", test.name, is, test.expected))
|
assert.Equal(t, test.expected, is, fmt.Sprintf("%v: is = %v, expected: %v", test.name, is, test.expected))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestFileCountListSort(t *testing.T) {
|
|
||||||
sampleData := FileCountList{{"a", 8}, {"b", 65}, {"c", 20}, {"d", 90}}
|
|
||||||
const ascending = "ASC"
|
|
||||||
const descending = "DESC"
|
|
||||||
|
|
||||||
tests := []struct {
|
|
||||||
name string
|
|
||||||
data FileCountList
|
|
||||||
order string
|
|
||||||
expectedData FileCountList
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
name: "ascending order",
|
|
||||||
data: sampleData,
|
|
||||||
order: ascending,
|
|
||||||
expectedData: FileCountList{{"a", 8}, {"c", 20}, {"b", 65}, {"d", 90}},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "descending order",
|
|
||||||
data: sampleData,
|
|
||||||
order: descending,
|
|
||||||
expectedData: FileCountList{{"d", 90}, {"b", 65}, {"c", 20}, {"a", 8}},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, test := range tests {
|
|
||||||
t.Run(test.name, func(t *testing.T) {
|
|
||||||
if test.order == descending {
|
|
||||||
sort.Sort(sort.Reverse(test.data))
|
|
||||||
} else {
|
|
||||||
sort.Sort(test.data)
|
|
||||||
}
|
|
||||||
|
|
||||||
for i := 0; i < len(test.data); i++ {
|
|
||||||
assert.Equal(t, test.data[i], test.expectedData[i], fmt.Sprintf("%v: FileCount at position %d = %v, expected: %v", test.name, i, test.data[i], test.expectedData[i]))
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user