From 13d3d66d37a87f23a013246a1b0678c9ee3d524b Mon Sep 17 00:00:00 2001 From: Alexander Date: Tue, 5 Feb 2019 22:54:14 +0100 Subject: [PATCH] refactoring: remove un-used code, add go doc, fix ci (#199) Refactoring, consisting of - remove unused method `isAuxiliaryLanguage` and `FileCountList` in order to reduce public API surfaces (go/java) - add GoDoc to public APIs - ci: java profile use latest go src It also now mimics https://docs.travis-ci.com/user/languages/go/#go-import-path for non-go build image, as code relies on internal imports. TEST PLAN: - make test --- .travis.yml | 10 ++- cmd/enry/main.go | 2 +- common.go | 1 + enry.go | 13 +++ java/Makefile | 2 +- .../src/main/java/tech/sourced/enry/Enry.java | 10 --- .../test/java/tech/sourced/enry/EnryTest.java | 6 -- shared/enry.go | 7 +- utils.go | 80 ++++++------------- utils_test.go | 69 +--------------- 10 files changed, 51 insertions(+), 149 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3bf9768..f393e2b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -45,11 +45,19 @@ jobs: stage: test language: scala jdk: oraclejdk8 + before_install: + # mimics exact behavior of 'go_import_path' for non-go build image + - export GOPATH=${TRAVIS_HOME}/gopath + - mkdir -p ${GOPATH}/src/gopkg.in/src-d/enry.v1 + - tar -Pczf ${TRAVIS_TMPDIR}/src_archive.tar.gz -C ${TRAVIS_BUILD_DIR} . && tar -Pxzf ${TRAVIS_TMPDIR}/src_archive.tar.gz -C ${TRAVIS_HOME}/gopath/src/gopkg.in/src-d/enry.v1 + - export TRAVIS_BUILD_DIR=${TRAVIS_HOME}/gopath/src/gopkg.in/src-d/enry.v1 + - cd ${TRAVIS_HOME}/gopath/src/gopkg.in/src-d/enry.v1 install: - gimme version - eval "$(curl -sL https://raw.githubusercontent.com/travis-ci/gimme/master/gimme | GIMME_GO_VERSION=$GO_VERSION bash)" - go version - - go get -v gopkg.in/src-d/enry.v1/... + - echo $PWD; echo $GOPATH + - go get -v ./... 
before_script: - cd java - make diff --git a/cmd/enry/main.go b/cmd/enry/main.go index d97a862..b67d2aa 100644 --- a/cmd/enry/main.go +++ b/cmd/enry/main.go @@ -278,7 +278,7 @@ func printFileAnalysis(file string, limit int64, isJSON bool) error { // functions below can work on a sample fileType := getFileType(file, data) language := enry.GetLanguage(file, data) - mimeType := enry.GetMimeType(file, language) + mimeType := enry.GetMIMEType(file, language) if isJSON { return json.NewEncoder(os.Stdout).Encode(map[string]interface{}{ diff --git a/common.go b/common.go index c2f0636..22ae0ce 100644 --- a/common.go +++ b/common.go @@ -26,6 +26,7 @@ var DefaultStrategies = []Strategy{ GetLanguagesByClassifier, } +// DefaultClassifier is a naive Bayes classifier based on Linguist samples. var DefaultClassifier Classifier = &classifier{ languagesLogProbabilities: data.LanguagesLogProbabilities, tokensLogProbabilities: data.TokensLogProbabilities, diff --git a/enry.go b/enry.go index 4adf39a..d99b8eb 100644 --- a/enry.go +++ b/enry.go @@ -1,3 +1,16 @@ +/* + Package enry implements multiple strategies for programming language identification. + + Identification is made based on file name and file content using a series + of strategies to narrow down possible options. + Each strategy is available as a separate API call, as well as a main entry point + + GetLanguage(filename string, content []byte) (language string) + + It is a port of https://github.com/github/linguist from Ruby. + Upstream Linguist YAML files are used to generate data structures for data + package. 
+*/ package enry // import "gopkg.in/src-d/enry.v1" //go:generate make code-generate diff --git a/java/Makefile b/java/Makefile index 993d348..0fadca6 100644 --- a/java/Makefile +++ b/java/Makefile @@ -28,7 +28,7 @@ $(RESOURCES_DIR): os-shared-lib cp -R $(RESOURCES_SRC) $(RESOURCES_DIR) $(JNAERATOR_JAR): $(RESOURCES_DIR) - mkdir $(JNAERATOR_DIR) && \ + mkdir -p $(JNAERATOR_DIR) && \ wget $(JNAERATOR_JAR_URL) -O $(JNAERATOR_JAR) os-shared-lib: diff --git a/java/src/main/java/tech/sourced/enry/Enry.java b/java/src/main/java/tech/sourced/enry/Enry.java index 92b0d2c..5a0522c 100644 --- a/java/src/main/java/tech/sourced/enry/Enry.java +++ b/java/src/main/java/tech/sourced/enry/Enry.java @@ -9,16 +9,6 @@ public class Enry { private static final EnryLibrary nativeLib = EnryLibrary.INSTANCE; - /** - * Returns whether the given language is auxiliary or not. - * - * @param language name of the language, e.g. PHP, HTML, ... - * @return if it's an auxiliary language - */ - public static synchronized boolean isAuxiliaryLanguage(String language) { - return toJavaBool(nativeLib.IsAuxiliaryLanguage(toGoString(language))); - } - /** * Returns the language of the given file based on the filename and its * contents. 
diff --git a/java/src/test/java/tech/sourced/enry/EnryTest.java b/java/src/test/java/tech/sourced/enry/EnryTest.java index 9a1a453..29c45e3 100644 --- a/java/src/test/java/tech/sourced/enry/EnryTest.java +++ b/java/src/test/java/tech/sourced/enry/EnryTest.java @@ -6,12 +6,6 @@ import static org.junit.Assert.*; public class EnryTest { - @Test - public void isAuxiliaryLanguage() { - assertTrue(Enry.isAuxiliaryLanguage("HTML")); - assertFalse(Enry.isAuxiliaryLanguage("Go")); - } - @Test public void getLanguage() { String code = " sniffLen { - data = data[:sniffLen] + if len(data) > binSniffLen { + data = data[:binSniffLen] } if bytes.IndexByte(data, byte(0)) == -1 { @@ -91,17 +73,3 @@ func IsBinary(data []byte) bool { return true } - -// FileCount type stores language name and count of files belonging to the -// language. -type FileCount struct { - Name string - Count int -} - -// FileCountList type is a list of FileCounts. -type FileCountList []FileCount - -func (fcl FileCountList) Len() int { return len(fcl) } -func (fcl FileCountList) Less(i, j int) bool { return fcl[i].Count < fcl[j].Count } -func (fcl FileCountList) Swap(i, j int) { fcl[i], fcl[j] = fcl[j], fcl[i] } diff --git a/utils_test.go b/utils_test.go index c75f806..2ec3193 100644 --- a/utils_test.go +++ b/utils_test.go @@ -3,38 +3,11 @@ package enry import ( "bytes" "fmt" - "sort" "testing" "github.com/stretchr/testify/assert" ) -func TestIsAuxiliaryLanguage(t *testing.T) { - type testType struct { - name string - lang string - expected bool - } - - tests := []testType{ - {name: "TestIsAuxiliaryLanguage_Invalid", lang: "invalid", expected: false}, - } - for k := range auxiliaryLanguages { - t := testType{ - name: fmt.Sprintf("TestIsAuxiliaryLanguage_%s", k), - lang: k, - expected: true, - } - tests = append(tests, t) - } - - for _, test := range tests { - is := IsAuxiliaryLanguage(test.lang) - assert.Equal(t, is, test.expected, - fmt.Sprintf("%v: is = %v, expected: %v", test.name, is, test.expected)) - } 
-} - func TestIsVendor(t *testing.T) { tests := []struct { name string @@ -106,7 +79,7 @@ func TestGetMimeType(t *testing.T) { } for _, test := range tests { - is := GetMimeType(test.path, test.lang) + is := GetMIMEType(test.path, test.lang) assert.Equal(t, is, test.expected, fmt.Sprintf("%v: is = %v, expected: %v", test.name, is, test.expected)) } } @@ -160,43 +133,3 @@ func TestIsDotFile(t *testing.T) { assert.Equal(t, test.expected, is, fmt.Sprintf("%v: is = %v, expected: %v", test.name, is, test.expected)) } } - -func TestFileCountListSort(t *testing.T) { - sampleData := FileCountList{{"a", 8}, {"b", 65}, {"c", 20}, {"d", 90}} - const ascending = "ASC" - const descending = "DESC" - - tests := []struct { - name string - data FileCountList - order string - expectedData FileCountList - }{ - { - name: "ascending order", - data: sampleData, - order: ascending, - expectedData: FileCountList{{"a", 8}, {"c", 20}, {"b", 65}, {"d", 90}}, - }, - { - name: "descending order", - data: sampleData, - order: descending, - expectedData: FileCountList{{"d", 90}, {"b", 65}, {"c", 20}, {"a", 8}}, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - if test.order == descending { - sort.Sort(sort.Reverse(test.data)) - } else { - sort.Sort(test.data) - } - - for i := 0; i < len(test.data); i++ { - assert.Equal(t, test.data[i], test.expectedData[i], fmt.Sprintf("%v: FileCount at position %d = %v, expected: %v", test.name, i, test.data[i], test.expectedData[i])) - } - }) - } -}