From 6d8f15af5bc6bdf2bcca59a2dfe00336ad98e2b5 Mon Sep 17 00:00:00 2001 From: Lauris BH Date: Sun, 15 Nov 2020 15:43:37 +0200 Subject: [PATCH 1/3] Add XML strategy --- common.go | 40 ++++++++++++++++++++++++++++++++++++---- common_test.go | 29 +++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 4 deletions(-) diff --git a/common.go b/common.go index d6c8b7e..10cd7d1 100644 --- a/common.go +++ b/common.go @@ -23,6 +23,7 @@ var DefaultStrategies = []Strategy{ GetLanguagesByShebang, GetLanguagesByExtension, GetLanguagesByManpage, + GetLanguagesByXML, GetLanguagesByContent, GetLanguagesByClassifier, } @@ -329,15 +330,23 @@ func getInterpreter(data []byte) (interpreter string) { return } -func getFirstLine(content []byte) []byte { - nlpos := bytes.IndexByte(content, '\n') - if nlpos < 0 { - return content +func getFirstLines(content []byte, count int) []byte { + nlpos := -1 + for ; count > 0; count-- { + pos := bytes.IndexByte(content[nlpos+1:], '\n') + if pos < 0 { + return content + } + nlpos += pos + 1 } return content[:nlpos] } +func getFirstLine(content []byte) []byte { + return getFirstLines(content, 1) +} + func hasShebang(line []byte) bool { const shebang = `#!` prefix := []byte(shebang) @@ -404,6 +413,29 @@ func GetLanguagesByManpage(filename string, _ []byte, _ []string) []string { return nil } +var ( + xmlHeader = regex.MustCompile(` 0 { + return candidates + } + + header := getFirstLines(content, 2) + + // Check if contains XML header + if xmlHeader.Match(header) { + return []string{ + "XML", + } + } + + return nil +} + func getDotIndexes(filename string) []int { dots := make([]int, 0, 2) for i, letter := range filename { diff --git a/common_test.go b/common_test.go index 15ae503..b79eff3 100644 --- a/common_test.go +++ b/common_test.go @@ -23,6 +23,7 @@ type EnryTestSuite struct { tmpLinguist string needToClone bool samplesDir string + testFixturesDir string } func (s *EnryTestSuite) TestRegexpEdgeCases() { @@ -72,6 +73,9 @@ func (s 
*EnryTestSuite) SetupSuite() { s.samplesDir = filepath.Join(s.tmpLinguist, "samples") s.T().Logf("using samples from %s", s.samplesDir) + s.testFixturesDir = filepath.Join(s.tmpLinguist, "test", "fixtures") + s.T().Logf("using test fixtures from %s", s.testFixturesDir) + cwd, err := os.Getwd() assert.NoError(s.T(), err) @@ -314,6 +318,31 @@ func (s *EnryTestSuite) TestGetLanguagesByManpage() { } } + +func (s *EnryTestSuite) TestGetLanguagesByXML() { + tests := []struct { + name string + filename string + candidates []string + expected []string + + }{ + {name: "TestGetLanguagesByXML_1", filename: filepath.Join(s.testFixturesDir, "XML/app.config"), expected: []string{"XML"}}, + {name: "TestGetLanguagesByXML_2", filename: filepath.Join(s.testFixturesDir, "XML/AssertionIDRequestOptionalAttributes.xml.svn-base"), expected: []string{"XML"}}, + // no XML header so should not be identified by this strategy + {name: "TestGetLanguagesByXML_3", filename: filepath.Join(s.samplesDir, "XML/libsomething.dll.config"), expected: nil}, + {name: "TestGetLanguagesByXML_4", filename: filepath.Join(s.samplesDir, "Eagle/Eagle.sch"), candidates: []string{"Eagle"}, expected: []string{"Eagle"}}, + } + + for _, test := range tests { + content, err := ioutil.ReadFile(test.filename) + assert.NoError(s.T(), err) + + languages := GetLanguagesByXML(test.filename, content, test.candidates) + assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected)) + } +} + func (s *EnryTestSuite) TestGetLanguagesByClassifier() { test := []struct { name string From 8ac98f4b77b29b0447daf99b6c296c96054305cd Mon Sep 17 00:00:00 2001 From: Lauris BH Date: Sun, 15 Nov 2020 15:48:03 +0200 Subject: [PATCH 2/3] Update readme --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index b5ddd99..4e5134c 100644 --- a/README.md +++ b/README.md @@ -171,8 +171,6 @@ Parsing 
[linguist/samples](https://github.com/github/linguist/tree/master/sample - Detection of [generated files](https://github.com/github/linguist/blob/bf95666fc15e49d556f2def4d0a85338423c25f3/lib/linguist/generated.rb#L53) is not supported yet. (Thus they are not excluded from CLI output). See [#213](https://github.com/src-d/enry/issues/213). -- XML detection strategy is not implemented. See [#192](https://github.com/src-d/enry/issues/192). - - Overriding languages and types though `.gitattributes` is not yet supported. See [#18](https://github.com/src-d/enry/issues/18). - `enry` CLI output does NOT exclude `.gitignore`ed files and git submodules, as Linguist does From 0596fda1a45d285b56049b77dacd4b918800f97f Mon Sep 17 00:00:00 2001 From: Lauris BH Date: Thu, 26 Nov 2020 13:56:25 +0200 Subject: [PATCH 3/3] Fix strategy order --- common.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common.go b/common.go index 10cd7d1..4764cf7 100644 --- a/common.go +++ b/common.go @@ -22,8 +22,8 @@ var DefaultStrategies = []Strategy{ GetLanguagesByFilename, GetLanguagesByShebang, GetLanguagesByExtension, - GetLanguagesByManpage, GetLanguagesByXML, + GetLanguagesByManpage, GetLanguagesByContent, GetLanguagesByClassifier, }