mirror of
https://github.com/ralsina/tartrazine.git
synced 2024-11-10 05:22:23 +00:00
Merge pull request #40 from lafriks-fork/feat/strategy_xml
Add XML strategy
This commit is contained in:
commit
3faf9450da
@ -173,8 +173,6 @@ Parsing [linguist/samples](https://github.com/github/linguist/tree/master/sample
|
|||||||
- Detection of [generated files](https://github.com/github/linguist/blob/bf95666fc15e49d556f2def4d0a85338423c25f3/lib/linguist/generated.rb#L53) is not supported yet.
|
- Detection of [generated files](https://github.com/github/linguist/blob/bf95666fc15e49d556f2def4d0a85338423c25f3/lib/linguist/generated.rb#L53) is not supported yet.
|
||||||
(Thus they are not excluded from CLI output). See [#213](https://github.com/src-d/enry/issues/213).
|
(Thus they are not excluded from CLI output). See [#213](https://github.com/src-d/enry/issues/213).
|
||||||
|
|
||||||
- XML detection strategy is not implemented. See [#192](https://github.com/src-d/enry/issues/192).
|
|
||||||
|
|
||||||
- Overriding languages and types through `.gitattributes` is not yet supported. See [#18](https://github.com/src-d/enry/issues/18).
|
- Overriding languages and types through `.gitattributes` is not yet supported. See [#18](https://github.com/src-d/enry/issues/18).
|
||||||
|
|
||||||
- `enry` CLI output does NOT exclude `.gitignore`ed files and git submodules, as Linguist does
|
- `enry` CLI output does NOT exclude `.gitignore`ed files and git submodules, as Linguist does
|
||||||
|
38
common.go
38
common.go
@ -22,6 +22,7 @@ var DefaultStrategies = []Strategy{
|
|||||||
GetLanguagesByFilename,
|
GetLanguagesByFilename,
|
||||||
GetLanguagesByShebang,
|
GetLanguagesByShebang,
|
||||||
GetLanguagesByExtension,
|
GetLanguagesByExtension,
|
||||||
|
GetLanguagesByXML,
|
||||||
GetLanguagesByManpage,
|
GetLanguagesByManpage,
|
||||||
GetLanguagesByContent,
|
GetLanguagesByContent,
|
||||||
GetLanguagesByClassifier,
|
GetLanguagesByClassifier,
|
||||||
@ -329,15 +330,23 @@ func getInterpreter(data []byte) (interpreter string) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// getFirstLines returns the leading part of content that holds its first
// count lines, excluding the newline that terminates the last of them.
//
// If content has fewer than count newline characters, the whole of content is
// returned unchanged. A count of zero or less yields nil. The result aliases
// content's backing array; no copy is made.
func getFirstLines(content []byte, count int) []byte {
	// Without this guard the loop below never runs, nlpos stays at -1 and the
	// final slice expression content[:-1] panics.
	if count <= 0 {
		return nil
	}

	// nlpos is the index of the most recently found newline; -1 means "none
	// yet", so that nlpos+1 is always the offset where the next search starts.
	nlpos := -1
	for ; count > 0; count-- {
		pos := bytes.IndexByte(content[nlpos+1:], '\n')
		if pos < 0 {
			// Fewer lines than requested: everything qualifies.
			return content
		}
		// pos is relative to the sub-slice; convert it back to an absolute index.
		nlpos += pos + 1
	}

	return content[:nlpos]
}

// getFirstLine returns the first line of content without its trailing
// newline, or all of content when it contains no newline.
func getFirstLine(content []byte) []byte {
	return getFirstLines(content, 1)
}
|
||||||
|
|
||||||
func hasShebang(line []byte) bool {
|
func hasShebang(line []byte) bool {
|
||||||
const shebang = `#!`
|
const shebang = `#!`
|
||||||
prefix := []byte(shebang)
|
prefix := []byte(shebang)
|
||||||
@ -404,6 +413,29 @@ func GetLanguagesByManpage(filename string, _ []byte, _ []string) []string {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
xmlHeader = regex.MustCompile(`<?xml version=`)
|
||||||
|
)
|
||||||
|
|
||||||
|
// GetLanguagesByXML returns a slice of possible XML language for the given filename.
|
||||||
|
// It complies with the signature to be a Strategy type.
|
||||||
|
func GetLanguagesByXML(_ string, content []byte, candidates []string) []string {
|
||||||
|
if len(candidates) > 0 {
|
||||||
|
return candidates
|
||||||
|
}
|
||||||
|
|
||||||
|
header := getFirstLines(content, 2)
|
||||||
|
|
||||||
|
// Check if contains XML header
|
||||||
|
if xmlHeader.Match(header) {
|
||||||
|
return []string{
|
||||||
|
"XML",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func getDotIndexes(filename string) []int {
|
func getDotIndexes(filename string) []int {
|
||||||
dots := make([]int, 0, 2)
|
dots := make([]int, 0, 2)
|
||||||
for i, letter := range filename {
|
for i, letter := range filename {
|
||||||
|
@ -23,6 +23,7 @@ type EnryTestSuite struct {
|
|||||||
tmpLinguist string
|
tmpLinguist string
|
||||||
needToClone bool
|
needToClone bool
|
||||||
samplesDir string
|
samplesDir string
|
||||||
|
testFixturesDir string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *EnryTestSuite) TestRegexpEdgeCases() {
|
func (s *EnryTestSuite) TestRegexpEdgeCases() {
|
||||||
@ -72,6 +73,9 @@ func (s *EnryTestSuite) SetupSuite() {
|
|||||||
s.samplesDir = filepath.Join(s.tmpLinguist, "samples")
|
s.samplesDir = filepath.Join(s.tmpLinguist, "samples")
|
||||||
s.T().Logf("using samples from %s", s.samplesDir)
|
s.T().Logf("using samples from %s", s.samplesDir)
|
||||||
|
|
||||||
|
s.testFixturesDir = filepath.Join(s.tmpLinguist, "test", "fixtures")
|
||||||
|
s.T().Logf("using test fixtures from %s", s.samplesDir)
|
||||||
|
|
||||||
cwd, err := os.Getwd()
|
cwd, err := os.Getwd()
|
||||||
assert.NoError(s.T(), err)
|
assert.NoError(s.T(), err)
|
||||||
|
|
||||||
@ -314,6 +318,31 @@ func (s *EnryTestSuite) TestGetLanguagesByManpage() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
func (s *EnryTestSuite) TestGetLanguagesByXML() {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
filename string
|
||||||
|
candidates []string
|
||||||
|
expected []string
|
||||||
|
|
||||||
|
}{
|
||||||
|
{name: "TestGetLanguagesByXML_1", filename: filepath.Join(s.testFixturesDir, "XML/app.config"), expected: []string{"XML"}},
|
||||||
|
{name: "TestGetLanguagesByXML_2", filename: filepath.Join(s.testFixturesDir, "XML/AssertionIDRequestOptionalAttributes.xml.svn-base"), expected: []string{"XML"}},
|
||||||
|
// no XML header so should not be identified by this strategy
|
||||||
|
{name: "TestGetLanguagesByXML_3", filename: filepath.Join(s.samplesDir, "XML/libsomething.dll.config"), expected: nil},
|
||||||
|
{name: "TestGetLanguagesByXML_4", filename: filepath.Join(s.samplesDir, "Eagle/Eagle.sch"), candidates: []string{"Eagle"}, expected: []string{"Eagle"}},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range tests {
|
||||||
|
content, err := ioutil.ReadFile(test.filename)
|
||||||
|
assert.NoError(s.T(), err)
|
||||||
|
|
||||||
|
languages := GetLanguagesByXML(test.filename, content, test.candidates)
|
||||||
|
assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (s *EnryTestSuite) TestGetLanguagesByClassifier() {
|
func (s *EnryTestSuite) TestGetLanguagesByClassifier() {
|
||||||
test := []struct {
|
test := []struct {
|
||||||
name string
|
name string
|
||||||
|
Loading…
Reference in New Issue
Block a user