Add XML strategy

This commit is contained in:
Lauris BH
2020-11-15 15:43:37 +02:00
parent 0fb4b8a768
commit 6d8f15af5b
2 changed files with 65 additions and 4 deletions

View File

@ -23,6 +23,7 @@ var DefaultStrategies = []Strategy{
GetLanguagesByShebang,
GetLanguagesByExtension,
GetLanguagesByManpage,
GetLanguagesByXML,
GetLanguagesByContent,
GetLanguagesByClassifier,
}
@ -329,15 +330,23 @@ func getInterpreter(data []byte) (interpreter string) {
return
}
func getFirstLine(content []byte) []byte {
nlpos := bytes.IndexByte(content, '\n')
if nlpos < 0 {
return content
// getFirstLines returns the leading part of content that spans at most count
// lines, excluding the newline that terminates the last of those lines.
//
// When content holds fewer than count newline characters it is returned
// unchanged. A count of zero or less yields nil: without that guard the loop
// would not run and the final slice expression would use an end index of -1,
// which panics.
func getFirstLines(content []byte, count int) []byte {
	if count <= 0 {
		return nil
	}

	// nlpos is the absolute index of the most recently found newline; -1 means
	// none has been found yet, so the first search starts at offset 0.
	nlpos := -1
	for ; count > 0; count-- {
		pos := bytes.IndexByte(content[nlpos+1:], '\n')
		if pos < 0 {
			return content
		}
		// pos is relative to the searched sub-slice; convert it to an
		// absolute index into content.
		nlpos += pos + 1
	}

	return content[:nlpos]
}
// getFirstLine returns the first line of content by delegating to
// getFirstLines with a line count of one.
func getFirstLine(content []byte) []byte {
	const lineCount = 1
	return getFirstLines(content, lineCount)
}
func hasShebang(line []byte) bool {
const shebang = `#!`
prefix := []byte(shebang)
@ -404,6 +413,29 @@ func GetLanguagesByManpage(filename string, _ []byte, _ []string) []string {
return nil
}
var (
	// xmlHeader matches the opening of an XML declaration such as
	// `<?xml version="1.0"?>`. The `?` is escaped so it is matched literally;
	// unescaped it is a quantifier that makes the preceding `<` optional, and
	// the pattern would then match a bare "xml version=" anywhere in the input.
	xmlHeader = regex.MustCompile(`<\?xml version=`)
)
// GetLanguagesByXML reports "XML" when the first two lines of content match
// the xmlHeader pattern. The filename argument is ignored. If candidates is
// non-empty it is returned unchanged and content is not inspected; if the
// pattern does not match, nil is returned.
// It complies with the signature to be a Strategy type.
func GetLanguagesByXML(_ string, content []byte, candidates []string) []string {
	if len(candidates) > 0 {
		return candidates
	}

	// Only the first two lines are searched for the header.
	head := getFirstLines(content, 2)
	if !xmlHeader.Match(head) {
		return nil
	}

	return []string{"XML"}
}
func getDotIndexes(filename string) []int {
dots := make([]int, 0, 2)
for i, letter := range filename {