new by content heuristics

This commit is contained in:
Máximo Cuadros
2016-07-14 15:14:32 +02:00
parent e0a990e4ea
commit b1a3085e44
3 changed files with 118 additions and 7 deletions

View File

@ -1,10 +1,12 @@
package slinguist
import (
"fmt"
"io/ioutil"
"os"
"path"
"path/filepath"
"text/tabwriter"
. "gopkg.in/check.v1"
)
@ -28,6 +30,7 @@ func (s *TSuite) TestGetLanguageByContentH(c *C) {
s.testGetLanguageByContent(c, "Prolog")
s.testGetLanguageByContent(c, "Perl")
s.testGetLanguageByContent(c, "Perl6")
s.testGetLanguageByContent(c, "Hack")
}
func (s *TSuite) testGetLanguageByContent(c *C, expected string) {
@ -41,7 +44,6 @@ func (s *TSuite) testGetLanguageByContent(c *C, expected string) {
}
content, _ := ioutil.ReadFile(file)
obtained, _ := GetLanguageByContent(path.Base(file), content)
if obtained == OtherLanguage {
continue
@ -50,3 +52,51 @@ func (s *TSuite) testGetLanguageByContent(c *C, expected string) {
c.Check(obtained, Equals, expected, Commentf(file))
}
}
// TestGetLanguageByContentLinguistCorpus runs GetLanguageByContent over every
// regular file found under .linguist/samples and prints a tab-separated report
// to stdout: one row per file (file name, expected language, obtained
// language, safe flag, status) followed by summary counters.
//
// The expected language of a sample is the name of the directory that directly
// contains it. Directories named "filenames" are skipped entirely.
//
// Mismatches between expected and obtained languages are only reported, not
// asserted. The test fails only if the corpus cannot be walked or a sample
// cannot be read.
func (s *TSuite) TestGetLanguageByContentLinguistCorpus(c *C) {
	var total, failed, ok, other, unsafe int

	w := new(tabwriter.Writer)
	w.Init(os.Stdout, 0, 8, 0, '\t', 0)

	walkErr := filepath.Walk(".linguist/samples", func(file string, f os.FileInfo, err error) error {
		// Walk may hand us a nil FileInfo together with a non-nil err (for
		// example when the root does not exist); touching f in that case
		// would panic, so bail out first.
		if err != nil {
			return err
		}

		if f.IsDir() {
			if f.Name() == "filenames" {
				return filepath.SkipDir
			}

			return nil
		}

		// An unreadable sample must not be classified as if it were empty.
		content, err := ioutil.ReadFile(file)
		if err != nil {
			return err
		}

		total++
		expected := filepath.Base(filepath.Dir(file))
		filename := filepath.Base(file)

		obtained, safe := GetLanguageByContent(filename, content)
		if obtained == OtherLanguage {
			other++
		}

		var status string
		if expected == obtained {
			status = "ok"
			ok++
		} else {
			status = "failed"
			failed++
			// Only wrong answers that were not flagged as safe are counted here.
			if !safe {
				unsafe++
			}
		}

		fmt.Fprintf(w, "%s\t%s\t%s\t%v\t%s\n", filename, expected, obtained, safe, status)
		return nil
	})

	// Emit whatever was collected, even if the walk stopped early.
	fmt.Fprintln(w)
	w.Flush()
	fmt.Printf("total files: %d, ok: %d, failed: %d, unsafe: %d, other: %d\n", total, ok, failed, unsafe, other)

	if walkErr != nil {
		c.Fatalf("walking .linguist/samples: %v", walkErr)
	}
}