diff --git a/common.go b/common.go index f6b97a8..55bb7d9 100644 --- a/common.go +++ b/common.go @@ -929,6 +929,8 @@ var LanguagesByExtension = map[string][]string{ } func init() { + LanguagesByExtension[".cgi"] = []string{OtherLanguage} + LanguagesByExtension[".fcgi"] = []string{OtherLanguage} ExtensionsByLanguage = reverseStringListMap(LanguagesByExtension) } diff --git a/content.go b/content.go index 46a96c7..a6523f1 100644 --- a/content.go +++ b/content.go @@ -23,27 +23,32 @@ var matchers = map[string]languageMatcher{ ".cls": clsExtLanguage, ".m": mExtLanguage, ".ms": msExtLanguage, + ".md": mdExtLanguage, + ".fs": fsExtLanguage, ".h": hExtLanguage, + ".hh": hhExtLanguage, ".l": lExtLanguage, ".n": nExtLanguage, ".lisp": lispExtLanguage, ".lsp": lispExtLanguage, ".pm": pmExtLanguage, ".t": pmExtLanguage, + ".rs": rsExtLanguage, ".pl": plExtLanguage, ".pro": proExtLanguage, ".toc": tocExtLanguage, + ".sls": slsExtLanguage, } var ( cPlusPlusMatcher = substring.BytesOr( - substring.BytesRegexp(`^\s*template\s*<`), - substring.BytesRegexp(`^\s*#\s*include <(cstdint|string|vector|map|list|array|bitset|queue|stack|forward_list|unordered_map|unordered_set|(i|o|io)stream)>`), - substring.BytesRegexp(`^[ \t]*try`), - substring.BytesRegexp(`^[ \t]*(class|(using[ \t]+)?namespace)\s+\w+`), - substring.BytesRegexp(`^[ \t]*(private|public|protected):$`), + substring.BytesRegexp(`\s*template\s*<`), + substring.BytesRegexp(`\s*#\s*include <(cstdint|string|vector|map|list|array|bitset|queue|stack|forward_list|unordered_map|unordered_set|(i|o|io)stream)>`), + substring.BytesRegexp(`\n[ \t]*try`), + substring.BytesRegexp(`\n[ \t]*(class|(using[ \t]+)?namespace)\s+\w+`), + substring.BytesRegexp(`\n[ \t]*(private|public|protected):$`), substring.BytesRegexp(`std::\w+`), - substring.BytesRegexp(`^[ \t]*catch\s*`), + substring.BytesRegexp(`[ \t]*catch\s*`), ) ) @@ -55,6 +60,20 @@ func incExtLanguage(input []byte) (string, bool) { return OtherLanguage, true } +func fsExtLanguage(input []byte) (string, bool) { + if substring.BytesRegexp(`\n(: |new-device)`).Match(input) { + return "Forth", true + } else if substring.BytesRegexp(`\s*(#light|import|let|module|namespace|open|type)`).Match(input) { + return "F#", true + } else if substring.BytesRegexp(`(#version|precision|uniform|varying|vec[234])`).Match(input) { + return "GLSL", true + } else if substring.BytesRegexp(`#include|#pragma\s+(rs|version)|__attribute__`).Match(input) { + return "Filterscript", true + } + + return OtherLanguage, true +} + func hExtLanguage(input []byte) (string, bool) { if objectiveCMatcher.Match(input) { return "Objective-C", true @@ -65,6 +84,16 @@ func hExtLanguage(input []byte) (string, bool) { return "C", true } +func hhExtLanguage(input []byte) (string, bool) { + if substring.BytesRegexp(`^<\?(?:hh)?`).Match(input) { + return "Hack", true + } else if cPlusPlusMatcher.Match(input) { + return "C++", true + } + + return OtherLanguage, false +} + func msExtLanguage(input []byte) (string, bool) { if substring.BytesRegexp(`[.'][a-z][a-z](\s|$)`).Match(input) { return "Groff", true @@ -179,6 +208,16 @@ func mExtLanguage(input []byte) (string, bool) { return OtherLanguage, false } +func mdExtLanguage(input []byte) (string, bool) { + if substring.BytesRegexp(`\n[-a-z0-9=#!\*\[|]`).Match(input) { + return "Markdown", true + } else if substring.BytesRegexp(`\n(;;|\(define_)`).Match(input) { + return "GCC Machine Description", true + } + + return OtherLanguage, false +} + var ( prologMatcher = substring.BytesRegexp(`^[^#]+:-`) perlMatcher = substring.BytesRegexp(`use strict|use\s+v?5\.`) @@ -205,6 +244,16 @@ func pmExtLanguage(input []byte) (string, bool) { return "Perl", false } +func rsExtLanguage(input []byte) (string, bool) { + if substring.BytesRegexp(`(use |fn |mod |pub |macro_rules|impl|#!?\[)`).Match(input) { + return "Rust", true + } else if substring.BytesRegexp(`#include|#pragma\s+(rs|version)|__attribute__`).Match(input) { + return "RenderScript", true + } + + return OtherLanguage, false +} + func proExtLanguage(input []byte) (string, bool) { if prologMatcher.Match(input) { return "Prolog", true @@ -222,3 +271,13 @@ func tocExtLanguage(input []byte) (string, bool) { return OtherLanguage, false } + +func slsExtLanguage(input []byte) (string, bool) { + if substring.BytesRegexp("## |@no-lib-strip@").Match(input) { + return "World of Warcraft Addon Data", true + } else if substring.BytesRegexp("(contentsline|defcounter|beamer|boolfalse)").Match(input) { + return "TeX", true + } + + return OtherLanguage, false +} diff --git a/content_test.go b/content_test.go index 65bc02b..fb86275 100644 --- a/content_test.go +++ b/content_test.go @@ -1,10 +1,12 @@ package slinguist import ( + "fmt" "io/ioutil" "os" "path" "path/filepath" + "text/tabwriter" . "gopkg.in/check.v1" ) @@ -28,6 +30,7 @@ func (s *TSuite) TestGetLanguageByContentH(c *C) { s.testGetLanguageByContent(c, "Prolog") s.testGetLanguageByContent(c, "Perl") s.testGetLanguageByContent(c, "Perl6") + s.testGetLanguageByContent(c, "Hack") } func (s *TSuite) testGetLanguageByContent(c *C, expected string) { @@ -41,7 +44,6 @@ func (s *TSuite) testGetLanguageByContent(c *C, expected string) { } content, _ := ioutil.ReadFile(file) - obtained, _ := GetLanguageByContent(path.Base(file), content) if obtained == OtherLanguage { continue @@ -50,3 +52,51 @@ func (s *TSuite) testGetLanguageByContent(c *C, expected string) { c.Check(obtained, Equals, expected, Commentf(file)) } } + +func (s *TSuite) TestGetLanguageByContentLinguistCorpus(c *C) { + var total, failed, ok, other, unsafe int + + w := new(tabwriter.Writer) + w.Init(os.Stdout, 0, 8, 0, '\t', 0) + + filepath.Walk(".linguist/samples", func(path string, f os.FileInfo, err error) error { + if f.IsDir() { + if f.Name() == "filenames" { + return filepath.SkipDir + } + return nil + } + + total++ + expected := filepath.Base(filepath.Dir(path)) + filename := filepath.Base(path) + content, _ := ioutil.ReadFile(path) + + obtained, safe := GetLanguageByContent(filename, content) + if obtained == OtherLanguage { + other++ + } + + var status string + if expected == obtained { + status = "ok" + ok++ + } else { + status = "failed" + failed++ + if !safe { + unsafe++ + } + } + + fmt.Fprintf(w, "%s\t%s\t%s\t%v\t%s\n", filename, expected, obtained, safe, status) + + return nil + }) + + fmt.Fprintln(w) + w.Flush() + + fmt.Printf("total files: %d, ok: %d, failed: %d, unsafe: %d, other: %d\n", total, ok, failed, unsafe, other) + +}