From 645bdd73313eefea60a288bc3457dc62da772918 Mon Sep 17 00:00:00 2001 From: Manuel Carmona Date: Wed, 19 Apr 2017 10:22:46 +0200 Subject: [PATCH 1/6] added filenames_map.go generation languagesByFilename now is a map[string]string --- filename.go | 10 ++ filenames_map.go | 140 ++++++++++++++++++ .../code-generator/assets/filenames.go.tmpl | 11 ++ .../code-generator/generator/filenames.go | 50 +++++++ .../generator/generator_test.go | 91 ++++-------- internal/code-generator/generator/langs.go | 1 + .../generator/test_files/filenames.gold | 12 ++ .../test_files/filenames.test.go.tmpl | 11 ++ .../generator/test_files/filenames.test.yml | 13 ++ internal/code-generator/main.go | 6 + 10 files changed, 286 insertions(+), 59 deletions(-) create mode 100644 filename.go create mode 100644 filenames_map.go create mode 100644 internal/code-generator/assets/filenames.go.tmpl create mode 100644 internal/code-generator/generator/filenames.go create mode 100644 internal/code-generator/generator/test_files/filenames.gold create mode 100644 internal/code-generator/generator/test_files/filenames.test.go.tmpl create mode 100644 internal/code-generator/generator/test_files/filenames.test.yml diff --git a/filename.go b/filename.go new file mode 100644 index 0000000..49a88f4 --- /dev/null +++ b/filename.go @@ -0,0 +1,10 @@ +package slinguist + +func GetLanguageByFilename(filename string) (lang string, safe bool) { + lang, safe = languagesByFilename[filename] + if lang == "" { + lang = OtherLanguage + } + + return +} diff --git a/filenames_map.go b/filenames_map.go new file mode 100644 index 0000000..e198c80 --- /dev/null +++ b/filenames_map.go @@ -0,0 +1,140 @@ +package slinguist + +// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator +// THIS FILE SHOULD NOT BE EDITED BY HAND +// Extracted from github/linguist commit: dae33dc2b20cddc85d1300435c3be7118a7115a9 + +var languagesByFilename = map[string]string{ + ".Rprofile": "R", + ".XCompose": "XCompose", + ".abbrev_defs": "Emacs Lisp", + ".arcconfig": "JSON", + ".babelrc": "JSON5", + ".bash_history": "Shell", + ".bash_logout": "Shell", + ".bash_profile": "Shell", + ".bashrc": "Shell", + ".clang-format": "YAML", + ".classpath": "XML", + ".emacs": "Emacs Lisp", + ".emacs.desktop": "Emacs Lisp", + ".factor-boot-rc": "Factor", + ".factor-rc": "Factor", + ".gclient": "Python", + ".gnus": "Emacs Lisp", + ".jshintrc": "JSON", + ".nvimrc": "Vim script", + ".php_cs": "PHP", + ".php_cs.dist": "PHP", + ".project": "XML", + ".pryrc": "Ruby", + ".spacemacs": "Emacs Lisp", + ".vimrc": "Vim script", + ".viper": "Emacs Lisp", + "APKBUILD": "Alpine Abuild", + "App.config": "XML", + "Appraisals": "Ruby", + "BSDmakefile": "Makefile", + "BUCK": "Python", + "BUILD": "Python", + "Berksfile": "Ruby", + "Brewfile": "Ruby", + "Buildfile": "Ruby", + "CMakeLists.txt": "CMake", + "COPYING": "Text", + "COPYRIGHT.regex": "Text", + "Cakefile": "CoffeeScript", + "Cask": "Emacs Lisp", + "Dangerfile": "Ruby", + "Deliverfile": "Ruby", + "Dockerfile": "Dockerfile", + "Emakefile": "Erlang", + "FONTLOG": "Text", + "Fakefile": "Fancy", + "Fastfile": "Ruby", + "GNUmakefile": "Makefile", + "Gemfile": "Ruby", + "Gemfile.lock": "Ruby", + "Guardfile": "Ruby", + "INSTALL": "Text", + "INSTALL.mysql": "Text", + "Jakefile": "JavaScript", + "Jarfile": "Ruby", + "Jenkinsfile": "Groovy", + "Kbuild": "Makefile", + "LICENSE": "Text", + "LICENSE.mysql": "Text", + "Makefile": "Makefile", + "Makefile.am": "Makefile", + "Makefile.boot": "Makefile", + "Makefile.frag": "Makefile", + "Makefile.in": "Makefile", + "Makefile.inc": "Makefile", + "Mavenfile": "Ruby", + "Modulefile": "Puppet", + "NEWS": "Text", + "Notebook": "Jupyter Notebook", + "NuGet.config": "XML", + "Nukefile": "Nu", + "PKGBUILD": "Shell", + "Phakefile": "PHP", + "Podfile": "Ruby", + "Project.ede": "Emacs Lisp", + "Puppetfile": "Ruby", + "README.1ST": "Text", + "README.me": "Text", + "README.mysql": "Text", + "ROOT": "Isabelle ROOT", + "Rexfile": "Perl6", + "SConscript": "Python", + "SConstruct": "Python", + "Settings.StyleCop": "XML", + "Slakefile": "LiveScript", + "Snakefile": "Python", + "Snapfile": "Ruby", + "Thorfile": "Ruby", + "Vagrantfile": "Ruby", + "WORKSPACE": "Python", + "Web.Debug.config": "XML", + "Web.Release.config": "XML", + "Web.config": "XML", + "XCompose": "XCompose", + "_emacs": "Emacs Lisp", + "_vimrc": "Vim script", + "abbrev_defs": "Emacs Lisp", + "ant.xml": "Ant Build System", + "build.xml": "Ant Build System", + "buildfile": "Ruby", + "click.me": "Text", + "composer.lock": "JSON", + "configure.ac": "M4Sugar", + "delete.me": "Text", + "descrip.mmk": "Module Management System", + "descrip.mms": "Module Management System", + "gradlew": "Shell", + "gvimrc": "Vim script", + "keep.me": "Text", + "ld.script": "Linker Script", + "makefile": "Makefile", + "makefile.sco": "Makefile", + "mcmod.info": "JSON", + "meson.build": "Meson", + "meson_options.txt": "Meson", + "mix.lock": "Elixir", + "mkfile": "Makefile", + "mmn": "Roff", + "mmt": "Roff", + "nginx.conf": "Nginx", + "nvimrc": "Vim script", + "packages.config": "XML", + "pom.xml": "Maven POM", + "read.me": "Text", + "rebar.config": "Erlang", + "rebar.config.lock": "Erlang", + "rebar.lock": "Erlang", + "riemann.config": "Clojure", + "test.me": "Text", + "vimrc": "Vim script", + "wscript": "Python", + "xcompose": "XCompose", +} diff --git a/internal/code-generator/assets/filenames.go.tmpl b/internal/code-generator/assets/filenames.go.tmpl new file mode 100644 index 0000000..f0a7952 --- /dev/null +++ b/internal/code-generator/assets/filenames.go.tmpl @@ -0,0 +1,11 @@ +package slinguist + +// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator +// THIS FILE SHOULD NOT BE EDITED BY HAND +// Extracted from github/linguist commit: {{ getCommit }} + +var languagesByFilename = map[string]string{ + {{range $filename, $language := . -}} + "{{ $filename }}": {{- printf "%q" $language -}}, + {{end -}} +} diff --git a/internal/code-generator/generator/filenames.go b/internal/code-generator/generator/filenames.go new file mode 100644 index 0000000..17f8482 --- /dev/null +++ b/internal/code-generator/generator/filenames.go @@ -0,0 +1,50 @@ +package generator + +import ( + "bytes" + "io" + "text/template" + + yaml "gopkg.in/yaml.v2" +) + +// Filenames reads from buf and builds filenames_map.go file from filenamesTmplPath. +func Filenames(data []byte, filenamesTmplPath, filenamesTmplName, commit string) ([]byte, error) { + languages := make(map[string]*languageInfo) + if err := yaml.Unmarshal(data, &languages); err != nil { + return nil, err + } + + languagesByFilename := buildFilenameLanguageMap(languages) + + buf := &bytes.Buffer{} + if err := executeFilenamesTemplate(buf, languagesByFilename, filenamesTmplPath, filenamesTmplName, commit); err != nil { + return nil, err + } + + return buf.Bytes(), nil +} + +func buildFilenameLanguageMap(languages map[string]*languageInfo) map[string]string { + filenameLangMap := make(map[string]string) + for lang, langInfo := range languages { + for _, filename := range langInfo.Filenames { + filenameLangMap[filename] = lang + } + } + + return filenameLangMap +} + +func executeFilenamesTemplate(out io.Writer, languagesByFilename map[string]string, filenamesTmplPath, filenamesTmpl, commit string) error { + fmap := template.FuncMap{ + "getCommit": func() string { return commit }, + } + + t := template.Must(template.New(filenamesTmpl).Funcs(fmap).ParseFiles(filenamesTmplPath)) + if err := t.Execute(out, languagesByFilename); err != nil { + return err + } + + return nil +} diff --git a/internal/code-generator/generator/generator_test.go b/internal/code-generator/generator/generator_test.go index 84a0c8c..46e8fe1 100644 --- a/internal/code-generator/generator/generator_test.go +++ b/internal/code-generator/generator/generator_test.go @@ -47,130 +47,103 @@ const ( interpretersGold = "test_files/interpreters.gold" interpretersTestTmplPath = "test_files/interpreters.test.go.tmpl" interpretersTestTmplName = "interpreters.test.go.tmpl" + + // Filenames test + filenamesTestFile = "test_files/filenames.test.yml" + filenamesGold = "test_files/filenames.gold" + filenamesTestTmplPath = "test_files/filenames.test.go.tmpl" + filenamesTestTmplName = "filenames.test.go.tmpl" ) func TestFromFile(t *testing.T) { - goldLang, err := ioutil.ReadFile(langGold) - assert.NoError(t, err) - - goldContent, err := ioutil.ReadFile(contentGold) - assert.NoError(t, err) - - goldVendor, err := ioutil.ReadFile(vendorGold) - assert.NoError(t, err) - - goldDocumentation, err := ioutil.ReadFile(documentationGold) - assert.NoError(t, err) - - goldTypes, err := ioutil.ReadFile(typesGold) - assert.NoError(t, err) - - goldInterpreters, err := ioutil.ReadFile(interpretersGold) - assert.NoError(t, err) - - outPathLang, err := ioutil.TempFile("/tmp", "generator-test-") - assert.NoError(t, err) - defer os.Remove(outPathLang.Name()) - - outPathContent, err := ioutil.TempFile("/tmp", "generator-test-") - assert.NoError(t, err) - defer os.Remove(outPathContent.Name()) - - outPathVendor, err := ioutil.TempFile("/tmp", "generator-test-") - assert.NoError(t, err) - defer os.Remove(outPathVendor.Name()) - - outPathDocumentation, err := ioutil.TempFile("/tmp", "generator-test-") - assert.NoError(t, err) - defer os.Remove(outPathDocumentation.Name()) - - outPathTypes, err := ioutil.TempFile("/tmp", "generator-test-") - assert.NoError(t, err) - defer os.Remove(outPathTypes.Name()) - - outPathInterpreters, err := ioutil.TempFile("/tmp", "generator-test-") - assert.NoError(t, err) - defer os.Remove(outPathInterpreters.Name()) - tests := []struct { name string fileToParse string - outPath string tmplPath string tmplName string commit string generate Func - wantOut []byte + wantOut string }{ { name: "TestFromFile_Language", fileToParse: ymlTestFile, - outPath: outPathLang.Name(), tmplPath: languagesTestTmplPath, tmplName: languagesTestTmplName, commit: commitTest, generate: Languages, - wantOut: goldLang, + wantOut: langGold, }, { name: "TestFromFile_Heuristics", fileToParse: heuristicsTestFile, - outPath: outPathContent.Name(), tmplPath: contentTestTmplPath, tmplName: contentTestTmplName, commit: commitTest, generate: Heuristics, - wantOut: goldContent, + wantOut: contentGold, }, { name: "TestFromFile_Vendor", fileToParse: vendorTestFile, - outPath: outPathVendor.Name(), tmplPath: vendorTestTmplPath, tmplName: vendorTestTmplName, commit: commitTest, generate: Vendor, - wantOut: goldVendor, + wantOut: vendorGold, }, { name: "TestFromFile_Documentation", fileToParse: documentationTestFile, - outPath: outPathDocumentation.Name(), tmplPath: documentationTestTmplPath, tmplName: documentationTestTmplName, commit: commitTest, generate: Documentation, - wantOut: goldDocumentation, + wantOut: documentationGold, }, { name: "TestFromFile_Types", fileToParse: typesTestFile, - outPath: outPathTypes.Name(), tmplPath: typesTestTmplPath, tmplName: typesTestTmplName, commit: commitTest, generate: Types, - wantOut: goldTypes, + wantOut: typesGold, }, { name: "TestFromFile_Interpreters", fileToParse: interpretersTestFile, - outPath: outPathInterpreters.Name(), tmplPath: interpretersTestTmplPath, tmplName: interpretersTestTmplName, commit: commitTest, generate: Interpreters, - wantOut: goldInterpreters, + wantOut: interpretersGold, + }, + { + name: "TestFromFile_Filenames", + fileToParse: filenamesTestFile, + tmplPath: filenamesTestTmplPath, + tmplName: filenamesTestTmplName, + commit: commitTest, + generate: Filenames, + wantOut: filenamesGold, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - err := FromFile(tt.fileToParse, tt.outPath, tt.tmplPath, tt.tmplName, tt.commit, tt.generate) + gold, err := ioutil.ReadFile(tt.wantOut) assert.NoError(t, err) - out, err := ioutil.ReadFile(tt.outPath) + + outPath, err := ioutil.TempFile("/tmp", "generator-test-") assert.NoError(t, err) - assert.EqualValues(t, tt.wantOut, out, fmt.Sprintf("FromFile() = %v, want %v", string(out), string(tt.wantOut))) + defer os.Remove(outPath.Name()) + + err = FromFile(tt.fileToParse, outPath.Name(), tt.tmplPath, tt.tmplName, tt.commit, tt.generate) + assert.NoError(t, err) + out, err := ioutil.ReadFile(outPath.Name()) + assert.NoError(t, err) + assert.EqualValues(t, gold, out, fmt.Sprintf("FromFile() = %v, want %v", string(out), string(tt.wantOut))) }) } } diff --git a/internal/code-generator/generator/langs.go b/internal/code-generator/generator/langs.go index 99ec8d5..f69e670 100644 --- a/internal/code-generator/generator/langs.go +++ b/internal/code-generator/generator/langs.go @@ -14,6 +14,7 @@ type languageInfo struct { Type string `yaml:"type,omitempty"` Extensions []string `yaml:"extensions,omitempty,flow"` Interpreters []string `yaml:"interpreters,omitempty,flow"` + Filenames []string `yaml:"filenames,omitempty,flow"` } // Languages reads from buf and builds languages.go file from languagesTmplPath. diff --git a/internal/code-generator/generator/test_files/filenames.gold b/internal/code-generator/generator/test_files/filenames.gold new file mode 100644 index 0000000..f3ac3b5 --- /dev/null +++ b/internal/code-generator/generator/test_files/filenames.gold @@ -0,0 +1,12 @@ +package slinguist + +// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator +// THIS FILE SHOULD NOT BE EDITED BY HAND +// Extracted from github/linguist commit: fe8b44ab8a225b1ffa75b983b916ea22fee5b6f7 + +var languagesByFilename = map[string]string{ + "APKBUILD": "Alpine Abuild", + "CMakeLists.txt": "CMake", + "Cakefile": "CoffeeScript", + "mix.lock": "Elixir", +} diff --git a/internal/code-generator/generator/test_files/filenames.test.go.tmpl b/internal/code-generator/generator/test_files/filenames.test.go.tmpl new file mode 100644 index 0000000..f0a7952 --- /dev/null +++ b/internal/code-generator/generator/test_files/filenames.test.go.tmpl @@ -0,0 +1,11 @@ +package slinguist + +// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator +// THIS FILE SHOULD NOT BE EDITED BY HAND +// Extracted from github/linguist commit: {{ getCommit }} + +var languagesByFilename = map[string]string{ + {{range $filename, $language := . -}} + "{{ $filename }}": {{- printf "%q" $language -}}, + {{end -}} +} diff --git a/internal/code-generator/generator/test_files/filenames.test.yml b/internal/code-generator/generator/test_files/filenames.test.yml new file mode 100644 index 0000000..30113db --- /dev/null +++ b/internal/code-generator/generator/test_files/filenames.test.yml @@ -0,0 +1,13 @@ +--- +Alpine Abuild: + filenames: + - APKBUILD +CMake: + filenames: + - CMakeLists.txt +CoffeeScript: + filenames: + - Cakefile +Elixir: + filenames: + - mix.lock diff --git a/internal/code-generator/main.go b/internal/code-generator/main.go index 8a060d0..5dd29d9 100644 --- a/internal/code-generator/main.go +++ b/internal/code-generator/main.go @@ -42,6 +42,11 @@ const ( interpretersTmplPath = "internal/code-generator/assets/interpreters.go.tmpl" interpretersTmpl = "interpreters.go.tmpl" + // filenames_map.go generation + filenamesFile = "filenames_map.go" + filenamesTmplPath = "internal/code-generator/assets/filenames.go.tmpl" + filenamesTmpl = "filenames.go.tmpl" + commitPath = ".git/refs/heads/master" ) @@ -67,6 +72,7 @@ func main() { &generatorArgs{documentationYAML, documentationFile, documentationTmplPath, documentationTmpl, commit, generator.Documentation}, &generatorArgs{languagesYAML, typeFile, typeTmplPath, typeTmpl, commit, generator.Types}, &generatorArgs{languagesYAML, interpretersFile, interpretersTmplPath, interpretersTmpl, commit, generator.Interpreters}, + &generatorArgs{languagesYAML, filenamesFile, filenamesTmplPath, filenamesTmpl, commit, generator.Filenames}, } for _, args := range argsList { From df60eab1ad1140453163a46d63a3a471aa7fdd04 Mon Sep 17 00:00:00 2001 From: Manuel Carmona Date: Wed, 19 Apr 2017 10:59:34 +0200 Subject: [PATCH 2/6] added language detection by filename strategy --- cli/slinguist/main.go | 2 +- common.go | 4 ++++ filename_test.go | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 filename_test.go diff --git a/cli/slinguist/main.go b/cli/slinguist/main.go index b6c475d..019621e 100644 --- a/cli/slinguist/main.go +++ b/cli/slinguist/main.go @@ -48,7 +48,7 @@ func main() { return nil } - l := slinguist.GetLanguage(path, content) + l := slinguist.GetLanguage(filepath.Base(path), content) r, err := filepath.Rel(root, path) if err != nil { diff --git a/common.go b/common.go index 6c00e7e..8b15640 100644 --- a/common.go +++ b/common.go @@ -36,6 +36,10 @@ func GetLanguageExtensions(language string) []string { // GetLanguage return the Language for a given filename and file content. func GetLanguage(filename string, content []byte) string { + if lang, safe := GetLanguageByFilename(filename); safe { + return lang + } + if lang, safe := GetLanguageByShebang(content); safe { return lang } diff --git a/filename_test.go b/filename_test.go new file mode 100644 index 0000000..239e4e3 --- /dev/null +++ b/filename_test.go @@ -0,0 +1,37 @@ +package slinguist + +import . "gopkg.in/check.v1" + +func (s *TSuite) TestGetLanguageByFilename(c *C) { + lang, safe := GetLanguageByFilename(`unknown.interpreter`) + c.Assert(lang, Equals, OtherLanguage) + c.Assert(safe, Equals, false) + + lang, safe = GetLanguageByFilename(`.bashrc`) + c.Assert(lang, Equals, "Shell") + c.Assert(safe, Equals, true) + + lang, safe = GetLanguageByFilename(`Dockerfile`) + c.Assert(lang, Equals, "Dockerfile") + c.Assert(safe, Equals, true) + + lang, safe = GetLanguageByFilename(`Makefile.frag`) + c.Assert(lang, Equals, "Makefile") + c.Assert(safe, Equals, true) + + lang, safe = GetLanguageByFilename(`makefile`) + c.Assert(lang, Equals, "Makefile") + c.Assert(safe, Equals, true) + + lang, safe = GetLanguageByFilename(`Vagrantfile`) + c.Assert(lang, Equals, "Ruby") + c.Assert(safe, Equals, true) + + lang, safe = GetLanguageByFilename(`_vimrc`) + c.Assert(lang, Equals, "Vim script") + c.Assert(safe, Equals, true) + + lang, safe = GetLanguageByFilename(`pom.xml`) + c.Assert(lang, Equals, "Maven POM") + c.Assert(safe, Equals, true) +} From f63a25d794a0fc4661ca674df6f566b2ec662cec Mon Sep 17 00:00:00 2001 From: Manuel Carmona Date: Wed, 19 Apr 2017 11:24:01 +0200 Subject: [PATCH 3/6] all related to extension strategy renamed to reference it --- languages.go => extensions_map.go | 0 .../{languages.go.tmpl => extensions.go.tmpl} | 0 .../generator/{langs.go => extensions.go} | 30 ++++--------------- .../generator/generator_test.go | 18 +++++------ internal/code-generator/generator/langinfo.go | 20 +++++++++++++ .../{languages.gold => extensions.gold} | 0 ...s.test.go.tmpl => extensions.test.go.tmpl} | 0 ...languages.test.yml => extensions.test.yml} | 0 internal/code-generator/main.go | 14 +++++---- 9 files changed, 43 insertions(+), 39 deletions(-) rename languages.go => extensions_map.go (100%) rename internal/code-generator/assets/{languages.go.tmpl => extensions.go.tmpl} (100%) rename internal/code-generator/generator/{langs.go => extensions.go} (51%) create mode 100644 internal/code-generator/generator/langinfo.go rename internal/code-generator/generator/test_files/{languages.gold => extensions.gold} (100%) rename internal/code-generator/generator/test_files/{languages.test.go.tmpl => extensions.test.go.tmpl} (100%) rename internal/code-generator/generator/test_files/{languages.test.yml => extensions.test.yml} (100%) diff --git a/languages.go b/extensions_map.go similarity index 100% rename from languages.go rename to extensions_map.go diff --git a/internal/code-generator/assets/languages.go.tmpl b/internal/code-generator/assets/extensions.go.tmpl similarity index 100% rename from internal/code-generator/assets/languages.go.tmpl rename to internal/code-generator/assets/extensions.go.tmpl diff --git a/internal/code-generator/generator/langs.go b/internal/code-generator/generator/extensions.go similarity index 51% rename from internal/code-generator/generator/langs.go rename to internal/code-generator/generator/extensions.go index f69e670..3ea7f35 100644 --- a/internal/code-generator/generator/langs.go +++ b/internal/code-generator/generator/extensions.go @@ -3,22 +3,14 @@ package generator import ( "bytes" "io" - "sort" "strings" "text/template" - "gopkg.in/yaml.v2" + yaml "gopkg.in/yaml.v2" ) -type languageInfo struct { - Type string `yaml:"type,omitempty"` - Extensions []string `yaml:"extensions,omitempty,flow"` - Interpreters []string `yaml:"interpreters,omitempty,flow"` - Filenames []string `yaml:"filenames,omitempty,flow"` -} - -// Languages reads from buf and builds languages.go file from languagesTmplPath. -func Languages(data []byte, languagesTmplPath, languagesTmplName, commit string) ([]byte, error) { +// Extensions reads from buf and builds extensions_map.go file from extensionsTmplPath. +func Extensions(data []byte, extensionsTmplPath, extensionsTmplName, commit string) ([]byte, error) { languages := make(map[string]*languageInfo) if err := yaml.Unmarshal(data, &languages); err != nil { return nil, err @@ -28,23 +20,13 @@ func Languages(data []byte, languagesTmplPath, languagesTmplName, commit string) languagesByExtension := buildExtensionLanguageMap(languages, orderedKeyList) buf := &bytes.Buffer{} - if err := executeLanguagesTemplate(buf, languagesByExtension, languagesTmplPath, languagesTmplName, commit); err != nil { + if err := executeExtensionsTemplate(buf, languagesByExtension, extensionsTmplPath, extensionsTmplName, commit); err != nil { return nil, err } return buf.Bytes(), nil } -func getAlphabeticalOrderedKeys(languages map[string]*languageInfo) []string { - keyList := make([]string, 0) - for lang := range languages { - keyList = append(keyList, lang) - } - - sort.Strings(keyList) - return keyList -} - func buildExtensionLanguageMap(languages map[string]*languageInfo, orderedKeyList []string) map[string][]string { extensionLangsMap := make(map[string][]string) for _, lang := range orderedKeyList { @@ -57,13 +39,13 @@ func buildExtensionLanguageMap(languages map[string]*languageInfo, orderedKeyLis return extensionLangsMap } -func executeLanguagesTemplate(out io.Writer, languagesByExtension map[string][]string, languagesTmplPath, languagesTmpl, commit string) error { +func executeExtensionsTemplate(out io.Writer, languagesByExtension map[string][]string, extensionsTmplPath, extensionsTmpl, commit string) error { fmap := template.FuncMap{ "getCommit": func() string { return commit }, "formatStringSlice": func(slice []string) string { return `"` + strings.Join(slice, `","`) + `"` }, } - t := template.Must(template.New(languagesTmpl).Funcs(fmap).ParseFiles(languagesTmplPath)) + t := template.Must(template.New(extensionsTmpl).Funcs(fmap).ParseFiles(extensionsTmplPath)) if err := t.Execute(out, languagesByExtension); err != nil { return err } diff --git a/internal/code-generator/generator/generator_test.go b/internal/code-generator/generator/generator_test.go index 46e8fe1..0487cca 100644 --- a/internal/code-generator/generator/generator_test.go +++ b/internal/code-generator/generator/generator_test.go @@ -13,10 +13,10 @@ const ( commitTest = "fe8b44ab8a225b1ffa75b983b916ea22fee5b6f7" // Languages test - ymlTestFile = "test_files/languages.test.yml" - langGold = "test_files/languages.gold" - languagesTestTmplPath = "test_files/languages.test.go.tmpl" - languagesTestTmplName = "languages.test.go.tmpl" + extensionsTestFile = "test_files/extensions.test.yml" + extensionsGold = "test_files/extensions.gold" + extensionsTestTmplPath = "test_files/extensions.test.go.tmpl" + extensionsTestTmplName = "extensions.test.go.tmpl" // Heuristics test heuristicsTestFile = "test_files/heuristics.test.rb" @@ -67,12 +67,12 @@ func TestFromFile(t *testing.T) { }{ { name: "TestFromFile_Language", - fileToParse: ymlTestFile, - tmplPath: languagesTestTmplPath, - tmplName: languagesTestTmplName, + fileToParse: extensionsTestFile, + tmplPath: extensionsTestTmplPath, + tmplName: extensionsTestTmplName, commit: commitTest, - generate: Languages, - wantOut: langGold, + generate: Extensions, + wantOut: extensionsGold, }, { name: "TestFromFile_Heuristics", diff --git a/internal/code-generator/generator/langinfo.go b/internal/code-generator/generator/langinfo.go new file mode 100644 index 0000000..e61d335 --- /dev/null +++ b/internal/code-generator/generator/langinfo.go @@ -0,0 +1,20 @@ +package generator + +import "sort" + +type languageInfo struct { + Type string `yaml:"type,omitempty"` + Extensions []string `yaml:"extensions,omitempty,flow"` + Interpreters []string `yaml:"interpreters,omitempty,flow"` + Filenames []string `yaml:"filenames,omitempty,flow"` +} + +func getAlphabeticalOrderedKeys(languages map[string]*languageInfo) []string { + keyList := make([]string, 0) + for lang := range languages { + keyList = append(keyList, lang) + } + + sort.Strings(keyList) + return keyList +} diff --git a/internal/code-generator/generator/test_files/languages.gold b/internal/code-generator/generator/test_files/extensions.gold similarity index 100% rename from internal/code-generator/generator/test_files/languages.gold rename to internal/code-generator/generator/test_files/extensions.gold diff --git a/internal/code-generator/generator/test_files/languages.test.go.tmpl b/internal/code-generator/generator/test_files/extensions.test.go.tmpl similarity index 100% rename from internal/code-generator/generator/test_files/languages.test.go.tmpl rename to internal/code-generator/generator/test_files/extensions.test.go.tmpl diff --git a/internal/code-generator/generator/test_files/languages.test.yml b/internal/code-generator/generator/test_files/extensions.test.yml similarity index 100% rename from internal/code-generator/generator/test_files/languages.test.yml rename to internal/code-generator/generator/test_files/extensions.test.yml diff --git a/internal/code-generator/main.go b/internal/code-generator/main.go index 5dd29d9..59a784d 100644 --- a/internal/code-generator/main.go +++ b/internal/code-generator/main.go @@ -8,11 +8,13 @@ import ( ) const ( - // languages.go generation - languagesYAML = ".linguist/lib/linguist/languages.yml" - langFile = "languages.go" - languagesTmplPath = "internal/code-generator/assets/languages.go.tmpl" - languagesTmpl = "languages.go.tmpl" + // languages info file + languagesYAML = ".linguist/lib/linguist/languages.yml" + + // extensions_map.go generation + extensionsFile = "extensions_map.go" + extensionsTmplPath = "internal/code-generator/assets/extensions.go.tmpl" + extensionsTmpl = "extensions.go.tmpl" // content.go generation heuristicsRuby = ".linguist/lib/linguist/heuristics.rb" @@ -66,7 +68,7 @@ func main() { } argsList := []*generatorArgs{ - &generatorArgs{languagesYAML, langFile, languagesTmplPath, languagesTmpl, commit, generator.Languages}, + &generatorArgs{languagesYAML, extensionsFile, extensionsTmplPath, extensionsTmpl, commit, generator.Extensions}, &generatorArgs{heuristicsRuby, contentFile, contentTmplPath, contentTmpl, commit, generator.Heuristics}, &generatorArgs{vendorYAML, vendorFile, vendorTmplPath, vendorTmpl, commit, generator.Vendor}, &generatorArgs{documentationYAML, documentationFile, documentationTmplPath, documentationTmpl, commit, generator.Documentation}, From 7b7157e061b305fe324364741e514f591f9a3cfe Mon Sep 17 00:00:00 2001 From: Manuel Carmona Date: Wed, 19 Apr 2017 11:48:01 +0200 Subject: [PATCH 4/6] shebang #! now is a constant --- shebang.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/shebang.go b/shebang.go index b13dc26..664def6 100644 --- a/shebang.go +++ b/shebang.go @@ -7,6 +7,8 @@ import ( "strings" ) +const shebang = `#!` + var ( shebangExecHack = regexp.MustCompile(`exec (\w+).+\$0.+\$@`) pythonVersion = regexp.MustCompile(`python\d\.\d+`) @@ -68,7 +70,7 @@ func getFirstLine(data []byte) []byte { } func hasShebang(line []byte) bool { - shebang := []byte{'#', '!'} + shebang := []byte(shebang) return bytes.HasPrefix(line, shebang) } From 63d4d9bf2420fcacee79028214a445bbd39523b6 Mon Sep 17 00:00:00 2001 From: Manuel Carmona Date: Thu, 20 Apr 2017 08:12:56 +0200 Subject: [PATCH 5/6] removed templates from test_files directory to use templates from assets directory in tests --- .../code-generator/generator/documentation.go | 2 +- .../generator/generator_test.go | 28 +++++----- .../generator/test_files/content.test.go.tmpl | 51 ------------------- .../test_files/documentation.test.go.tmpl | 13 ----- .../test_files/extensions.test.go.tmpl | 11 ---- .../test_files/filenames.test.go.tmpl | 11 ---- .../test_files/interpreters.test.go.tmpl | 11 ---- .../generator/test_files/vendor.test.go.tmpl | 13 ----- 8 files changed, 15 insertions(+), 125 deletions(-) delete mode 100644 internal/code-generator/generator/test_files/content.test.go.tmpl delete mode 100644 internal/code-generator/generator/test_files/documentation.test.go.tmpl delete mode 100644 internal/code-generator/generator/test_files/extensions.test.go.tmpl delete mode 100644 internal/code-generator/generator/test_files/filenames.test.go.tmpl delete mode 100644 internal/code-generator/generator/test_files/interpreters.test.go.tmpl delete mode 100644 internal/code-generator/generator/test_files/vendor.test.go.tmpl diff --git a/internal/code-generator/generator/documentation.go b/internal/code-generator/generator/documentation.go index b5a4bd9..9d87ac2 100644 --- a/internal/code-generator/generator/documentation.go +++ b/internal/code-generator/generator/documentation.go @@ -16,7 +16,7 @@ func Documentation(data []byte, documentationTmplPath, documentationTmplName, co } buf := &bytes.Buffer{} - if err := executeVendorTemplate(buf, regexpList, documentationTmplPath, documentationTmplName, commit); err != nil { + if err := executeDocumentationTemplate(buf, regexpList, documentationTmplPath, documentationTmplName, commit); err != nil { return nil, err } diff --git a/internal/code-generator/generator/generator_test.go b/internal/code-generator/generator/generator_test.go index 0487cca..93af6e1 100644 --- a/internal/code-generator/generator/generator_test.go +++ b/internal/code-generator/generator/generator_test.go @@ -15,44 +15,44 @@ const ( // Languages test extensionsTestFile = "test_files/extensions.test.yml" extensionsGold = "test_files/extensions.gold" - extensionsTestTmplPath = "test_files/extensions.test.go.tmpl" - extensionsTestTmplName = "extensions.test.go.tmpl" + extensionsTestTmplPath = "../assets/extensions.go.tmpl" + extensionsTestTmplName = "extensions.go.tmpl" // Heuristics test heuristicsTestFile = "test_files/heuristics.test.rb" contentGold = "test_files/content.gold" - contentTestTmplPath = "test_files/content.test.go.tmpl" - contentTestTmplName = "content.test.go.tmpl" + contentTestTmplPath = "../assets/content.go.tmpl" + contentTestTmplName = "content.go.tmpl" // Vendor test vendorTestFile = "test_files/vendor.test.yml" vendorGold = "test_files/vendor.gold" - vendorTestTmplPath = "test_files/vendor.test.go.tmpl" - vendorTestTmplName = "vendor.test.go.tmpl" + vendorTestTmplPath = "../assets/vendor.go.tmpl" + vendorTestTmplName = "vendor.go.tmpl" // Documentation test documentationTestFile = "test_files/documentation.test.yml" documentationGold = "test_files/documentation.gold" - documentationTestTmplPath = "test_files/documentation.test.go.tmpl" - documentationTestTmplName = "documentation.test.go.tmpl" + documentationTestTmplPath = "../assets/documentation.go.tmpl" + documentationTestTmplName = "documentation.go.tmpl" // Types test typesTestFile = "test_files/type.test.yml" typesGold = "test_files/type.gold" - typesTestTmplPath = "test_files/type.test.go.tmpl" - typesTestTmplName = "type.test.go.tmpl" + typesTestTmplPath = "../assets/type.go.tmpl" + typesTestTmplName = "type.go.tmpl" // Interpreters test interpretersTestFile = "test_files/interpreters.test.yml" interpretersGold = "test_files/interpreters.gold" - interpretersTestTmplPath = "test_files/interpreters.test.go.tmpl" - interpretersTestTmplName = "interpreters.test.go.tmpl" + interpretersTestTmplPath = "../assets/interpreters.go.tmpl" + interpretersTestTmplName = "interpreters.go.tmpl" // Filenames test filenamesTestFile = "test_files/filenames.test.yml" filenamesGold = "test_files/filenames.gold" - filenamesTestTmplPath = "test_files/filenames.test.go.tmpl" - filenamesTestTmplName = "filenames.test.go.tmpl" + filenamesTestTmplPath = "../assets/filenames.go.tmpl" + filenamesTestTmplName = "filenames.go.tmpl" ) func TestFromFile(t *testing.T) { diff --git a/internal/code-generator/generator/test_files/content.test.go.tmpl b/internal/code-generator/generator/test_files/content.test.go.tmpl deleted file mode 100644 index 332b3e7..0000000 --- a/internal/code-generator/generator/test_files/content.test.go.tmpl +++ /dev/null @@ -1,51 +0,0 @@ -package slinguist - -// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator -// THIS FILE SHOULD NOT BE EDITED BY HAND -// Extracted from github/linguist commit: {{ getCommit }} - -import ( - "path/filepath" - "regexp" - "strings" -) - -func GetLanguageByContent(filename string, content []byte) (lang string, safe bool) { - ext := strings.ToLower(filepath.Ext(filename)) - if fnMatcher, ok := matchers[ext]; ok { - lang, safe = fnMatcher(content) - return - } - - return GetLanguageByExtension(filename) -} - -type languageMatcher func ([]byte) (string, bool) - -var matchers = map[string]languageMatcher{ - {{ range $index, $disambiguator := . -}} - {{ printf "%q" $disambiguator.Extension }}: func(i []byte) (string, bool) { - {{ range $i, $language := $disambiguator.Languages -}} - - {{- if not (avoidLanguage $language) }} - {{- if gt (len $language.Heuristics) 0 }} - {{- if gt $i 0 }} else {{ end -}} - if {{- range $j, $heuristic := $language.Heuristics }} {{ $heuristic.Name }}.Match(i) - {{- if lt $j (len $language.LogicRelations) }} {{index $language.LogicRelations $j}} {{- end -}} {{ end }} { - return {{ printf "%q" $language.Language }}, true - } - - {{- end -}} - {{- end -}} - {{- end}} - - return {{ returnLanguage $disambiguator.Languages }}, {{ safeLanguage $disambiguator.Languages }} - }, - {{ end -}} -} - -var ( - {{ range $index, $heuristic := getAllHeuristics . -}} - {{ $heuristic.Name }} = regexp.MustCompile(`{{ $heuristic.Regexp }}`) - {{ end -}} -) \ No newline at end of file diff --git a/internal/code-generator/generator/test_files/documentation.test.go.tmpl b/internal/code-generator/generator/test_files/documentation.test.go.tmpl deleted file mode 100644 index ef346f2..0000000 --- a/internal/code-generator/generator/test_files/documentation.test.go.tmpl +++ /dev/null @@ -1,13 +0,0 @@ -package slinguist - -// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator -// THIS FILE SHOULD NOT BE EDITED BY HAND -// Extracted from github/linguist commit: {{ getCommit }} - -import "gopkg.in/toqueteos/substring.v1" - -var documentationMatchers = substring.Or( - {{range $regexp := . -}} - substring.Regexp(`{{ $regexp }}`), - {{end -}} -) diff --git a/internal/code-generator/generator/test_files/extensions.test.go.tmpl b/internal/code-generator/generator/test_files/extensions.test.go.tmpl deleted file mode 100644 index 70a9641..0000000 --- a/internal/code-generator/generator/test_files/extensions.test.go.tmpl +++ /dev/null @@ -1,11 +0,0 @@ -package slinguist - -// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator -// THIS FILE SHOULD NOT BE EDITED BY HAND -// Extracted from github/linguist commit: {{ getCommit }} - -var languagesByExtension = map[string][]string{ - {{range $extension, $languages := . -}} - "{{ $extension }}": { {{- $languages | formatStringSlice -}} }, - {{end -}} -} diff --git a/internal/code-generator/generator/test_files/filenames.test.go.tmpl b/internal/code-generator/generator/test_files/filenames.test.go.tmpl deleted file mode 100644 index f0a7952..0000000 --- a/internal/code-generator/generator/test_files/filenames.test.go.tmpl +++ /dev/null @@ -1,11 +0,0 @@ -package slinguist - -// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator -// THIS FILE SHOULD NOT BE EDITED BY HAND -// Extracted from github/linguist commit: {{ getCommit }} - -var languagesByFilename = map[string]string{ - {{range $filename, $language := . -}} - "{{ $filename }}": {{- printf "%q" $language -}}, - {{end -}} -} diff --git a/internal/code-generator/generator/test_files/interpreters.test.go.tmpl b/internal/code-generator/generator/test_files/interpreters.test.go.tmpl deleted file mode 100644 index 0ecb444..0000000 --- a/internal/code-generator/generator/test_files/interpreters.test.go.tmpl +++ /dev/null @@ -1,11 +0,0 @@ -package slinguist - -// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator -// THIS FILE SHOULD NOT BE EDITED BY HAND -// Extracted from github/linguist commit: {{ getCommit }} - -var languagesByInterpreter = map[string][]string{ - {{range $interpreter, $languages := . -}} - "{{ $interpreter }}": { {{- $languages | formatStringSlice -}} }, - {{end -}} -} diff --git a/internal/code-generator/generator/test_files/vendor.test.go.tmpl b/internal/code-generator/generator/test_files/vendor.test.go.tmpl deleted file mode 100644 index 89a65be..0000000 --- a/internal/code-generator/generator/test_files/vendor.test.go.tmpl +++ /dev/null @@ -1,13 +0,0 @@ -package slinguist - -// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator -// THIS FILE SHOULD NOT BE EDITED BY HAND -// Extracted from github/linguist commit: {{ getCommit }} - -import "gopkg.in/toqueteos/substring.v1" - -var vendorMatchers = substring.Or( - {{range $regexp := . -}} - substring.Regexp(`{{ $regexp }}`), - {{end -}} -) From 28dc452853f1cc842040d8f74c303a9c4a432d08 Mon Sep 17 00:00:00 2001 From: Manuel Carmona Date: Thu, 20 Apr 2017 08:29:12 +0200 Subject: [PATCH 6/6] added some corner cases to content.go generation tests --- .../code-generator/generator/heuristics.go | 4 +- .../generator/test_files/content.gold | 97 ++++++++++++++++--- .../generator/test_files/heuristics.test.rb | 47 +++++++++ 3 files changed, 134 insertions(+), 14 deletions(-) diff --git a/internal/code-generator/generator/heuristics.go b/internal/code-generator/generator/heuristics.go index 077b4b8..f0ddedf 100644 --- a/internal/code-generator/generator/heuristics.go +++ b/internal/code-generator/generator/heuristics.go @@ -299,7 +299,7 @@ func getHeuristics(line string) []*heuristic { } if reg != "" { - reg = convToValidRegexp(reg) + reg = convertToValidRegexp(reg) heuristics = append(heuristics, &heuristic{Regexp: reg}) } } @@ -327,7 +327,7 @@ func replaceRegexpVariables(reg string) string { return repl } -func convToValidRegexp(reg string) string { +func convertToValidRegexp(reg string) string { // example: `/^(\s*)(])`) + h_CPlusPlus_Matcher_0 = regexp.MustCompile(`(?m)^\s*#\s*include <(cstdint|string|vector|map|list|array|bitset|queue|stack|forward_list|unordered_map|unordered_set|(i|o|io)stream)>`) + h_CPlusPlus_Matcher_1 = regexp.MustCompile(`(?m)^\s*template\s*<`) + h_CPlusPlus_Matcher_2 = regexp.MustCompile(`(?m)^[ \t]*try`) + h_CPlusPlus_Matcher_3 = regexp.MustCompile(`(?m)^[ \t]*catch\s*\(`) + h_CPlusPlus_Matcher_4 = regexp.MustCompile(`(?m)^[ \t]*(class|(using[ \t]+)?namespace)\s+\w+`) + h_CPlusPlus_Matcher_5 = regexp.MustCompile(`(?m)^[ \t]*(private|public|protected):$`) + h_CPlusPlus_Matcher_6 = regexp.MustCompile(`(?m)std::\w+`) + lsp_CommonLisp_Matcher_0 = regexp.MustCompile(`(?mi)^\s*\((defun|in-package|defpackage) `) + lsp_NewLisp_Matcher_0 = regexp.MustCompile(`(?m)^\s*\(define `) + lisp_CommonLisp_Matcher_0 = regexp.MustCompile(`(?mi)^\s*\((defun|in-package|defpackage) `) + lisp_NewLisp_Matcher_0 = regexp.MustCompile(`(?m)^\s*\(define `) + md_Markdown_Matcher_0 = regexp.MustCompile(`(?mi)(^[-a-z0-9=#!\*\[|>])|<\/`) + md_Markdown_Matcher_1 = regexp.MustCompile(`(?m)^$`) + md_GCCmachinedescription_Matcher_0 = regexp.MustCompile(`(?m)^(;;|\(define_)`) + ms_Groff_Matcher_0 = regexp.MustCompile(`(?mi)^[.'][a-z][a-z](\s|$)`) + mod_XML_Matcher_0 = regexp.MustCompile(`(?m)/.match(data) || + /^\s*template\s*])|<\//i.match(data) || data.empty? + Language["Markdown"] + elsif /^(;;|\(define_)/.match(data) + Language["GCC machine description"] + else + Language["Markdown"] + end + end + disambiguate ".ms" do |data| if /^[.'][a-z][a-z](\s|$)/i.match(data) Language["Groff"] @@ -42,3 +81,11 @@ Language["IDL"] end end + + disambiguate ".rpy" do |data| + if /(^(import|from|class|def)\s)/m.match(data) + Language["Python"] + else + Language["Ren'Py"] + end + end