diff --git a/alias.go b/alias.go new file mode 100644 index 0000000..d355612 --- /dev/null +++ b/alias.go @@ -0,0 +1,15 @@ +package slinguist + +import "strings" + +// GetLanguageByAlias returns the language related to the given alias or Otherlanguage otherwise. +func GetLanguageByAlias(alias string) (lang string) { + a := strings.Split(alias, `,`)[0] + a = strings.ToLower(a) + lang, ok := languagesByAlias[a] + if !ok { + lang = OtherLanguage + } + + return +} diff --git a/alias_test.go b/alias_test.go new file mode 100644 index 0000000..9bdbb43 --- /dev/null +++ b/alias_test.go @@ -0,0 +1,26 @@ +package slinguist + +import . "gopkg.in/check.v1" + +func (s *TSuite) TestGetLanguageByAlias(c *C) { + tests := []struct { + alias string + expectedLang string + }{ + {alias: "BestLanguageEver", expectedLang: OtherLanguage}, + {alias: "aspx-vb", expectedLang: "ASP"}, + {alias: "C++", expectedLang: "C++"}, + {alias: "c++", expectedLang: "C++"}, + {alias: "objc", expectedLang: "Objective-C"}, + {alias: "golang", expectedLang: "Go"}, + {alias: "GOLANG", expectedLang: "Go"}, + {alias: "bsdmake", expectedLang: "Makefile"}, + {alias: "xhTmL", expectedLang: "HTML"}, + {alias: "python", expectedLang: "Python"}, + } + + for _, test := range tests { + lang := GetLanguageByAlias(test.alias) + c.Assert(lang, Equals, test.expectedLang) + } +} diff --git a/aliases_map.go b/aliases_map.go new file mode 100644 index 0000000..bea090c --- /dev/null +++ b/aliases_map.go @@ -0,0 +1,637 @@ +package slinguist + +// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator +// THIS FILE SHOULD NOT BE EDITED BY HAND +// Extracted from github/linguist commit: dae33dc2b20cddc85d1300435c3be7118a7115a9 + +// languagesByAlias keeps alias for different languages and use the name of the languages as a alias too. All the +// keys (alias or not) are written in lower case and the whitespaces has been replaced by underscores. +var languagesByAlias = map[string]string{ + "1c_enterprise": "1C Enterprise", + "abap": "ABAP", + "abl": "OpenEdge ABL", + "abnf": "ABNF", + "abuild": "Alpine Abuild", + "aconf": "ApacheConf", + "actionscript": "ActionScript", + "actionscript3": "ActionScript", + "actionscript_3": "ActionScript", + "ada": "Ada", + "ada2005": "Ada", + "ada95": "Ada", + "advpl": "xBase", + "afdko": "OpenType Feature File", + "agda": "Agda", + "ags": "AGS Script", + "ags_script": "AGS Script", + "ahk": "AutoHotkey", + "alloy": "Alloy", + "alpine_abuild": "Alpine Abuild", + "ampl": "AMPL", + "ant_build_system": "Ant Build System", + "antlr": "ANTLR", + "apache": "ApacheConf", + "apacheconf": "ApacheConf", + "apex": "Apex", + "api_blueprint": "API Blueprint", + "apkbuild": "Alpine Abuild", + "apl": "APL", + "apollo_guidance_computer": "Apollo Guidance Computer", + "applescript": "AppleScript", + "arc": "Arc", + "arduino": "Arduino", + "arexx": "REXX", + "as3": "ActionScript", + "asciidoc": "AsciiDoc", + "asn.1": "ASN.1", + "asp": "ASP", + "aspectj": "AspectJ", + "aspx": "ASP", + "aspx-vb": "ASP", + "assembly": "Assembly", + "ats": "ATS", + "ats2": "ATS", + "au3": "AutoIt", + "augeas": "Augeas", + "autoconf": "M4Sugar", + "autohotkey": "AutoHotkey", + "autoit": "AutoIt", + "autoit3": "AutoIt", + "autoitscript": "AutoIt", + "awk": "Awk", + "b3d": "BlitzBasic", + "bash": "Shell", + "bash_session": "ShellSession", + "bat": "Batchfile", + "batch": "Batchfile", + "batchfile": "Batchfile", + "befunge": "Befunge", + "bison": "Bison", + "bitbake": "BitBake", + "blade": "Blade", + "blitz3d": "BlitzBasic", + "blitzbasic": "BlitzBasic", + "blitzmax": "BlitzMax", + "blitzplus": "BlitzBasic", + "bluespec": "Bluespec", + "bmax": "BlitzMax", + "boo": "Boo", + "bplus": "BlitzBasic", + "brainfuck": "Brainfuck", + "brightscript": "Brightscript", + "bro": "Bro", + "bsdmake": "Makefile", + "byond": "DM", + "c": "C", + "c#": "C#", + "c++": "C++", + "c++-objdump": "Cpp-ObjDump", + "c-objdump": "C-ObjDump", + "c2hs": "C2hs Haskell", + "c2hs_haskell": "C2hs Haskell", + "cap'n_proto": "Cap'n Proto", + "carto": "CartoCSS", + "cartocss": "CartoCSS", + "ceylon": "Ceylon", + "cfc": "ColdFusion CFC", + "cfm": "ColdFusion", + "cfml": "ColdFusion", + "chapel": "Chapel", + "charity": "Charity", + "chpl": "Chapel", + "chuck": "ChucK", + "cirru": "Cirru", + "clarion": "Clarion", + "clean": "Clean", + "click": "Click", + "clipper": "xBase", + "clips": "CLIPS", + "clojure": "Clojure", + "cmake": "CMake", + "cobol": "COBOL", + "coffee": "CoffeeScript", + "coffee-script": "CoffeeScript", + "coffeescript": "CoffeeScript", + "coldfusion": "ColdFusion", + "coldfusion_cfc": "ColdFusion CFC", + "coldfusion_html": "ColdFusion", + "collada": "COLLADA", + "common_lisp": "Common Lisp", + "component_pascal": "Component Pascal", + "console": "ShellSession", + "cool": "Cool", + "coq": "Coq", + "cpp": "C++", + "cpp-objdump": "Cpp-ObjDump", + "creole": "Creole", + "crystal": "Crystal", + "csharp": "C#", + "cson": "CSON", + "csound": "Csound", + "csound-csd": "Csound Document", + "csound-orc": "Csound", + "csound-sco": "Csound Score", + "csound_document": "Csound Document", + "csound_score": "Csound Score", + "css": "CSS", + "csv": "CSV", + "cucumber": "Gherkin", + "cuda": "Cuda", + "cycript": "Cycript", + "cython": "Cython", + "d": "D", + "d-objdump": "D-ObjDump", + "darcs_patch": "Darcs Patch", + "dart": "Dart", + "dcl": "DIGITAL Command Language", + "delphi": "Component Pascal", + "desktop": "desktop", + "diff": "Diff", + "digital_command_language": "DIGITAL Command Language", + "django": "HTML+Django", + "dm": "DM", + "dns_zone": "DNS Zone", + "dockerfile": "Dockerfile", + "dogescript": "Dogescript", + "dosbatch": "Batchfile", + "dosini": "INI", + "dpatch": "Darcs Patch", + "dtrace": "DTrace", + "dtrace-script": "DTrace", + "dylan": "Dylan", + "e": "E", + "eagle": "Eagle", + "ebnf": "EBNF", + "ec": "eC", + "ecere_projects": "Ecere Projects", + "ecl": "ECL", + "eclipse": "ECLiPSe", + "ecr": "HTML+ECR", + "edn": "edn", + "eex": "HTML+EEX", + "eiffel": "Eiffel", + "ejs": "EJS", + "elisp": "Emacs Lisp", + "elixir": "Elixir", + "elm": "Elm", + "emacs": "Emacs Lisp", + "emacs_lisp": "Emacs Lisp", + "emberscript": "EmberScript", + "eq": "EQ", + "erb": "HTML+ERB", + "erlang": "Erlang", + "f#": "F#", + "factor": "Factor", + "fancy": "Fancy", + "fantom": "Fantom", + "filebench_wml": "Filebench WML", + "filterscript": "Filterscript", + "fish": "fish", + "flex": "Lex", + "flux": "FLUX", + "formatted": "Formatted", + "forth": "Forth", + "fortran": "Fortran", + "foxpro": "xBase", + "freemarker": "FreeMarker", + "frege": "Frege", + "fsharp": "F#", + "ftl": "FreeMarker", + "fundamental": "Text", + "g-code": "G-code", + "game_maker_language": "Game Maker Language", + "gams": "GAMS", + "gap": "GAP", + "gcc_machine_description": "GCC Machine Description", + "gdb": "GDB", + "gdscript": "GDScript", + "genie": "Genie", + "genshi": "Genshi", + "gentoo_ebuild": "Gentoo Ebuild", + "gentoo_eclass": "Gentoo Eclass", + "gettext_catalog": "Gettext Catalog", + "gf": "Grammatical Framework", + "gherkin": "Gherkin", + "glsl": "GLSL", + "glyph": "Glyph", + "gn": "GN", + "gnuplot": "Gnuplot", + "go": "Go", + "golang": "Go", + "golo": "Golo", + "gosu": "Gosu", + "grace": "Grace", + "gradle": "Gradle", + "grammatical_framework": "Grammatical Framework", + "graph_modeling_language": "Graph Modeling Language", + "graphql": "GraphQL", + "graphviz_(dot)": "Graphviz (DOT)", + "groovy": "Groovy", + "groovy_server_pages": "Groovy Server Pages", + "gsp": "Groovy Server Pages", + "hack": "Hack", + "haml": "Haml", + "handlebars": "Handlebars", + "harbour": "Harbour", + "haskell": "Haskell", + "haxe": "Haxe", + "hbs": "Handlebars", + "hcl": "HCL", + "hlsl": "HLSL", + "html": "HTML", + "html+django": "HTML+Django", + "html+django/jinja": "HTML+Django", + "html+ecr": "HTML+ECR", + "html+eex": "HTML+EEX", + "html+erb": "HTML+ERB", + "html+jinja": "HTML+Django", + "html+php": "HTML+PHP", + "html+ruby": "RHTML", + "htmlbars": "Handlebars", + "htmldjango": "HTML+Django", + "http": "HTTP", + "hy": "Hy", + "hylang": "Hy", + "hyphy": "HyPhy", + "i7": "Inform 7", + "idl": "IDL", + "idris": "Idris", + "igor": "IGOR Pro", + "igor_pro": "IGOR Pro", + "igorpro": "IGOR Pro", + "inc": "PHP", + "inform7": "Inform 7", + "inform_7": "Inform 7", + "ini": "INI", + "inno_setup": "Inno Setup", + "io": "Io", + "ioke": "Ioke", + "ipython_notebook": "Jupyter Notebook", + "irc": "IRC log", + "irc_log": "IRC log", + "irc_logs": "IRC log", + "isabelle": "Isabelle", + "isabelle_root": "Isabelle ROOT", + "j": "J", + "jasmin": "Jasmin", + "java": "Java", + "java_server_page": "Groovy Server Pages", + "java_server_pages": "Java Server Pages", + "javascript": "JavaScript", + "jflex": "JFlex", + "jison": "Jison", + "jison_lex": "Jison Lex", + "jruby": "Ruby", + "js": "JavaScript", + "json": "JSON", + "json5": "JSON5", + "jsoniq": "JSONiq", + "jsonld": "JSONLD", + "jsp": "Java Server Pages", + "jsx": "JSX", + "julia": "Julia", + "jupyter_notebook": "Jupyter Notebook", + "kicad": "KiCad", + "kit": "Kit", + "kotlin": "Kotlin", + "krl": "KRL", + "labview": "LabVIEW", + "lasso": "Lasso", + "lassoscript": "Lasso", + "latex": "TeX", + "latte": "Latte", + "lean": "Lean", + "less": "Less", + "lex": "Lex", + "lfe": "LFE", + "lhaskell": "Literate Haskell", + "lhs": "Literate Haskell", + "lilypond": "LilyPond", + "limbo": "Limbo", + "linker_script": "Linker Script", + "linux_kernel_module": "Linux Kernel Module", + "liquid": "Liquid", + "lisp": "Common Lisp", + "litcoffee": "Literate CoffeeScript", + "literate_agda": "Literate Agda", + "literate_coffeescript": "Literate CoffeeScript", + "literate_haskell": "Literate Haskell", + "live-script": "LiveScript", + "livescript": "LiveScript", + "llvm": "LLVM", + "logos": "Logos", + "logtalk": "Logtalk", + "lolcode": "LOLCODE", + "lookml": "LookML", + "loomscript": "LoomScript", + "ls": "LiveScript", + "lsl": "LSL", + "lua": "Lua", + "m": "M", + "m4": "M4", + "m4sugar": "M4Sugar", + "macruby": "Ruby", + "make": "Makefile", + "makefile": "Makefile", + "mako": "Mako", + "markdown": "Markdown", + "marko": "Marko", + "markojs": "Marko", + "mask": "Mask", + "mathematica": "Mathematica", + "matlab": "Matlab", + "maven_pom": "Maven POM", + "max": "Max", + "max/msp": "Max", + "maxmsp": "Max", + "maxscript": "MAXScript", + "mediawiki": "MediaWiki", + "mercury": "Mercury", + "meson": "Meson", + "metal": "Metal", + "mf": "Makefile", + "minid": "MiniD", + "mirah": "Mirah", + "mma": "Mathematica", + "modelica": "Modelica", + "modula-2": "Modula-2", + "module_management_system": "Module Management System", + "monkey": "Monkey", + "moocode": "Moocode", + "moonscript": "MoonScript", + "mql4": "MQL4", + "mql5": "MQL5", + "mtml": "MTML", + "muf": "MUF", + "mumps": "M", + "mupad": "mupad", + "myghty": "Myghty", + "nasm": "Assembly", + "ncl": "NCL", + "nemerle": "Nemerle", + "nesc": "nesC", + "netlinx": "NetLinx", + "netlinx+erb": "NetLinx+ERB", + "netlogo": "NetLogo", + "newlisp": "NewLisp", + "nginx": "Nginx", + "nginx_configuration_file": "Nginx", + "nim": "Nim", + "ninja": "Ninja", + "nit": "Nit", + "nix": "Nix", + "nixos": "Nix", + "njk": "HTML+Django", + "nl": "NL", + "node": "JavaScript", + "nroff": "Roff", + "nsis": "NSIS", + "nu": "Nu", + "numpy": "NumPy", + "nunjucks": "HTML+Django", + "nush": "Nu", + "nvim": "Vim script", + "obj-c": "Objective-C", + "obj-c++": "Objective-C++", + "obj-j": "Objective-J", + "objc": "Objective-C", + "objc++": "Objective-C++", + "objdump": "ObjDump", + "objective-c": "Objective-C", + "objective-c++": "Objective-C++", + "objective-j": "Objective-J", + "objectivec": "Objective-C", + "objectivec++": "Objective-C++", + "objectivej": "Objective-J", + "objectpascal": "Component Pascal", + "objj": "Objective-J", + "ocaml": "OCaml", + "octave": "Matlab", + "omgrofl": "Omgrofl", + "ooc": "ooc", + "opa": "Opa", + "opal": "Opal", + "opencl": "OpenCL", + "openedge": "OpenEdge ABL", + "openedge_abl": "OpenEdge ABL", + "openrc": "OpenRC runscript", + "openrc_runscript": "OpenRC runscript", + "openscad": "OpenSCAD", + "opentype_feature_file": "OpenType Feature File", + "org": "Org", + "osascript": "AppleScript", + "ox": "Ox", + "oxygene": "Oxygene", + "oz": "Oz", + "p4": "P4", + "pan": "Pan", + "pandoc": "Markdown", + "papyrus": "Papyrus", + "parrot": "Parrot", + "parrot_assembly": "Parrot Assembly", + "parrot_internal_representation": "Parrot Internal Representation", + "pascal": "Pascal", + "pasm": "Parrot Assembly", + "pawn": "PAWN", + "perl": "Perl", + "perl6": "Perl6", + "php": "PHP", + "pic": "Pic", + "pickle": "Pickle", + "picolisp": "PicoLisp", + "piglatin": "PigLatin", + "pike": "Pike", + "pir": "Parrot Internal Representation", + "plpgsql": "PLpgSQL", + "plsql": "PLSQL", + "pod": "Pod", + "pogoscript": "PogoScript", + "pony": "Pony", + "posh": "PowerShell", + "postscr": "PostScript", + "postscript": "PostScript", + "pot": "Gettext Catalog", + "pov-ray": "POV-Ray SDL", + "pov-ray_sdl": "POV-Ray SDL", + "povray": "POV-Ray SDL", + "powerbuilder": "PowerBuilder", + "powershell": "PowerShell", + "processing": "Processing", + "progress": "OpenEdge ABL", + "prolog": "Prolog", + "propeller_spin": "Propeller Spin", + "protobuf": "Protocol Buffer", + "protocol_buffer": "Protocol Buffer", + "protocol_buffers": "Protocol Buffer", + "public_key": "Public Key", + "pug": "Pug", + "puppet": "Puppet", + "pure_data": "Pure Data", + "purebasic": "PureBasic", + "purescript": "PureScript", + "pycon": "Python console", + "pyrex": "Cython", + "python": "Python", + "python_console": "Python console", + "python_traceback": "Python traceback", + "qmake": "QMake", + "qml": "QML", + "r": "R", + "racket": "Racket", + "ragel": "Ragel", + "ragel-rb": "Ragel", + "ragel-ruby": "Ragel", + "rake": "Ruby", + "raml": "RAML", + "rascal": "Rascal", + "raw": "Raw token data", + "raw_token_data": "Raw token data", + "rb": "Ruby", + "rbx": "Ruby", + "rdoc": "RDoc", + "realbasic": "REALbasic", + "reason": "Reason", + "rebol": "Rebol", + "red": "Red", + "red/system": "Red", + "redcode": "Redcode", + "regex": "Regular Expression", + "regexp": "Regular Expression", + "regular_expression": "Regular Expression", + "ren'py": "Ren'Py", + "renderscript": "RenderScript", + "renpy": "Ren'Py", + "restructuredtext": "reStructuredText", + "rexx": "REXX", + "rhtml": "RHTML", + "rmarkdown": "RMarkdown", + "robotframework": "RobotFramework", + "roff": "Roff", + "rouge": "Rouge", + "rpm_spec": "RPM Spec", + "rscript": "R", + "rss": "XML", + "rst": "reStructuredText", + "ruby": "Ruby", + "runoff": "RUNOFF", + "rust": "Rust", + "rusthon": "Python", + "sage": "Sage", + "salt": "SaltStack", + "saltstack": "SaltStack", + "saltstate": "SaltStack", + "sas": "SAS", + "sass": "Sass", + "scala": "Scala", + "scaml": "Scaml", + "scheme": "Scheme", + "scilab": "Scilab", + "scss": "SCSS", + "self": "Self", + "sh": "Shell", + "shell": "Shell", + "shell-script": "Shell", + "shellsession": "ShellSession", + "shen": "Shen", + "slash": "Slash", + "slim": "Slim", + "smali": "Smali", + "smalltalk": "Smalltalk", + "smarty": "Smarty", + "sml": "Standard ML", + "smt": "SMT", + "sourcemod": "SourcePawn", + "sourcepawn": "SourcePawn", + "sparql": "SPARQL", + "specfile": "RPM Spec", + "spline_font_database": "Spline Font Database", + "splus": "R", + "sqf": "SQF", + "sql": "SQL", + "sqlpl": "SQLPL", + "squeak": "Smalltalk", + "squirrel": "Squirrel", + "srecode_template": "SRecode Template", + "stan": "Stan", + "standard_ml": "Standard ML", + "stata": "Stata", + "ston": "STON", + "stylus": "Stylus", + "sublime_text_config": "Sublime Text Config", + "subrip_text": "SubRip Text", + "supercollider": "SuperCollider", + "svg": "SVG", + "swift": "Swift", + "systemverilog": "SystemVerilog", + "tcl": "Tcl", + "tcsh": "Tcsh", + "tea": "Tea", + "terra": "Terra", + "tex": "TeX", + "text": "Text", + "textile": "Textile", + "thrift": "Thrift", + "ti_program": "TI Program", + "tla": "TLA", + "toml": "TOML", + "ts": "TypeScript", + "turing": "Turing", + "turtle": "Turtle", + "twig": "Twig", + "txl": "TXL", + "typescript": "TypeScript", + "udiff": "Diff", + "unified_parallel_c": "Unified Parallel C", + "unity3d_asset": "Unity3D Asset", + "unix_assembly": "Unix Assembly", + "uno": "Uno", + "unrealscript": "UnrealScript", + "ur": "UrWeb", + "ur/web": "UrWeb", + "urweb": "UrWeb", + "vala": "Vala", + "vb.net": "Visual Basic", + "vbnet": "Visual Basic", + "vcl": "VCL", + "verilog": "Verilog", + "vhdl": "VHDL", + "vim": "Vim script", + "vim_script": "Vim script", + "viml": "Vim script", + "visual_basic": "Visual Basic", + "volt": "Volt", + "vue": "Vue", + "wavefront_material": "Wavefront Material", + "wavefront_object": "Wavefront Object", + "web_ontology_language": "Web Ontology Language", + "webidl": "WebIDL", + "winbatch": "Batchfile", + "wisp": "wisp", + "world_of_warcraft_addon_data": "World of Warcraft Addon Data", + "wsdl": "XML", + "x10": "X10", + "xbase": "xBase", + "xc": "XC", + "xcompose": "XCompose", + "xhtml": "HTML", + "xml": "XML", + "xml+genshi": "Genshi", + "xml+kid": "Genshi", + "xojo": "Xojo", + "xpages": "XPages", + "xproc": "XProc", + "xquery": "XQuery", + "xs": "XS", + "xsd": "XML", + "xsl": "XSLT", + "xslt": "XSLT", + "xten": "X10", + "xtend": "Xtend", + "yacc": "Yacc", + "yaml": "YAML", + "yang": "YANG", + "yml": "YAML", + "zephir": "Zephir", + "zimpl": "Zimpl", + "zsh": "Shell", +} diff --git a/common.go b/common.go index 8b15640..5935ad3 100644 --- a/common.go +++ b/common.go @@ -36,6 +36,10 @@ func GetLanguageExtensions(language string) []string { // GetLanguage return the Language for a given filename and file content. func GetLanguage(filename string, content []byte) string { + if lang, safe := GetLanguageByModeline(content); safe { + return lang + } + if lang, safe := GetLanguageByFilename(filename); safe { return lang } diff --git a/content.go b/content.go index 4c33972..2cef019 100644 --- a/content.go +++ b/content.go @@ -14,10 +14,11 @@ func GetLanguageByContent(filename string, content []byte) (lang string, safe bo ext := strings.ToLower(filepath.Ext(filename)) if fnMatcher, ok := matchers[ext]; ok { lang, safe = fnMatcher(content) - return + } else { + lang = OtherLanguage } - return GetLanguageByExtension(filename) + return } type languageMatcher func([]byte) (string, bool) diff --git a/extension.go b/extension.go index da97414..d3e7484 100644 --- a/extension.go +++ b/extension.go @@ -7,7 +7,6 @@ import ( func GetLanguageByExtension(filename string) (lang string, safe bool) { ext := strings.ToLower(filepath.Ext(filename)) - lang = OtherLanguage langs, ok := languagesByExtension[ext] if !ok { @@ -15,9 +14,6 @@ func GetLanguageByExtension(filename string) (lang string, safe bool) { } lang = langs[0] - if len(langs) == 1 { - safe = true - } - + safe = len(langs) == 1 return } diff --git a/internal/code-generator/assets/aliases.go.tmpl b/internal/code-generator/assets/aliases.go.tmpl new file mode 100644 index 0000000..88e5fc7 --- /dev/null +++ b/internal/code-generator/assets/aliases.go.tmpl @@ -0,0 +1,13 @@ +package slinguist + +// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator +// THIS FILE SHOULD NOT BE EDITED BY HAND +// Extracted from github/linguist commit: {{ getCommit }} + +// languagesByAlias keeps alias for different languages and use the name of the languages as a alias too. All the +// keys (alias or not) are written in lower case and the whitespaces has been replaced by underscores. +var languagesByAlias = map[string]string{ + {{range $alias, $language := . -}} + "{{ $alias }}": {{ printf "%q" $language -}}, + {{end -}} +} diff --git a/internal/code-generator/assets/content.go.tmpl b/internal/code-generator/assets/content.go.tmpl index 332b3e7..c7a54c5 100644 --- a/internal/code-generator/assets/content.go.tmpl +++ b/internal/code-generator/assets/content.go.tmpl @@ -14,10 +14,11 @@ func GetLanguageByContent(filename string, content []byte) (lang string, safe bo ext := strings.ToLower(filepath.Ext(filename)) if fnMatcher, ok := matchers[ext]; ok { lang, safe = fnMatcher(content) - return + } else { + lang = OtherLanguage } - return GetLanguageByExtension(filename) + return } type languageMatcher func ([]byte) (string, bool) diff --git a/internal/code-generator/assets/type.go.tmpl b/internal/code-generator/assets/type.go.tmpl deleted file mode 100644 index de7592b..0000000 --- a/internal/code-generator/assets/type.go.tmpl +++ /dev/null @@ -1,28 +0,0 @@ -package slinguist - -// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator -// THIS FILE SHOULD NOT BE EDITED BY HAND -// Extracted from github/linguist commit: {{ getCommit }} - -type Type int - -const ( - // Language's type. Either data, programming, markup, prose, or unknown. - Unknown Type = iota - Data - Programming - Markup - Prose -) - -// GetLanguageType returns the given language's type. -func GetLanguageType(language string) (langType Type) { - langType, _ = languagesType[language] - return langType -} - -var languagesType = map[string]Type{ - {{range $language, $type := . -}} - "{{ $language }}": {{ $type -}}, - {{end -}} -} diff --git a/internal/code-generator/generator/test_files/languages.test.tmpl b/internal/code-generator/assets/types.go.tmpl similarity index 59% rename from internal/code-generator/generator/test_files/languages.test.tmpl rename to internal/code-generator/assets/types.go.tmpl index 70a9641..c47d0e9 100644 --- a/internal/code-generator/generator/test_files/languages.test.tmpl +++ b/internal/code-generator/assets/types.go.tmpl @@ -4,8 +4,8 @@ package slinguist // THIS FILE SHOULD NOT BE EDITED BY HAND // Extracted from github/linguist commit: {{ getCommit }} -var languagesByExtension = map[string][]string{ - {{range $extension, $languages := . -}} - "{{ $extension }}": { {{- $languages | formatStringSlice -}} }, +var languagesType = map[string]Type{ + {{range $language, $type := . -}} + "{{ $language }}": {{ $type -}}, {{end -}} } diff --git a/internal/code-generator/generator/aliases.go b/internal/code-generator/generator/aliases.go new file mode 100644 index 0000000..f0519ce --- /dev/null +++ b/internal/code-generator/generator/aliases.go @@ -0,0 +1,63 @@ +package generator + +import ( + "bytes" + "io" + "text/template" + + "strings" + + yaml "gopkg.in/yaml.v2" +) + +// Aliases reads from buf and builds aliases_map.go file from aliasesTmplPath. +func Aliases(data []byte, aliasesTmplPath, aliasesTmplName, commit string) ([]byte, error) { + languages := make(map[string]*languageInfo) + if err := yaml.Unmarshal(data, &languages); err != nil { + return nil, err + } + + orderedLangList := getAlphabeticalOrderedKeys(languages) + languagesByAlias := buildAliasLanguageMap(languages, orderedLangList) + + buf := &bytes.Buffer{} + if err := executeAliasesTemplate(buf, languagesByAlias, aliasesTmplPath, aliasesTmplName, commit); err != nil { + return nil, err + } + + return buf.Bytes(), nil +} + +func buildAliasLanguageMap(languages map[string]*languageInfo, orderedLangList []string) map[string]string { + aliasLangsMap := make(map[string]string) + for _, lang := range orderedLangList { + langInfo := languages[lang] + key := convertToAliasKey(lang) + aliasLangsMap[key] = lang + for _, alias := range langInfo.Aliases { + key := convertToAliasKey(alias) + aliasLangsMap[key] = lang + } + } + + return aliasLangsMap +} + +func convertToAliasKey(s string) (key string) { + key = strings.Replace(s, ` `, `_`, -1) + key = strings.ToLower(key) + return +} + +func executeAliasesTemplate(out io.Writer, languagesByAlias map[string]string, aliasesTmplPath, aliasesTmpl, commit string) error { + fmap := template.FuncMap{ + "getCommit": func() string { return commit }, + } + + t := template.Must(template.New(aliasesTmpl).Funcs(fmap).ParseFiles(aliasesTmplPath)) + if err := t.Execute(out, languagesByAlias); err != nil { + return err + } + + return nil +} diff --git a/internal/code-generator/generator/generator_test.go b/internal/code-generator/generator/generator_test.go index 93af6e1..f3a5a6a 100644 --- a/internal/code-generator/generator/generator_test.go +++ b/internal/code-generator/generator/generator_test.go @@ -37,10 +37,10 @@ const ( documentationTestTmplName = "documentation.go.tmpl" // Types test - typesTestFile = "test_files/type.test.yml" - typesGold = "test_files/type.gold" - typesTestTmplPath = "../assets/type.go.tmpl" - typesTestTmplName = "type.go.tmpl" + typesTestFile = "test_files/types.test.yml" + typesGold = "test_files/types.gold" + typesTestTmplPath = "../assets/types.go.tmpl" + typesTestTmplName = "types.go.tmpl" // Interpreters test interpretersTestFile = "test_files/interpreters.test.yml" @@ -53,6 +53,12 @@ const ( filenamesGold = "test_files/filenames.gold" filenamesTestTmplPath = "../assets/filenames.go.tmpl" filenamesTestTmplName = "filenames.go.tmpl" + + // Aliases test + aliasesTestFile = "test_files/aliases.test.yml" + aliasesGold = "test_files/aliases.gold" + aliasesTestTmplPath = "../assets/aliases.go.tmpl" + aliasesTestTmplName = "aliases.go.tmpl" ) func TestFromFile(t *testing.T) { @@ -128,6 +134,15 @@ func TestFromFile(t *testing.T) { generate: Filenames, wantOut: filenamesGold, }, + { + name: "TestFromFile_Aliases", + fileToParse: aliasesTestFile, + tmplPath: aliasesTestTmplPath, + tmplName: aliasesTestTmplName, + commit: commitTest, + generate: Aliases, + wantOut: aliasesGold, + }, } for _, tt := range tests { diff --git a/internal/code-generator/generator/langinfo.go b/internal/code-generator/generator/langinfo.go index e61d335..a37abcf 100644 --- a/internal/code-generator/generator/langinfo.go +++ b/internal/code-generator/generator/langinfo.go @@ -4,6 +4,7 @@ import "sort" type languageInfo struct { Type string `yaml:"type,omitempty"` + Aliases []string `yaml:"aliases,omitempty"` Extensions []string `yaml:"extensions,omitempty,flow"` Interpreters []string `yaml:"interpreters,omitempty,flow"` Filenames []string `yaml:"filenames,omitempty,flow"` diff --git a/internal/code-generator/generator/test_files/aliases.gold b/internal/code-generator/generator/test_files/aliases.gold new file mode 100644 index 0000000..c2181aa --- /dev/null +++ b/internal/code-generator/generator/test_files/aliases.gold @@ -0,0 +1,42 @@ +package slinguist + +// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator +// THIS FILE SHOULD NOT BE EDITED BY HAND +// Extracted from github/linguist commit: fe8b44ab8a225b1ffa75b983b916ea22fee5b6f7 + +// languagesByAlias keeps alias for different languages and use the name of the languages as a alias too. All the +// keys (alias or not) are written in lower case and the whitespaces has been replaced by underscores. +var languagesByAlias = map[string]string{ + "asp": "ASP", + "aspx": "ASP", + "aspx-vb": "ASP", + "au3": "AutoIt", + "autoit": "AutoIt", + "autoit3": "AutoIt", + "autoitscript": "AutoIt", + "bat": "Batchfile", + "batch": "Batchfile", + "batchfile": "Batchfile", + "bsdmake": "Makefile", + "c++": "C++", + "cpp": "C++", + "django": "HTML+Django", + "dosbatch": "Batchfile", + "go": "Go", + "golang": "Go", + "gradle": "Gradle", + "html+django": "HTML+Django", + "html+django/jinja": "HTML+Django", + "html+jinja": "HTML+Django", + "htmldjango": "HTML+Django", + "make": "Makefile", + "makefile": "Makefile", + "mf": "Makefile", + "njk": "HTML+Django", + "nunjucks": "HTML+Django", + "obj-c": "Objective-C", + "objc": "Objective-C", + "objective-c": "Objective-C", + "objectivec": "Objective-C", + "winbatch": "Batchfile", +} diff --git a/internal/code-generator/generator/test_files/aliases.test.yml b/internal/code-generator/generator/test_files/aliases.test.yml new file mode 100644 index 0000000..e138134 --- /dev/null +++ b/internal/code-generator/generator/test_files/aliases.test.yml @@ -0,0 +1,42 @@ +--- +ASP: + aliases: + - aspx + - aspx-vb +AutoIt: + aliases: + - au3 + - AutoIt3 + - AutoItScript +Batchfile: + aliases: + - bat + - batch + - dosbatch + - winbatch +C++: + aliases: + - cpp +Go: + aliases: + - golang +Gradle: + type: data +HTML+Django: + aliases: + - django + - html+django/jinja + - html+jinja + - htmldjango + - njk + - nunjucks +Makefile: + aliases: + - bsdmake + - make + - mf +Objective-C: + aliases: + - obj-c + - objc + - objectivec diff --git a/internal/code-generator/generator/test_files/content.gold b/internal/code-generator/generator/test_files/content.gold index 608b4fa..b18141d 100644 --- a/internal/code-generator/generator/test_files/content.gold +++ b/internal/code-generator/generator/test_files/content.gold @@ -14,10 +14,11 @@ func GetLanguageByContent(filename string, content []byte) (lang string, safe bo ext := strings.ToLower(filepath.Ext(filename)) if fnMatcher, ok := matchers[ext]; ok { lang, safe = fnMatcher(content) - return + } else { + lang = OtherLanguage } - return GetLanguageByExtension(filename) + return } type languageMatcher func([]byte) (string, bool) diff --git a/internal/code-generator/generator/test_files/type.test.go.tmpl b/internal/code-generator/generator/test_files/type.test.go.tmpl deleted file mode 100644 index de7592b..0000000 --- a/internal/code-generator/generator/test_files/type.test.go.tmpl +++ /dev/null @@ -1,28 +0,0 @@ -package slinguist - -// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator -// THIS FILE SHOULD NOT BE EDITED BY HAND -// Extracted from github/linguist commit: {{ getCommit }} - -type Type int - -const ( - // Language's type. Either data, programming, markup, prose, or unknown. - Unknown Type = iota - Data - Programming - Markup - Prose -) - -// GetLanguageType returns the given language's type. -func GetLanguageType(language string) (langType Type) { - langType, _ = languagesType[language] - return langType -} - -var languagesType = map[string]Type{ - {{range $language, $type := . -}} - "{{ $language }}": {{ $type -}}, - {{end -}} -} diff --git a/internal/code-generator/generator/test_files/type.gold b/internal/code-generator/generator/test_files/types.gold similarity index 53% rename from internal/code-generator/generator/test_files/type.gold rename to internal/code-generator/generator/test_files/types.gold index a54d81c..a13f45a 100644 --- a/internal/code-generator/generator/test_files/type.gold +++ b/internal/code-generator/generator/test_files/types.gold @@ -4,23 +4,6 @@ package slinguist // THIS FILE SHOULD NOT BE EDITED BY HAND // Extracted from github/linguist commit: fe8b44ab8a225b1ffa75b983b916ea22fee5b6f7 -type Type int - -const ( - // Language's type. Either data, programming, markup, prose, or unknown. - Unknown Type = iota - Data - Programming - Markup - Prose -) - -// GetLanguageType returns the given language's type. -func GetLanguageType(language string) (langType Type) { - langType, _ = languagesType[language] - return langType -} - var languagesType = map[string]Type{ "Scaml": Markup, "Scheme": Programming, diff --git a/internal/code-generator/generator/test_files/type.test.yml b/internal/code-generator/generator/test_files/types.test.yml similarity index 100% rename from internal/code-generator/generator/test_files/type.test.yml rename to internal/code-generator/generator/test_files/types.test.yml diff --git a/internal/code-generator/main.go b/internal/code-generator/main.go index 59a784d..1d40efc 100644 --- a/internal/code-generator/main.go +++ b/internal/code-generator/main.go @@ -35,9 +35,9 @@ const ( documentationTmpl = "documentation.go.tmpl" // type.go generation - typeFile = "type.go" - typeTmplPath = "internal/code-generator/assets/type.go.tmpl" - typeTmpl = "type.go.tmpl" + typeFile = "types_map.go" + typeTmplPath = "internal/code-generator/assets/types.go.tmpl" + typeTmpl = "types.go.tmpl" // interpreters_map.go generation interpretersFile = "interpreters_map.go" @@ -49,6 +49,11 @@ const ( filenamesTmplPath = "internal/code-generator/assets/filenames.go.tmpl" filenamesTmpl = "filenames.go.tmpl" + // aliases_map.go generation + aliasesFile = "aliases_map.go" + aliasesTmplPath = "internal/code-generator/assets/aliases.go.tmpl" + aliasesTmpl = "aliases.go.tmpl" + commitPath = ".git/refs/heads/master" ) @@ -75,6 +80,7 @@ func main() { &generatorArgs{languagesYAML, typeFile, typeTmplPath, typeTmpl, commit, generator.Types}, &generatorArgs{languagesYAML, interpretersFile, interpretersTmplPath, interpretersTmpl, commit, generator.Interpreters}, &generatorArgs{languagesYAML, filenamesFile, filenamesTmplPath, filenamesTmpl, commit, generator.Filenames}, + &generatorArgs{languagesYAML, aliasesFile, aliasesTmplPath, aliasesTmpl, commit, generator.Aliases}, } for _, args := range argsList { diff --git a/modeline.go b/modeline.go new file mode 100644 index 0000000..af88822 --- /dev/null +++ b/modeline.go @@ -0,0 +1,105 @@ +package slinguist + +import ( + "bytes" + "regexp" +) + +// GetLanguageByModeline returns the language of the given content looking for the modeline, +// and safe to indicate the sureness of returned language. +func GetLanguageByModeline(content []byte) (lang string, safe bool) { + headFoot := getHeaderAndFooter(content) + for _, getLang := range modelinesFunc { + lang = getLang(headFoot) + safe = lang != OtherLanguage + if safe { + return + } + } + + return +} + +func getHeaderAndFooter(content []byte) []byte { + const ( + searchScope = 5 + eol = `\n` + ) + + if bytes.Count(content, []byte(eol)) < 2*searchScope { + return content + } + + splitted := bytes.Split(content, []byte(eol)) + header := splitted[:searchScope] + footer := splitted[len(splitted)-searchScope:] + headerAndFooter := append(header, footer...) + return bytes.Join(headerAndFooter, []byte(eol)) +} + +var modelinesFunc = []func(content []byte) string{ + GetLanguageByEmacsModeline, + GetLanguageByVimModeline, +} + +var ( + reEmacsModeline = regexp.MustCompile(`.*-\*-\s*(.+?)\s*-\*-.*(?m:$)`) + reEmacsLang = regexp.MustCompile(`.*(?i:mode)\s*:\s*([^\s;]+)\s*;*.*`) + reVimModeline = regexp.MustCompile(`(?:(?m:\s|^)vi(?:m[<=>]?\d+|m)?|[\t\x20]*ex)\s*[:]\s*(.*)(?m:$)`) + reVimLang = regexp.MustCompile(`(?i:filetype|ft|syntax)\s*=(\w+)(?:\s|:|$)`) +) + +// GetLanguageByEmacsModeline detecs if the content has a emacs modeline and try to get a +// language basing on alias. If couldn't retrieve a valid language, it returns OtherLanguage. +func GetLanguageByEmacsModeline(content []byte) (lang string) { + matched := reEmacsModeline.FindAllSubmatch(content, -1) + if matched == nil { + return OtherLanguage + } + + // only take the last matched line, discard previous lines + lastLineMatched := matched[len(matched)-1][1] + matchedAlias := reEmacsLang.FindSubmatch(lastLineMatched) + var alias string + if matchedAlias != nil { + alias = string(matchedAlias[1]) + } else { + alias = string(lastLineMatched) + } + + lang = GetLanguageByAlias(alias) + return +} + +// GetLanguageByVimModeline detecs if the content has a vim modeline and try to get a +// language basing on alias. If couldn't retrieve a valid language, it returns OtherLanguage. +func GetLanguageByVimModeline(content []byte) (lang string) { + matched := reVimModeline.FindAllSubmatch(content, -1) + if matched == nil { + return OtherLanguage + } + + // only take the last matched line, discard previous lines + lastLineMatched := matched[len(matched)-1][1] + matchedAlias := reVimLang.FindAllSubmatch(lastLineMatched, -1) + if matchedAlias == nil { + return OtherLanguage + } + + alias := string(matchedAlias[0][1]) + if len(matchedAlias) > 1 { + // cases: + // matchedAlias = [["syntax=ruby " "ruby"] ["ft=python " "python"] ["filetype=perl " "perl"]] returns OtherLanguage; + // matchedAlias = [["syntax=python " "python"] ["ft=python " "python"] ["filetype=python " "python"]] returns "Python"; + for _, match := range matchedAlias { + otherAlias := string(match[1]) + if otherAlias != alias { + alias = OtherLanguage + break + } + } + } + + lang = GetLanguageByAlias(alias) + return +} diff --git a/modeline_test.go b/modeline_test.go new file mode 100644 index 0000000..d117eff --- /dev/null +++ b/modeline_test.go @@ -0,0 +1,83 @@ +package slinguist + +import ( + "io/ioutil" + "path/filepath" + + . "gopkg.in/check.v1" +) + +const ( + modelinesDir = ".linguist/test/fixtures/Data/Modelines" +) + +func (s *TSuite) TestGetLanguageByModeline(c *C) { + linguistTests := []struct { + filename string + expectedLang string + expectedSafe bool + }{ + // Emacs + {filename: "example_smalltalk.md", expectedLang: "Smalltalk", expectedSafe: true}, + {filename: "fundamentalEmacs.c", expectedLang: "Text", expectedSafe: true}, + {filename: "iamphp.inc", expectedLang: "PHP", expectedSafe: true}, + {filename: "seeplusplusEmacs1", expectedLang: "C++", expectedSafe: true}, + {filename: "seeplusplusEmacs2", expectedLang: "C++", expectedSafe: true}, + {filename: "seeplusplusEmacs3", expectedLang: "C++", expectedSafe: true}, + {filename: "seeplusplusEmacs4", expectedLang: "C++", expectedSafe: true}, + {filename: "seeplusplusEmacs5", expectedLang: "C++", expectedSafe: true}, + {filename: "seeplusplusEmacs6", expectedLang: "C++", expectedSafe: true}, + {filename: "seeplusplusEmacs7", expectedLang: "C++", expectedSafe: true}, + {filename: "seeplusplusEmacs9", expectedLang: "C++", expectedSafe: true}, + {filename: "seeplusplusEmacs10", expectedLang: "C++", expectedSafe: true}, + {filename: "seeplusplusEmacs11", expectedLang: "C++", expectedSafe: true}, + {filename: "seeplusplusEmacs12", expectedLang: "C++", expectedSafe: true}, + + // Vim + {filename: "seeplusplus", expectedLang: "C++", expectedSafe: true}, + {filename: "iamjs.pl", expectedLang: "JavaScript", expectedSafe: true}, + {filename: "iamjs2.pl", expectedLang: "JavaScript", expectedSafe: true}, + {filename: "not_perl.pl", expectedLang: "Prolog", expectedSafe: true}, + {filename: "ruby", expectedLang: "Ruby", expectedSafe: true}, + {filename: "ruby2", expectedLang: "Ruby", expectedSafe: true}, + {filename: "ruby3", expectedLang: "Ruby", expectedSafe: true}, + {filename: "ruby4", expectedLang: "Ruby", expectedSafe: true}, + {filename: "ruby5", expectedLang: "Ruby", expectedSafe: true}, + {filename: "ruby6", expectedLang: "Ruby", expectedSafe: true}, + {filename: "ruby7", expectedLang: "Ruby", expectedSafe: true}, + {filename: "ruby8", expectedLang: "Ruby", expectedSafe: true}, + {filename: "ruby9", expectedLang: "Ruby", expectedSafe: true}, + {filename: "ruby10", expectedLang: "Ruby", expectedSafe: true}, + {filename: "ruby11", expectedLang: "Ruby", expectedSafe: true}, + {filename: "ruby12", expectedLang: "Ruby", expectedSafe: true}, + } + + for _, test := range linguistTests { + content, err := ioutil.ReadFile(filepath.Join(modelinesDir, test.filename)) + c.Assert(err, Equals, nil) + + lang, safe := GetLanguageByModeline(content) + c.Assert(lang, Equals, test.expectedLang) + c.Assert(safe, Equals, test.expectedSafe) + } + + const ( + wrongVim = `# vim: set syntax=ruby ft =python filetype=perl :` + rightVim = `/* vim: set syntax=python ft =python filetype=python */` + ) + + tests := []struct { + content []byte + expectedLang string + expectedSafe bool + }{ + {content: []byte(wrongVim), expectedLang: OtherLanguage, expectedSafe: false}, + {content: []byte(rightVim), expectedLang: "Python", expectedSafe: true}, + } + + for _, test := range tests { + lang, safe := GetLanguageByModeline(test.content) + c.Assert(lang, Equals, test.expectedLang) + c.Assert(safe, Equals, test.expectedSafe) + } +} diff --git a/type.go b/type.go index 4c715d2..cdca17e 100644 --- a/type.go +++ b/type.go @@ -1,9 +1,5 @@ package slinguist -// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator -// THIS FILE SHOULD NOT BE EDITED BY HAND -// Extracted from github/linguist commit: dae33dc2b20cddc85d1300435c3be7118a7115a9 - type Type int const ( @@ -17,453 +13,10 @@ const ( // GetLanguageType returns the given language's type. func GetLanguageType(language string) (langType Type) { - langType, _ = languagesType[language] + langType, ok := languagesType[language] + if !ok { + langType = Unknown + } + return langType } - -var languagesType = map[string]Type{ - "1C Enterprise": Programming, - "ABAP": Programming, - "ABNF": Data, - "AGS Script": Programming, - "AMPL": Programming, - "ANTLR": Programming, - "API Blueprint": Markup, - "APL": Programming, - "ASN.1": Data, - "ASP": Programming, - "ATS": Programming, - "ActionScript": Programming, - "Ada": Programming, - "Agda": Programming, - "Alloy": Programming, - "Alpine Abuild": Programming, - "Ant Build System": Data, - "ApacheConf": Markup, - "Apex": Programming, - "Apollo Guidance Computer": Programming, - "AppleScript": Programming, - "Arc": Programming, - "Arduino": Programming, - "AsciiDoc": Prose, - "AspectJ": Programming, - "Assembly": Programming, - "Augeas": Programming, - "AutoHotkey": Programming, - "AutoIt": Programming, - "Awk": Programming, - "Batchfile": Programming, - "Befunge": Programming, - "Bison": Programming, - "BitBake": Programming, - "Blade": Markup, - "BlitzBasic": Programming, - "BlitzMax": Programming, - "Bluespec": Programming, - "Boo": Programming, - "Brainfuck": Programming, - "Brightscript": Programming, - "Bro": Programming, - "C": Programming, - "C#": Programming, - "C++": Programming, - "C-ObjDump": Data, - "C2hs Haskell": Programming, - "CLIPS": Programming, - "CMake": Programming, - "COBOL": Programming, - "COLLADA": Data, - "CSON": Data, - "CSS": Markup, - "CSV": Data, - "Cap'n Proto": Programming, - "CartoCSS": Programming, - "Ceylon": Programming, - "Chapel": Programming, - "Charity": Programming, - "ChucK": Programming, - "Cirru": Programming, - "Clarion": Programming, - "Clean": Programming, - "Click": Programming, - "Clojure": Programming, - "CoffeeScript": Programming, - "ColdFusion": Programming, - "ColdFusion CFC": Programming, - "Common Lisp": Programming, - "Component Pascal": Programming, - "Cool": Programming, - "Coq": Programming, - "Cpp-ObjDump": Data, - "Creole": Prose, - "Crystal": Programming, - "Csound": Programming, - "Csound Document": Programming, - "Csound Score": Programming, - "Cuda": Programming, - "Cycript": Programming, - "Cython": Programming, - "D": Programming, - "D-ObjDump": Data, - "DIGITAL Command Language": Programming, - "DM": Programming, - "DNS Zone": Data, - "DTrace": Programming, - "Darcs Patch": Data, - "Dart": Programming, - "Diff": Data, - "Dockerfile": Data, - "Dogescript": Programming, - "Dylan": Programming, - "E": Programming, - "EBNF": Data, - "ECL": Programming, - "ECLiPSe": Programming, - "EJS": Markup, - "EQ": Programming, - "Eagle": Markup, - "Ecere Projects": Data, - "Eiffel": Programming, - "Elixir": Programming, - "Elm": Programming, - "Emacs Lisp": Programming, - "EmberScript": Programming, - "Erlang": Programming, - "F#": Programming, - "FLUX": Programming, - "Factor": Programming, - "Fancy": Programming, - "Fantom": Programming, - "Filebench WML": Programming, - "Filterscript": Programming, - "Formatted": Data, - "Forth": Programming, - "Fortran": Programming, - "FreeMarker": Programming, - "Frege": Programming, - "G-code": Data, - "GAMS": Programming, - "GAP": Programming, - "GCC Machine Description": Programming, - "GDB": Programming, - "GDScript": Programming, - "GLSL": Programming, - "GN": Data, - "Game Maker Language": Programming, - "Genie": Programming, - "Genshi": Programming, - "Gentoo Ebuild": Programming, - "Gentoo Eclass": Programming, - "Gettext Catalog": Prose, - "Gherkin": Programming, - "Glyph": Programming, - "Gnuplot": Programming, - "Go": Programming, - "Golo": Programming, - "Gosu": Programming, - "Grace": Programming, - "Gradle": Data, - "Grammatical Framework": Programming, - "Graph Modeling Language": Data, - "GraphQL": Data, - "Graphviz (DOT)": Data, - "Groovy": Programming, - "Groovy Server Pages": Programming, - "HCL": Programming, - "HLSL": Programming, - "HTML": Markup, - "HTML+Django": Markup, - "HTML+ECR": Markup, - "HTML+EEX": Markup, - "HTML+ERB": Markup, - "HTML+PHP": Markup, - "HTTP": Data, - "Hack": Programming, - "Haml": Markup, - "Handlebars": Markup, - "Harbour": Programming, - "Haskell": Programming, - "Haxe": Programming, - "Hy": Programming, - "HyPhy": Programming, - "IDL": Programming, - "IGOR Pro": Programming, - "INI": Data, - "IRC log": Data, - "Idris": Programming, - "Inform 7": Programming, - "Inno Setup": Programming, - "Io": Programming, - "Ioke": Programming, - "Isabelle": Programming, - "Isabelle ROOT": Programming, - "J": Programming, - "JFlex": Programming, - "JSON": Data, - "JSON5": Data, - "JSONLD": Data, - "JSONiq": Programming, - "JSX": Programming, - "Jasmin": Programming, - "Java": Programming, - "Java Server Pages": Programming, - "JavaScript": Programming, - "Jison": Programming, - "Jison Lex": Programming, - "Julia": Programming, - "Jupyter Notebook": Markup, - "KRL": Programming, - "KiCad": Programming, - "Kit": Markup, - "Kotlin": Programming, - "LFE": Programming, - "LLVM": Programming, - "LOLCODE": Programming, - "LSL": Programming, - "LabVIEW": Programming, - "Lasso": Programming, - "Latte": Markup, - "Lean": Programming, - "Less": Markup, - "Lex": Programming, - "LilyPond": Programming, - "Limbo": Programming, - "Linker Script": Data, - "Linux Kernel Module": Data, - "Liquid": Markup, - "Literate Agda": Programming, - "Literate CoffeeScript": Programming, - "Literate Haskell": Programming, - "LiveScript": Programming, - "Logos": Programming, - "Logtalk": Programming, - "LookML": Programming, - "LoomScript": Programming, - "Lua": Programming, - "M": Programming, - "M4": Programming, - "M4Sugar": Programming, - "MAXScript": Programming, - "MQL4": Programming, - "MQL5": Programming, - "MTML": Markup, - "MUF": Programming, - "Makefile": Programming, - "Mako": Programming, - "Markdown": Prose, - "Marko": Markup, - "Mask": Markup, - "Mathematica": Programming, - "Matlab": Programming, - "Maven POM": Data, - "Max": Programming, - "MediaWiki": Prose, - "Mercury": Programming, - "Meson": Programming, - "Metal": Programming, - "MiniD": Programming, - "Mirah": Programming, - "Modelica": Programming, - "Modula-2": Programming, - "Module Management System": Programming, - "Monkey": Programming, - "Moocode": Programming, - "MoonScript": Programming, - "Myghty": Programming, - "NCL": Programming, - "NL": Data, - "NSIS": Programming, - "Nemerle": Programming, - "NetLinx": Programming, - "NetLinx+ERB": Programming, - "NetLogo": Programming, - "NewLisp": Programming, - "Nginx": Markup, - "Nim": Programming, - "Ninja": Data, - "Nit": Programming, - "Nix": Programming, - "Nu": Programming, - "NumPy": Programming, - "OCaml": Programming, - "ObjDump": Data, - "Objective-C": Programming, - "Objective-C++": Programming, - "Objective-J": Programming, - "Omgrofl": Programming, - "Opa": Programming, - "Opal": Programming, - "OpenCL": Programming, - "OpenEdge ABL": Programming, - "OpenRC runscript": Programming, - "OpenSCAD": Programming, - "OpenType Feature File": Data, - "Org": Prose, - "Ox": Programming, - "Oxygene": Programming, - "Oz": Programming, - "P4": Programming, - "PAWN": Programming, - "PHP": Programming, - "PLSQL": Programming, - "PLpgSQL": Programming, - "POV-Ray SDL": Programming, - "Pan": Programming, - "Papyrus": Programming, - "Parrot": Programming, - "Parrot Assembly": Programming, - "Parrot Internal Representation": Programming, - "Pascal": Programming, - "Perl": Programming, - "Perl6": Programming, - "Pic": Markup, - "Pickle": Data, - "PicoLisp": Programming, - "PigLatin": Programming, - "Pike": Programming, - "Pod": Prose, - "PogoScript": Programming, - "Pony": Programming, - "PostScript": Markup, - "PowerBuilder": Programming, - "PowerShell": Programming, - "Processing": Programming, - "Prolog": Programming, - "Propeller Spin": Programming, - "Protocol Buffer": Markup, - "Public Key": Data, - "Pug": Markup, - "Puppet": Programming, - "Pure Data": Programming, - "PureBasic": Programming, - "PureScript": Programming, - "Python": Programming, - "Python console": Programming, - "Python traceback": Data, - "QML": Programming, - "QMake": Programming, - "R": Programming, - "RAML": Markup, - "RDoc": Prose, - "REALbasic": Programming, - "REXX": Programming, - "RHTML": Markup, - "RMarkdown": Prose, - "RPM Spec": Data, - "RUNOFF": Markup, - "Racket": Programming, - "Ragel": Programming, - "Rascal": Programming, - "Raw token data": Data, - "Reason": Programming, - "Rebol": Programming, - "Red": Programming, - "Redcode": Programming, - "Regular Expression": Data, - "Ren'Py": Programming, - "RenderScript": Programming, - "RobotFramework": Programming, - "Roff": Markup, - "Rouge": Programming, - "Ruby": Programming, - "Rust": Programming, - "SAS": Programming, - "SCSS": Markup, - "SMT": Programming, - "SPARQL": Data, - "SQF": Programming, - "SQL": Data, - "SQLPL": Programming, - "SRecode Template": Markup, - "STON": Data, - "SVG": Data, - "Sage": Programming, - "SaltStack": Programming, - "Sass": Markup, - "Scala": Programming, - "Scaml": Markup, - "Scheme": Programming, - "Scilab": Programming, - "Self": Programming, - "Shell": Programming, - "ShellSession": Programming, - "Shen": Programming, - "Slash": Programming, - "Slim": Markup, - "Smali": Programming, - "Smalltalk": Programming, - "Smarty": Programming, - "SourcePawn": Programming, - "Spline Font Database": Data, - "Squirrel": Programming, - "Stan": Programming, - "Standard ML": Programming, - "Stata": Programming, - "Stylus": Markup, - "SubRip Text": Data, - "Sublime Text Config": Data, - "SuperCollider": Programming, - "Swift": Programming, - "SystemVerilog": Programming, - "TI Program": Programming, - "TLA": Programming, - "TOML": Data, - "TXL": Programming, - "Tcl": Programming, - "Tcsh": Programming, - "TeX": Markup, - "Tea": Markup, - "Terra": Programming, - "Text": Prose, - "Textile": Prose, - "Thrift": Programming, - "Turing": Programming, - "Turtle": Data, - "Twig": Markup, - "TypeScript": Programming, - "Unified Parallel C": Programming, - "Unity3D Asset": Data, - "Unix Assembly": Programming, - "Uno": Programming, - "UnrealScript": Programming, - "UrWeb": Programming, - "VCL": Programming, - "VHDL": Programming, - "Vala": Programming, - "Verilog": Programming, - "Vim script": Programming, - "Visual Basic": Programming, - "Volt": Programming, - "Vue": Markup, - "Wavefront Material": Data, - "Wavefront Object": Data, - "Web Ontology Language": Markup, - "WebIDL": Programming, - "World of Warcraft Addon Data": Data, - "X10": Programming, - "XC": Programming, - "XCompose": Data, - "XML": Data, - "XPages": Programming, - "XProc": Programming, - "XQuery": Programming, - "XS": Programming, - "XSLT": Programming, - "Xojo": Programming, - "Xtend": Programming, - "YAML": Data, - "YANG": Data, - "Yacc": Programming, - "Zephir": Programming, - "Zimpl": Programming, - "desktop": Data, - "eC": Programming, - "edn": Data, - "fish": Programming, - "mupad": Programming, - "nesC": Programming, - "ooc": Programming, - "reStructuredText": Prose, - "wisp": Programming, - "xBase": Programming, -} diff --git a/type_test.go b/type_test.go index 094a5a7..5607e34 100644 --- a/type_test.go +++ b/type_test.go @@ -29,4 +29,7 @@ func (s *TSuite) TestGetLanguageType(c *C) { langType = GetLanguageType("Textile") c.Assert(langType, Equals, Prose) + + langType = GetLanguageType("Whatever") + c.Assert(langType, Equals, Unknown) } diff --git a/types_map.go b/types_map.go new file mode 100644 index 0000000..a75adc5 --- /dev/null +++ b/types_map.go @@ -0,0 +1,452 @@ +package slinguist + +// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator +// THIS FILE SHOULD NOT BE EDITED BY HAND +// Extracted from github/linguist commit: dae33dc2b20cddc85d1300435c3be7118a7115a9 + +var languagesType = map[string]Type{ + "1C Enterprise": Programming, + "ABAP": Programming, + "ABNF": Data, + "AGS Script": Programming, + "AMPL": Programming, + "ANTLR": Programming, + "API Blueprint": Markup, + "APL": Programming, + "ASN.1": Data, + "ASP": Programming, + "ATS": Programming, + "ActionScript": Programming, + "Ada": Programming, + "Agda": Programming, + "Alloy": Programming, + "Alpine Abuild": Programming, + "Ant Build System": Data, + "ApacheConf": Markup, + "Apex": Programming, + "Apollo Guidance Computer": Programming, + "AppleScript": Programming, + "Arc": Programming, + "Arduino": Programming, + "AsciiDoc": Prose, + "AspectJ": Programming, + "Assembly": Programming, + "Augeas": Programming, + "AutoHotkey": Programming, + "AutoIt": Programming, + "Awk": Programming, + "Batchfile": Programming, + "Befunge": Programming, + "Bison": Programming, + "BitBake": Programming, + "Blade": Markup, + "BlitzBasic": Programming, + "BlitzMax": Programming, + "Bluespec": Programming, + "Boo": Programming, + "Brainfuck": Programming, + "Brightscript": Programming, + "Bro": Programming, + "C": Programming, + "C#": Programming, + "C++": Programming, + "C-ObjDump": Data, + "C2hs Haskell": Programming, + "CLIPS": Programming, + "CMake": Programming, + "COBOL": Programming, + "COLLADA": Data, + "CSON": Data, + "CSS": Markup, + "CSV": Data, + "Cap'n Proto": Programming, + "CartoCSS": Programming, + "Ceylon": Programming, + "Chapel": Programming, + "Charity": Programming, + "ChucK": Programming, + "Cirru": Programming, + "Clarion": Programming, + "Clean": Programming, + "Click": Programming, + "Clojure": Programming, + "CoffeeScript": Programming, + "ColdFusion": Programming, + "ColdFusion CFC": Programming, + "Common Lisp": Programming, + "Component Pascal": Programming, + "Cool": Programming, + "Coq": Programming, + "Cpp-ObjDump": Data, + "Creole": Prose, + "Crystal": Programming, + "Csound": Programming, + "Csound Document": Programming, + "Csound Score": Programming, + "Cuda": Programming, + "Cycript": Programming, + "Cython": Programming, + "D": Programming, + "D-ObjDump": Data, + "DIGITAL Command Language": Programming, + "DM": Programming, + "DNS Zone": Data, + "DTrace": Programming, + "Darcs Patch": Data, + "Dart": Programming, + "Diff": Data, + "Dockerfile": Data, + "Dogescript": Programming, + "Dylan": Programming, + "E": Programming, + "EBNF": Data, + "ECL": Programming, + "ECLiPSe": Programming, + "EJS": Markup, + "EQ": Programming, + "Eagle": Markup, + "Ecere Projects": Data, + "Eiffel": Programming, + "Elixir": Programming, + "Elm": Programming, + "Emacs Lisp": Programming, + "EmberScript": Programming, + "Erlang": Programming, + "F#": Programming, + "FLUX": Programming, + "Factor": Programming, + "Fancy": Programming, + "Fantom": Programming, + "Filebench WML": Programming, + "Filterscript": Programming, + "Formatted": Data, + "Forth": Programming, + "Fortran": Programming, + "FreeMarker": Programming, + "Frege": Programming, + "G-code": Data, + "GAMS": Programming, + "GAP": Programming, + "GCC Machine Description": Programming, + "GDB": Programming, + "GDScript": Programming, + "GLSL": Programming, + "GN": Data, + "Game Maker Language": Programming, + "Genie": Programming, + "Genshi": Programming, + "Gentoo Ebuild": Programming, + "Gentoo Eclass": Programming, + "Gettext Catalog": Prose, + "Gherkin": Programming, + "Glyph": Programming, + "Gnuplot": Programming, + "Go": Programming, + "Golo": Programming, + "Gosu": Programming, + "Grace": Programming, + "Gradle": Data, + "Grammatical Framework": Programming, + "Graph Modeling Language": Data, + "GraphQL": Data, + "Graphviz (DOT)": Data, + "Groovy": Programming, + "Groovy Server Pages": Programming, + "HCL": Programming, + "HLSL": Programming, + "HTML": Markup, + "HTML+Django": Markup, + "HTML+ECR": Markup, + "HTML+EEX": Markup, + "HTML+ERB": Markup, + "HTML+PHP": Markup, + "HTTP": Data, + "Hack": Programming, + "Haml": Markup, + "Handlebars": Markup, + "Harbour": Programming, + "Haskell": Programming, + "Haxe": Programming, + "Hy": Programming, + "HyPhy": Programming, + "IDL": Programming, + "IGOR Pro": Programming, + "INI": Data, + "IRC log": Data, + "Idris": Programming, + "Inform 7": Programming, + "Inno Setup": Programming, + "Io": Programming, + "Ioke": Programming, + "Isabelle": Programming, + "Isabelle ROOT": Programming, + "J": Programming, + "JFlex": Programming, + "JSON": Data, + "JSON5": Data, + "JSONLD": Data, + "JSONiq": Programming, + "JSX": Programming, + "Jasmin": Programming, + "Java": Programming, + "Java Server Pages": Programming, + "JavaScript": Programming, + "Jison": Programming, + "Jison Lex": Programming, + "Julia": Programming, + "Jupyter Notebook": Markup, + "KRL": Programming, + "KiCad": Programming, + "Kit": Markup, + "Kotlin": Programming, + "LFE": Programming, + "LLVM": Programming, + "LOLCODE": Programming, + "LSL": Programming, + "LabVIEW": Programming, + "Lasso": Programming, + "Latte": Markup, + "Lean": Programming, + "Less": Markup, + "Lex": Programming, + "LilyPond": Programming, + "Limbo": Programming, + "Linker Script": Data, + "Linux Kernel Module": Data, + "Liquid": Markup, + "Literate Agda": Programming, + "Literate CoffeeScript": Programming, + "Literate Haskell": Programming, + "LiveScript": Programming, + "Logos": Programming, + "Logtalk": Programming, + "LookML": Programming, + "LoomScript": Programming, + "Lua": Programming, + "M": Programming, + "M4": Programming, + "M4Sugar": Programming, + "MAXScript": Programming, + "MQL4": Programming, + "MQL5": Programming, + "MTML": Markup, + "MUF": Programming, + "Makefile": Programming, + "Mako": Programming, + "Markdown": Prose, + "Marko": Markup, + "Mask": Markup, + "Mathematica": Programming, + "Matlab": Programming, + "Maven POM": Data, + "Max": Programming, + "MediaWiki": Prose, + "Mercury": Programming, + "Meson": Programming, + "Metal": Programming, + "MiniD": Programming, + "Mirah": Programming, + "Modelica": Programming, + "Modula-2": Programming, + "Module Management System": Programming, + "Monkey": Programming, + "Moocode": Programming, + "MoonScript": Programming, + "Myghty": Programming, + "NCL": Programming, + "NL": Data, + "NSIS": Programming, + "Nemerle": Programming, + "NetLinx": Programming, + "NetLinx+ERB": Programming, + "NetLogo": Programming, + "NewLisp": Programming, + "Nginx": Markup, + "Nim": Programming, + "Ninja": Data, + "Nit": Programming, + "Nix": Programming, + "Nu": Programming, + "NumPy": Programming, + "OCaml": Programming, + "ObjDump": Data, + "Objective-C": Programming, + "Objective-C++": Programming, + "Objective-J": Programming, + "Omgrofl": Programming, + "Opa": Programming, + "Opal": Programming, + "OpenCL": Programming, + "OpenEdge ABL": Programming, + "OpenRC runscript": Programming, + "OpenSCAD": Programming, + "OpenType Feature File": Data, + "Org": Prose, + "Ox": Programming, + "Oxygene": Programming, + "Oz": Programming, + "P4": Programming, + "PAWN": Programming, + "PHP": Programming, + "PLSQL": Programming, + "PLpgSQL": Programming, + "POV-Ray SDL": Programming, + "Pan": Programming, + "Papyrus": Programming, + "Parrot": Programming, + "Parrot Assembly": Programming, + "Parrot Internal Representation": Programming, + "Pascal": Programming, + "Perl": Programming, + "Perl6": Programming, + "Pic": Markup, + "Pickle": Data, + "PicoLisp": Programming, + "PigLatin": Programming, + "Pike": Programming, + "Pod": Prose, + "PogoScript": Programming, + "Pony": Programming, + "PostScript": Markup, + "PowerBuilder": Programming, + "PowerShell": Programming, + "Processing": Programming, + "Prolog": Programming, + "Propeller Spin": Programming, + "Protocol Buffer": Markup, + "Public Key": Data, + "Pug": Markup, + "Puppet": Programming, + "Pure Data": Programming, + "PureBasic": Programming, + "PureScript": Programming, + "Python": Programming, + "Python console": Programming, + "Python traceback": Data, + "QML": Programming, + "QMake": Programming, + "R": Programming, + "RAML": Markup, + "RDoc": Prose, + "REALbasic": Programming, + "REXX": Programming, + "RHTML": Markup, + "RMarkdown": Prose, + "RPM Spec": Data, + "RUNOFF": Markup, + "Racket": Programming, + "Ragel": Programming, + "Rascal": Programming, + "Raw token data": Data, + "Reason": Programming, + "Rebol": Programming, + "Red": Programming, + "Redcode": Programming, + "Regular Expression": Data, + "Ren'Py": Programming, + "RenderScript": Programming, + "RobotFramework": Programming, + "Roff": Markup, + "Rouge": Programming, + "Ruby": Programming, + "Rust": Programming, + "SAS": Programming, + "SCSS": Markup, + "SMT": Programming, + "SPARQL": Data, + "SQF": Programming, + "SQL": Data, + "SQLPL": Programming, + "SRecode Template": Markup, + "STON": Data, + "SVG": Data, + "Sage": Programming, + "SaltStack": Programming, + "Sass": Markup, + "Scala": Programming, + "Scaml": Markup, + "Scheme": Programming, + "Scilab": Programming, + "Self": Programming, + "Shell": Programming, + "ShellSession": Programming, + "Shen": Programming, + "Slash": Programming, + "Slim": Markup, + "Smali": Programming, + "Smalltalk": Programming, + "Smarty": Programming, + "SourcePawn": Programming, + "Spline Font Database": Data, + "Squirrel": Programming, + "Stan": Programming, + "Standard ML": Programming, + "Stata": Programming, + "Stylus": Markup, + "SubRip Text": Data, + "Sublime Text Config": Data, + "SuperCollider": Programming, + "Swift": Programming, + "SystemVerilog": Programming, + "TI Program": Programming, + "TLA": Programming, + "TOML": Data, + "TXL": Programming, + "Tcl": Programming, + "Tcsh": Programming, + "TeX": Markup, + "Tea": Markup, + "Terra": Programming, + "Text": Prose, + "Textile": Prose, + "Thrift": Programming, + "Turing": Programming, + "Turtle": Data, + "Twig": Markup, + "TypeScript": Programming, + "Unified Parallel C": Programming, + "Unity3D Asset": Data, + "Unix Assembly": Programming, + "Uno": Programming, + "UnrealScript": Programming, + "UrWeb": Programming, + "VCL": Programming, + "VHDL": Programming, + "Vala": Programming, + "Verilog": Programming, + "Vim script": Programming, + "Visual Basic": Programming, + "Volt": Programming, + "Vue": Markup, + "Wavefront Material": Data, + "Wavefront Object": Data, + "Web Ontology Language": Markup, + "WebIDL": Programming, + "World of Warcraft Addon Data": Data, + "X10": Programming, + "XC": Programming, + "XCompose": Data, + "XML": Data, + "XPages": Programming, + "XProc": Programming, + "XQuery": Programming, + "XS": Programming, + "XSLT": Programming, + "Xojo": Programming, + "Xtend": Programming, + "YAML": Data, + "YANG": Data, + "Yacc": Programming, + "Zephir": Programming, + "Zimpl": Programming, + "desktop": Data, + "eC": Programming, + "edn": Data, + "fish": Programming, + "mupad": Programming, + "nesC": Programming, + "ooc": Programming, + "reStructuredText": Prose, + "wisp": Programming, + "xBase": Programming, +}