Merge pull request #30 from mcarmonaa/improvement-rearrange

Improvement rearranged code
This commit is contained in:
Máximo Cuadros 2017-06-08 09:01:51 +02:00 committed by GitHub
commit c37fe182d4
54 changed files with 75602 additions and 81872 deletions

655
alias.go
View File

@ -1,16 +1,647 @@
package slinguist
import "strings"
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: b6460f8ed6b249281ada099ca28bd8f1230b8892
// GetLanguageByAlias returns the language related to the given alias and ok set to true,
// or Otherlanguage and ok set to false otherwise.
func GetLanguageByAlias(alias string) (lang string, ok bool) {
a := strings.Split(alias, `,`)[0]
a = strings.ToLower(a)
lang, ok = languagesByAlias[a]
if !ok {
lang = OtherLanguage
}
return
// languagesByAlias maps every alias to its language; the name of each language is registered as an alias too.
// All the keys (aliases or names) are written in lower case and their whitespace has been replaced by underscores.
var languagesByAlias = map[string]string{
"1c_enterprise": "1C Enterprise",
"abap": "ABAP",
"abl": "OpenEdge ABL",
"abnf": "ABNF",
"abuild": "Alpine Abuild",
"aconf": "ApacheConf",
"actionscript": "ActionScript",
"actionscript3": "ActionScript",
"actionscript_3": "ActionScript",
"ada": "Ada",
"ada2005": "Ada",
"ada95": "Ada",
"advpl": "xBase",
"afdko": "OpenType Feature File",
"agda": "Agda",
"ags": "AGS Script",
"ags_script": "AGS Script",
"ahk": "AutoHotkey",
"alloy": "Alloy",
"alpine_abuild": "Alpine Abuild",
"ampl": "AMPL",
"ant_build_system": "Ant Build System",
"antlr": "ANTLR",
"apache": "ApacheConf",
"apacheconf": "ApacheConf",
"apex": "Apex",
"api_blueprint": "API Blueprint",
"apkbuild": "Alpine Abuild",
"apl": "APL",
"apollo_guidance_computer": "Apollo Guidance Computer",
"applescript": "AppleScript",
"arc": "Arc",
"arduino": "Arduino",
"arexx": "REXX",
"as3": "ActionScript",
"asciidoc": "AsciiDoc",
"asn.1": "ASN.1",
"asp": "ASP",
"aspectj": "AspectJ",
"aspx": "ASP",
"aspx-vb": "ASP",
"assembly": "Assembly",
"ats": "ATS",
"ats2": "ATS",
"au3": "AutoIt",
"augeas": "Augeas",
"autoconf": "M4Sugar",
"autohotkey": "AutoHotkey",
"autoit": "AutoIt",
"autoit3": "AutoIt",
"autoitscript": "AutoIt",
"awk": "Awk",
"b3d": "BlitzBasic",
"bash": "Shell",
"bash_session": "ShellSession",
"bat": "Batchfile",
"batch": "Batchfile",
"batchfile": "Batchfile",
"befunge": "Befunge",
"bison": "Bison",
"bitbake": "BitBake",
"blade": "Blade",
"blitz3d": "BlitzBasic",
"blitzbasic": "BlitzBasic",
"blitzmax": "BlitzMax",
"blitzplus": "BlitzBasic",
"bluespec": "Bluespec",
"bmax": "BlitzMax",
"boo": "Boo",
"bplus": "BlitzBasic",
"brainfuck": "Brainfuck",
"brightscript": "Brightscript",
"bro": "Bro",
"bsdmake": "Makefile",
"byond": "DM",
"c": "C",
"c#": "C#",
"c++": "C++",
"c++-objdump": "Cpp-ObjDump",
"c-objdump": "C-ObjDump",
"c2hs": "C2hs Haskell",
"c2hs_haskell": "C2hs Haskell",
"cap'n_proto": "Cap'n Proto",
"carto": "CartoCSS",
"cartocss": "CartoCSS",
"ceylon": "Ceylon",
"cfc": "ColdFusion CFC",
"cfm": "ColdFusion",
"cfml": "ColdFusion",
"chapel": "Chapel",
"charity": "Charity",
"chpl": "Chapel",
"chuck": "ChucK",
"cirru": "Cirru",
"clarion": "Clarion",
"clean": "Clean",
"click": "Click",
"clipper": "xBase",
"clips": "CLIPS",
"clojure": "Clojure",
"closure_templates": "Closure Templates",
"cmake": "CMake",
"cobol": "COBOL",
"coffee": "CoffeeScript",
"coffee-script": "CoffeeScript",
"coffeescript": "CoffeeScript",
"coldfusion": "ColdFusion",
"coldfusion_cfc": "ColdFusion CFC",
"coldfusion_html": "ColdFusion",
"collada": "COLLADA",
"common_lisp": "Common Lisp",
"component_pascal": "Component Pascal",
"console": "ShellSession",
"cool": "Cool",
"coq": "Coq",
"cpp": "C++",
"cpp-objdump": "Cpp-ObjDump",
"creole": "Creole",
"crystal": "Crystal",
"csharp": "C#",
"cson": "CSON",
"csound": "Csound",
"csound-csd": "Csound Document",
"csound-orc": "Csound",
"csound-sco": "Csound Score",
"csound_document": "Csound Document",
"csound_score": "Csound Score",
"css": "CSS",
"csv": "CSV",
"cucumber": "Gherkin",
"cuda": "Cuda",
"cweb": "CWeb",
"cycript": "Cycript",
"cython": "Cython",
"d": "D",
"d-objdump": "D-ObjDump",
"darcs_patch": "Darcs Patch",
"dart": "Dart",
"dcl": "DIGITAL Command Language",
"delphi": "Component Pascal",
"desktop": "desktop",
"diff": "Diff",
"digital_command_language": "DIGITAL Command Language",
"django": "HTML+Django",
"dm": "DM",
"dns_zone": "DNS Zone",
"dockerfile": "Dockerfile",
"dogescript": "Dogescript",
"dosbatch": "Batchfile",
"dosini": "INI",
"dpatch": "Darcs Patch",
"dtrace": "DTrace",
"dtrace-script": "DTrace",
"dylan": "Dylan",
"e": "E",
"eagle": "Eagle",
"ebnf": "EBNF",
"ec": "eC",
"ecere_projects": "Ecere Projects",
"ecl": "ECL",
"eclipse": "ECLiPSe",
"ecr": "HTML+ECR",
"edn": "edn",
"eex": "HTML+EEX",
"eiffel": "Eiffel",
"ejs": "EJS",
"elisp": "Emacs Lisp",
"elixir": "Elixir",
"elm": "Elm",
"emacs": "Emacs Lisp",
"emacs_lisp": "Emacs Lisp",
"emberscript": "EmberScript",
"eq": "EQ",
"erb": "HTML+ERB",
"erlang": "Erlang",
"f#": "F#",
"factor": "Factor",
"fancy": "Fancy",
"fantom": "Fantom",
"filebench_wml": "Filebench WML",
"filterscript": "Filterscript",
"fish": "fish",
"flex": "Lex",
"flux": "FLUX",
"formatted": "Formatted",
"forth": "Forth",
"fortran": "Fortran",
"foxpro": "xBase",
"freemarker": "FreeMarker",
"frege": "Frege",
"fsharp": "F#",
"ftl": "FreeMarker",
"fundamental": "Text",
"g-code": "G-code",
"game_maker_language": "Game Maker Language",
"gams": "GAMS",
"gap": "GAP",
"gcc_machine_description": "GCC Machine Description",
"gdb": "GDB",
"gdscript": "GDScript",
"genie": "Genie",
"genshi": "Genshi",
"gentoo_ebuild": "Gentoo Ebuild",
"gentoo_eclass": "Gentoo Eclass",
"gettext_catalog": "Gettext Catalog",
"gf": "Grammatical Framework",
"gherkin": "Gherkin",
"glsl": "GLSL",
"glyph": "Glyph",
"gn": "GN",
"gnuplot": "Gnuplot",
"go": "Go",
"golang": "Go",
"golo": "Golo",
"gosu": "Gosu",
"grace": "Grace",
"gradle": "Gradle",
"grammatical_framework": "Grammatical Framework",
"graph_modeling_language": "Graph Modeling Language",
"graphql": "GraphQL",
"graphviz_(dot)": "Graphviz (DOT)",
"groovy": "Groovy",
"groovy_server_pages": "Groovy Server Pages",
"gsp": "Groovy Server Pages",
"hack": "Hack",
"haml": "Haml",
"handlebars": "Handlebars",
"harbour": "Harbour",
"haskell": "Haskell",
"haxe": "Haxe",
"hbs": "Handlebars",
"hcl": "HCL",
"hlsl": "HLSL",
"html": "HTML",
"html+django": "HTML+Django",
"html+django/jinja": "HTML+Django",
"html+ecr": "HTML+ECR",
"html+eex": "HTML+EEX",
"html+erb": "HTML+ERB",
"html+jinja": "HTML+Django",
"html+php": "HTML+PHP",
"html+ruby": "RHTML",
"htmlbars": "Handlebars",
"htmldjango": "HTML+Django",
"http": "HTTP",
"hy": "Hy",
"hylang": "Hy",
"hyphy": "HyPhy",
"i7": "Inform 7",
"idl": "IDL",
"idris": "Idris",
"igor": "IGOR Pro",
"igor_pro": "IGOR Pro",
"igorpro": "IGOR Pro",
"inc": "PHP",
"inform7": "Inform 7",
"inform_7": "Inform 7",
"ini": "INI",
"inno_setup": "Inno Setup",
"io": "Io",
"ioke": "Ioke",
"ipython_notebook": "Jupyter Notebook",
"irc": "IRC log",
"irc_log": "IRC log",
"irc_logs": "IRC log",
"isabelle": "Isabelle",
"isabelle_root": "Isabelle ROOT",
"j": "J",
"jasmin": "Jasmin",
"java": "Java",
"java_server_page": "Groovy Server Pages",
"java_server_pages": "Java Server Pages",
"javascript": "JavaScript",
"jflex": "JFlex",
"jison": "Jison",
"jison_lex": "Jison Lex",
"jolie": "Jolie",
"jruby": "Ruby",
"js": "JavaScript",
"json": "JSON",
"json5": "JSON5",
"jsoniq": "JSONiq",
"jsonld": "JSONLD",
"jsp": "Java Server Pages",
"jsx": "JSX",
"julia": "Julia",
"jupyter_notebook": "Jupyter Notebook",
"kicad": "KiCad",
"kit": "Kit",
"kotlin": "Kotlin",
"krl": "KRL",
"labview": "LabVIEW",
"lasso": "Lasso",
"lassoscript": "Lasso",
"latex": "TeX",
"latte": "Latte",
"lean": "Lean",
"less": "Less",
"lex": "Lex",
"lfe": "LFE",
"lhaskell": "Literate Haskell",
"lhs": "Literate Haskell",
"lilypond": "LilyPond",
"limbo": "Limbo",
"linker_script": "Linker Script",
"linux_kernel_module": "Linux Kernel Module",
"liquid": "Liquid",
"lisp": "Common Lisp",
"litcoffee": "Literate CoffeeScript",
"literate_agda": "Literate Agda",
"literate_coffeescript": "Literate CoffeeScript",
"literate_haskell": "Literate Haskell",
"live-script": "LiveScript",
"livescript": "LiveScript",
"llvm": "LLVM",
"logos": "Logos",
"logtalk": "Logtalk",
"lolcode": "LOLCODE",
"lookml": "LookML",
"loomscript": "LoomScript",
"ls": "LiveScript",
"lsl": "LSL",
"lua": "Lua",
"m": "M",
"m4": "M4",
"m4sugar": "M4Sugar",
"macruby": "Ruby",
"make": "Makefile",
"makefile": "Makefile",
"mako": "Mako",
"markdown": "Markdown",
"marko": "Marko",
"markojs": "Marko",
"mask": "Mask",
"mathematica": "Mathematica",
"matlab": "Matlab",
"maven_pom": "Maven POM",
"max": "Max",
"max/msp": "Max",
"maxmsp": "Max",
"maxscript": "MAXScript",
"mediawiki": "MediaWiki",
"mercury": "Mercury",
"meson": "Meson",
"metal": "Metal",
"mf": "Makefile",
"minid": "MiniD",
"mirah": "Mirah",
"mma": "Mathematica",
"modelica": "Modelica",
"modula-2": "Modula-2",
"module_management_system": "Module Management System",
"monkey": "Monkey",
"moocode": "Moocode",
"moonscript": "MoonScript",
"mql4": "MQL4",
"mql5": "MQL5",
"mtml": "MTML",
"muf": "MUF",
"mumps": "M",
"mupad": "mupad",
"myghty": "Myghty",
"nasm": "Assembly",
"ncl": "NCL",
"nemerle": "Nemerle",
"nesc": "nesC",
"netlinx": "NetLinx",
"netlinx+erb": "NetLinx+ERB",
"netlogo": "NetLogo",
"newlisp": "NewLisp",
"nginx": "Nginx",
"nginx_configuration_file": "Nginx",
"nim": "Nim",
"ninja": "Ninja",
"nit": "Nit",
"nix": "Nix",
"nixos": "Nix",
"njk": "HTML+Django",
"nl": "NL",
"node": "JavaScript",
"nroff": "Roff",
"nsis": "NSIS",
"nu": "Nu",
"numpy": "NumPy",
"nunjucks": "HTML+Django",
"nush": "Nu",
"nvim": "Vim script",
"obj-c": "Objective-C",
"obj-c++": "Objective-C++",
"obj-j": "Objective-J",
"objc": "Objective-C",
"objc++": "Objective-C++",
"objdump": "ObjDump",
"objective-c": "Objective-C",
"objective-c++": "Objective-C++",
"objective-j": "Objective-J",
"objectivec": "Objective-C",
"objectivec++": "Objective-C++",
"objectivej": "Objective-J",
"objectpascal": "Component Pascal",
"objj": "Objective-J",
"ocaml": "OCaml",
"octave": "Matlab",
"omgrofl": "Omgrofl",
"ooc": "ooc",
"opa": "Opa",
"opal": "Opal",
"opencl": "OpenCL",
"openedge": "OpenEdge ABL",
"openedge_abl": "OpenEdge ABL",
"openrc": "OpenRC runscript",
"openrc_runscript": "OpenRC runscript",
"openscad": "OpenSCAD",
"opentype_feature_file": "OpenType Feature File",
"org": "Org",
"osascript": "AppleScript",
"ox": "Ox",
"oxygene": "Oxygene",
"oz": "Oz",
"p4": "P4",
"pan": "Pan",
"pandoc": "Markdown",
"papyrus": "Papyrus",
"parrot": "Parrot",
"parrot_assembly": "Parrot Assembly",
"parrot_internal_representation": "Parrot Internal Representation",
"pascal": "Pascal",
"pasm": "Parrot Assembly",
"pawn": "PAWN",
"pep8": "Pep8",
"perl": "Perl",
"perl6": "Perl6",
"php": "PHP",
"pic": "Pic",
"pickle": "Pickle",
"picolisp": "PicoLisp",
"piglatin": "PigLatin",
"pike": "Pike",
"pir": "Parrot Internal Representation",
"plpgsql": "PLpgSQL",
"plsql": "PLSQL",
"pod": "Pod",
"pogoscript": "PogoScript",
"pony": "Pony",
"posh": "PowerShell",
"postscr": "PostScript",
"postscript": "PostScript",
"pot": "Gettext Catalog",
"pov-ray": "POV-Ray SDL",
"pov-ray_sdl": "POV-Ray SDL",
"povray": "POV-Ray SDL",
"powerbuilder": "PowerBuilder",
"powershell": "PowerShell",
"processing": "Processing",
"progress": "OpenEdge ABL",
"prolog": "Prolog",
"propeller_spin": "Propeller Spin",
"protobuf": "Protocol Buffer",
"protocol_buffer": "Protocol Buffer",
"protocol_buffers": "Protocol Buffer",
"public_key": "Public Key",
"pug": "Pug",
"puppet": "Puppet",
"pure_data": "Pure Data",
"purebasic": "PureBasic",
"purescript": "PureScript",
"pycon": "Python console",
"pyrex": "Cython",
"python": "Python",
"python_console": "Python console",
"python_traceback": "Python traceback",
"qmake": "QMake",
"qml": "QML",
"r": "R",
"racket": "Racket",
"ragel": "Ragel",
"ragel-rb": "Ragel",
"ragel-ruby": "Ragel",
"rake": "Ruby",
"raml": "RAML",
"rascal": "Rascal",
"raw": "Raw token data",
"raw_token_data": "Raw token data",
"rb": "Ruby",
"rbx": "Ruby",
"rdoc": "RDoc",
"realbasic": "REALbasic",
"reason": "Reason",
"rebol": "Rebol",
"red": "Red",
"red/system": "Red",
"redcode": "Redcode",
"regex": "Regular Expression",
"regexp": "Regular Expression",
"regular_expression": "Regular Expression",
"ren'py": "Ren'Py",
"renderscript": "RenderScript",
"renpy": "Ren'Py",
"restructuredtext": "reStructuredText",
"rexx": "REXX",
"rhtml": "RHTML",
"rmarkdown": "RMarkdown",
"robotframework": "RobotFramework",
"roff": "Roff",
"rouge": "Rouge",
"rpm_spec": "RPM Spec",
"rscript": "R",
"rss": "XML",
"rst": "reStructuredText",
"ruby": "Ruby",
"runoff": "RUNOFF",
"rust": "Rust",
"rusthon": "Python",
"sage": "Sage",
"salt": "SaltStack",
"saltstack": "SaltStack",
"saltstate": "SaltStack",
"sas": "SAS",
"sass": "Sass",
"scala": "Scala",
"scaml": "Scaml",
"scheme": "Scheme",
"scilab": "Scilab",
"scss": "SCSS",
"self": "Self",
"sh": "Shell",
"shaderlab": "ShaderLab",
"shell": "Shell",
"shell-script": "Shell",
"shellsession": "ShellSession",
"shen": "Shen",
"slash": "Slash",
"slim": "Slim",
"smali": "Smali",
"smalltalk": "Smalltalk",
"smarty": "Smarty",
"sml": "Standard ML",
"smt": "SMT",
"sourcemod": "SourcePawn",
"sourcepawn": "SourcePawn",
"sparql": "SPARQL",
"specfile": "RPM Spec",
"spline_font_database": "Spline Font Database",
"splus": "R",
"sqf": "SQF",
"sql": "SQL",
"sqlpl": "SQLPL",
"squeak": "Smalltalk",
"squirrel": "Squirrel",
"srecode_template": "SRecode Template",
"stan": "Stan",
"standard_ml": "Standard ML",
"stata": "Stata",
"ston": "STON",
"stylus": "Stylus",
"sublime_text_config": "Sublime Text Config",
"subrip_text": "SubRip Text",
"supercollider": "SuperCollider",
"svg": "SVG",
"swift": "Swift",
"systemverilog": "SystemVerilog",
"tcl": "Tcl",
"tcsh": "Tcsh",
"tea": "Tea",
"terra": "Terra",
"tex": "TeX",
"text": "Text",
"textile": "Textile",
"thrift": "Thrift",
"ti_program": "TI Program",
"tl": "Type Language",
"tla": "TLA",
"toml": "TOML",
"ts": "TypeScript",
"turing": "Turing",
"turtle": "Turtle",
"twig": "Twig",
"txl": "TXL",
"type_language": "Type Language",
"typescript": "TypeScript",
"udiff": "Diff",
"unified_parallel_c": "Unified Parallel C",
"unity3d_asset": "Unity3D Asset",
"unix_assembly": "Unix Assembly",
"uno": "Uno",
"unrealscript": "UnrealScript",
"ur": "UrWeb",
"ur/web": "UrWeb",
"urweb": "UrWeb",
"vala": "Vala",
"vb.net": "Visual Basic",
"vbnet": "Visual Basic",
"vcl": "VCL",
"verilog": "Verilog",
"vhdl": "VHDL",
"vim": "Vim script",
"vim_script": "Vim script",
"viml": "Vim script",
"visual_basic": "Visual Basic",
"volt": "Volt",
"vue": "Vue",
"wasm": "WebAssembly",
"wast": "WebAssembly",
"wavefront_material": "Wavefront Material",
"wavefront_object": "Wavefront Object",
"web_ontology_language": "Web Ontology Language",
"webassembly": "WebAssembly",
"webidl": "WebIDL",
"winbatch": "Batchfile",
"wisp": "wisp",
"world_of_warcraft_addon_data": "World of Warcraft Addon Data",
"wsdl": "XML",
"x10": "X10",
"xbase": "xBase",
"xc": "XC",
"xcompose": "XCompose",
"xhtml": "HTML",
"xml": "XML",
"xml+genshi": "Genshi",
"xml+kid": "Genshi",
"xojo": "Xojo",
"xpages": "XPages",
"xproc": "XProc",
"xquery": "XQuery",
"xs": "XS",
"xsd": "XML",
"xsl": "XSLT",
"xslt": "XSLT",
"xten": "X10",
"xtend": "Xtend",
"yacc": "Yacc",
"yaml": "YAML",
"yang": "YANG",
"yml": "YAML",
"zephir": "Zephir",
"zimpl": "Zimpl",
"zsh": "Shell",
}

View File

@ -1,28 +0,0 @@
package slinguist
import . "gopkg.in/check.v1"
// TestGetLanguageByAlias checks that GetLanguageByAlias resolves aliases regardless of their letter case and
// that an unknown alias yields OtherLanguage with ok set to false.
func (s *TSuite) TestGetLanguageByAlias(c *C) {
	type aliasCase struct {
		alias        string
		expectedLang string
		expectedOk   bool
	}

	cases := []aliasCase{
		{"BestLanguageEver", OtherLanguage, false},
		{"aspx-vb", "ASP", true},
		{"C++", "C++", true},
		{"c++", "C++", true},
		{"objc", "Objective-C", true},
		{"golang", "Go", true},
		{"GOLANG", "Go", true},
		{"bsdmake", "Makefile", true},
		{"xhTmL", "HTML", true},
		{"python", "Python", true},
	}

	for i := range cases {
		gotLang, gotOk := GetLanguageByAlias(cases[i].alias)
		c.Assert(gotLang, Equals, cases[i].expectedLang)
		c.Assert(gotOk, Equals, cases[i].expectedOk)
	}
}

View File

@ -1,644 +0,0 @@
package slinguist
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: 60f864a138650dd17fafc94814be9ee2d3aaef8c
// languagesByAlias maps every alias to its language; the name of each language is registered as an alias too. All the
// keys (aliases or names) are written in lower case and their whitespace has been replaced by underscores.
var languagesByAlias = map[string]string{
"1c_enterprise": "1C Enterprise",
"abap": "ABAP",
"abl": "OpenEdge ABL",
"abnf": "ABNF",
"abuild": "Alpine Abuild",
"aconf": "ApacheConf",
"actionscript": "ActionScript",
"actionscript3": "ActionScript",
"actionscript_3": "ActionScript",
"ada": "Ada",
"ada2005": "Ada",
"ada95": "Ada",
"advpl": "xBase",
"afdko": "OpenType Feature File",
"agda": "Agda",
"ags": "AGS Script",
"ags_script": "AGS Script",
"ahk": "AutoHotkey",
"alloy": "Alloy",
"alpine_abuild": "Alpine Abuild",
"ampl": "AMPL",
"ant_build_system": "Ant Build System",
"antlr": "ANTLR",
"apache": "ApacheConf",
"apacheconf": "ApacheConf",
"apex": "Apex",
"api_blueprint": "API Blueprint",
"apkbuild": "Alpine Abuild",
"apl": "APL",
"apollo_guidance_computer": "Apollo Guidance Computer",
"applescript": "AppleScript",
"arc": "Arc",
"arduino": "Arduino",
"arexx": "REXX",
"as3": "ActionScript",
"asciidoc": "AsciiDoc",
"asn.1": "ASN.1",
"asp": "ASP",
"aspectj": "AspectJ",
"aspx": "ASP",
"aspx-vb": "ASP",
"assembly": "Assembly",
"ats": "ATS",
"ats2": "ATS",
"au3": "AutoIt",
"augeas": "Augeas",
"autoconf": "M4Sugar",
"autohotkey": "AutoHotkey",
"autoit": "AutoIt",
"autoit3": "AutoIt",
"autoitscript": "AutoIt",
"awk": "Awk",
"b3d": "BlitzBasic",
"bash": "Shell",
"bash_session": "ShellSession",
"bat": "Batchfile",
"batch": "Batchfile",
"batchfile": "Batchfile",
"befunge": "Befunge",
"bison": "Bison",
"bitbake": "BitBake",
"blade": "Blade",
"blitz3d": "BlitzBasic",
"blitzbasic": "BlitzBasic",
"blitzmax": "BlitzMax",
"blitzplus": "BlitzBasic",
"bluespec": "Bluespec",
"bmax": "BlitzMax",
"boo": "Boo",
"bplus": "BlitzBasic",
"brainfuck": "Brainfuck",
"brightscript": "Brightscript",
"bro": "Bro",
"bsdmake": "Makefile",
"byond": "DM",
"c": "C",
"c#": "C#",
"c++": "C++",
"c++-objdump": "Cpp-ObjDump",
"c-objdump": "C-ObjDump",
"c2hs": "C2hs Haskell",
"c2hs_haskell": "C2hs Haskell",
"cap'n_proto": "Cap'n Proto",
"carto": "CartoCSS",
"cartocss": "CartoCSS",
"ceylon": "Ceylon",
"cfc": "ColdFusion CFC",
"cfm": "ColdFusion",
"cfml": "ColdFusion",
"chapel": "Chapel",
"charity": "Charity",
"chpl": "Chapel",
"chuck": "ChucK",
"cirru": "Cirru",
"clarion": "Clarion",
"clean": "Clean",
"click": "Click",
"clipper": "xBase",
"clips": "CLIPS",
"clojure": "Clojure",
"closure_templates": "Closure Templates",
"cmake": "CMake",
"cobol": "COBOL",
"coffee": "CoffeeScript",
"coffee-script": "CoffeeScript",
"coffeescript": "CoffeeScript",
"coldfusion": "ColdFusion",
"coldfusion_cfc": "ColdFusion CFC",
"coldfusion_html": "ColdFusion",
"collada": "COLLADA",
"common_lisp": "Common Lisp",
"component_pascal": "Component Pascal",
"console": "ShellSession",
"cool": "Cool",
"coq": "Coq",
"cpp": "C++",
"cpp-objdump": "Cpp-ObjDump",
"creole": "Creole",
"crystal": "Crystal",
"csharp": "C#",
"cson": "CSON",
"csound": "Csound",
"csound-csd": "Csound Document",
"csound-orc": "Csound",
"csound-sco": "Csound Score",
"csound_document": "Csound Document",
"csound_score": "Csound Score",
"css": "CSS",
"csv": "CSV",
"cucumber": "Gherkin",
"cuda": "Cuda",
"cweb": "CWeb",
"cycript": "Cycript",
"cython": "Cython",
"d": "D",
"d-objdump": "D-ObjDump",
"darcs_patch": "Darcs Patch",
"dart": "Dart",
"dcl": "DIGITAL Command Language",
"delphi": "Component Pascal",
"desktop": "desktop",
"diff": "Diff",
"digital_command_language": "DIGITAL Command Language",
"django": "HTML+Django",
"dm": "DM",
"dns_zone": "DNS Zone",
"dockerfile": "Dockerfile",
"dogescript": "Dogescript",
"dosbatch": "Batchfile",
"dosini": "INI",
"dpatch": "Darcs Patch",
"dtrace": "DTrace",
"dtrace-script": "DTrace",
"dylan": "Dylan",
"e": "E",
"eagle": "Eagle",
"ebnf": "EBNF",
"ec": "eC",
"ecere_projects": "Ecere Projects",
"ecl": "ECL",
"eclipse": "ECLiPSe",
"ecr": "HTML+ECR",
"edn": "edn",
"eex": "HTML+EEX",
"eiffel": "Eiffel",
"ejs": "EJS",
"elisp": "Emacs Lisp",
"elixir": "Elixir",
"elm": "Elm",
"emacs": "Emacs Lisp",
"emacs_lisp": "Emacs Lisp",
"emberscript": "EmberScript",
"eq": "EQ",
"erb": "HTML+ERB",
"erlang": "Erlang",
"f#": "F#",
"factor": "Factor",
"fancy": "Fancy",
"fantom": "Fantom",
"filebench_wml": "Filebench WML",
"filterscript": "Filterscript",
"fish": "fish",
"flex": "Lex",
"flux": "FLUX",
"formatted": "Formatted",
"forth": "Forth",
"fortran": "Fortran",
"foxpro": "xBase",
"freemarker": "FreeMarker",
"frege": "Frege",
"fsharp": "F#",
"ftl": "FreeMarker",
"fundamental": "Text",
"g-code": "G-code",
"game_maker_language": "Game Maker Language",
"gams": "GAMS",
"gap": "GAP",
"gcc_machine_description": "GCC Machine Description",
"gdb": "GDB",
"gdscript": "GDScript",
"genie": "Genie",
"genshi": "Genshi",
"gentoo_ebuild": "Gentoo Ebuild",
"gentoo_eclass": "Gentoo Eclass",
"gettext_catalog": "Gettext Catalog",
"gf": "Grammatical Framework",
"gherkin": "Gherkin",
"glsl": "GLSL",
"glyph": "Glyph",
"gn": "GN",
"gnuplot": "Gnuplot",
"go": "Go",
"golang": "Go",
"golo": "Golo",
"gosu": "Gosu",
"grace": "Grace",
"gradle": "Gradle",
"grammatical_framework": "Grammatical Framework",
"graph_modeling_language": "Graph Modeling Language",
"graphql": "GraphQL",
"graphviz_(dot)": "Graphviz (DOT)",
"groovy": "Groovy",
"groovy_server_pages": "Groovy Server Pages",
"gsp": "Groovy Server Pages",
"hack": "Hack",
"haml": "Haml",
"handlebars": "Handlebars",
"harbour": "Harbour",
"haskell": "Haskell",
"haxe": "Haxe",
"hbs": "Handlebars",
"hcl": "HCL",
"hlsl": "HLSL",
"html": "HTML",
"html+django": "HTML+Django",
"html+django/jinja": "HTML+Django",
"html+ecr": "HTML+ECR",
"html+eex": "HTML+EEX",
"html+erb": "HTML+ERB",
"html+jinja": "HTML+Django",
"html+php": "HTML+PHP",
"html+ruby": "RHTML",
"htmlbars": "Handlebars",
"htmldjango": "HTML+Django",
"http": "HTTP",
"hy": "Hy",
"hylang": "Hy",
"hyphy": "HyPhy",
"i7": "Inform 7",
"idl": "IDL",
"idris": "Idris",
"igor": "IGOR Pro",
"igor_pro": "IGOR Pro",
"igorpro": "IGOR Pro",
"inc": "PHP",
"inform7": "Inform 7",
"inform_7": "Inform 7",
"ini": "INI",
"inno_setup": "Inno Setup",
"io": "Io",
"ioke": "Ioke",
"ipython_notebook": "Jupyter Notebook",
"irc": "IRC log",
"irc_log": "IRC log",
"irc_logs": "IRC log",
"isabelle": "Isabelle",
"isabelle_root": "Isabelle ROOT",
"j": "J",
"jasmin": "Jasmin",
"java": "Java",
"java_server_page": "Groovy Server Pages",
"java_server_pages": "Java Server Pages",
"javascript": "JavaScript",
"jflex": "JFlex",
"jison": "Jison",
"jison_lex": "Jison Lex",
"jolie": "Jolie",
"jruby": "Ruby",
"js": "JavaScript",
"json": "JSON",
"json5": "JSON5",
"jsoniq": "JSONiq",
"jsonld": "JSONLD",
"jsp": "Java Server Pages",
"jsx": "JSX",
"julia": "Julia",
"jupyter_notebook": "Jupyter Notebook",
"kicad": "KiCad",
"kit": "Kit",
"kotlin": "Kotlin",
"krl": "KRL",
"labview": "LabVIEW",
"lasso": "Lasso",
"lassoscript": "Lasso",
"latex": "TeX",
"latte": "Latte",
"lean": "Lean",
"less": "Less",
"lex": "Lex",
"lfe": "LFE",
"lhaskell": "Literate Haskell",
"lhs": "Literate Haskell",
"lilypond": "LilyPond",
"limbo": "Limbo",
"linker_script": "Linker Script",
"linux_kernel_module": "Linux Kernel Module",
"liquid": "Liquid",
"lisp": "Common Lisp",
"litcoffee": "Literate CoffeeScript",
"literate_agda": "Literate Agda",
"literate_coffeescript": "Literate CoffeeScript",
"literate_haskell": "Literate Haskell",
"live-script": "LiveScript",
"livescript": "LiveScript",
"llvm": "LLVM",
"logos": "Logos",
"logtalk": "Logtalk",
"lolcode": "LOLCODE",
"lookml": "LookML",
"loomscript": "LoomScript",
"ls": "LiveScript",
"lsl": "LSL",
"lua": "Lua",
"m": "M",
"m4": "M4",
"m4sugar": "M4Sugar",
"macruby": "Ruby",
"make": "Makefile",
"makefile": "Makefile",
"mako": "Mako",
"markdown": "Markdown",
"marko": "Marko",
"markojs": "Marko",
"mask": "Mask",
"mathematica": "Mathematica",
"matlab": "Matlab",
"maven_pom": "Maven POM",
"max": "Max",
"max/msp": "Max",
"maxmsp": "Max",
"maxscript": "MAXScript",
"mediawiki": "MediaWiki",
"mercury": "Mercury",
"meson": "Meson",
"metal": "Metal",
"mf": "Makefile",
"minid": "MiniD",
"mirah": "Mirah",
"mma": "Mathematica",
"modelica": "Modelica",
"modula-2": "Modula-2",
"module_management_system": "Module Management System",
"monkey": "Monkey",
"moocode": "Moocode",
"moonscript": "MoonScript",
"mql4": "MQL4",
"mql5": "MQL5",
"mtml": "MTML",
"muf": "MUF",
"mumps": "M",
"mupad": "mupad",
"myghty": "Myghty",
"nasm": "Assembly",
"ncl": "NCL",
"nemerle": "Nemerle",
"nesc": "nesC",
"netlinx": "NetLinx",
"netlinx+erb": "NetLinx+ERB",
"netlogo": "NetLogo",
"newlisp": "NewLisp",
"nginx": "Nginx",
"nginx_configuration_file": "Nginx",
"nim": "Nim",
"ninja": "Ninja",
"nit": "Nit",
"nix": "Nix",
"nixos": "Nix",
"njk": "HTML+Django",
"nl": "NL",
"node": "JavaScript",
"nroff": "Roff",
"nsis": "NSIS",
"nu": "Nu",
"numpy": "NumPy",
"nunjucks": "HTML+Django",
"nush": "Nu",
"nvim": "Vim script",
"obj-c": "Objective-C",
"obj-c++": "Objective-C++",
"obj-j": "Objective-J",
"objc": "Objective-C",
"objc++": "Objective-C++",
"objdump": "ObjDump",
"objective-c": "Objective-C",
"objective-c++": "Objective-C++",
"objective-j": "Objective-J",
"objectivec": "Objective-C",
"objectivec++": "Objective-C++",
"objectivej": "Objective-J",
"objectpascal": "Component Pascal",
"objj": "Objective-J",
"ocaml": "OCaml",
"octave": "Matlab",
"omgrofl": "Omgrofl",
"ooc": "ooc",
"opa": "Opa",
"opal": "Opal",
"opencl": "OpenCL",
"openedge": "OpenEdge ABL",
"openedge_abl": "OpenEdge ABL",
"openrc": "OpenRC runscript",
"openrc_runscript": "OpenRC runscript",
"openscad": "OpenSCAD",
"opentype_feature_file": "OpenType Feature File",
"org": "Org",
"osascript": "AppleScript",
"ox": "Ox",
"oxygene": "Oxygene",
"oz": "Oz",
"p4": "P4",
"pan": "Pan",
"pandoc": "Markdown",
"papyrus": "Papyrus",
"parrot": "Parrot",
"parrot_assembly": "Parrot Assembly",
"parrot_internal_representation": "Parrot Internal Representation",
"pascal": "Pascal",
"pasm": "Parrot Assembly",
"pawn": "PAWN",
"pep8": "Pep8",
"perl": "Perl",
"perl6": "Perl6",
"php": "PHP",
"pic": "Pic",
"pickle": "Pickle",
"picolisp": "PicoLisp",
"piglatin": "PigLatin",
"pike": "Pike",
"pir": "Parrot Internal Representation",
"plpgsql": "PLpgSQL",
"plsql": "PLSQL",
"pod": "Pod",
"pogoscript": "PogoScript",
"pony": "Pony",
"posh": "PowerShell",
"postscr": "PostScript",
"postscript": "PostScript",
"pot": "Gettext Catalog",
"pov-ray": "POV-Ray SDL",
"pov-ray_sdl": "POV-Ray SDL",
"povray": "POV-Ray SDL",
"powerbuilder": "PowerBuilder",
"powershell": "PowerShell",
"processing": "Processing",
"progress": "OpenEdge ABL",
"prolog": "Prolog",
"propeller_spin": "Propeller Spin",
"protobuf": "Protocol Buffer",
"protocol_buffer": "Protocol Buffer",
"protocol_buffers": "Protocol Buffer",
"public_key": "Public Key",
"pug": "Pug",
"puppet": "Puppet",
"pure_data": "Pure Data",
"purebasic": "PureBasic",
"purescript": "PureScript",
"pycon": "Python console",
"pyrex": "Cython",
"python": "Python",
"python_console": "Python console",
"python_traceback": "Python traceback",
"qmake": "QMake",
"qml": "QML",
"r": "R",
"racket": "Racket",
"ragel": "Ragel",
"ragel-rb": "Ragel",
"ragel-ruby": "Ragel",
"rake": "Ruby",
"raml": "RAML",
"rascal": "Rascal",
"raw": "Raw token data",
"raw_token_data": "Raw token data",
"rb": "Ruby",
"rbx": "Ruby",
"rdoc": "RDoc",
"realbasic": "REALbasic",
"reason": "Reason",
"rebol": "Rebol",
"red": "Red",
"red/system": "Red",
"redcode": "Redcode",
"regex": "Regular Expression",
"regexp": "Regular Expression",
"regular_expression": "Regular Expression",
"ren'py": "Ren'Py",
"renderscript": "RenderScript",
"renpy": "Ren'Py",
"restructuredtext": "reStructuredText",
"rexx": "REXX",
"rhtml": "RHTML",
"rmarkdown": "RMarkdown",
"robotframework": "RobotFramework",
"roff": "Roff",
"rouge": "Rouge",
"rpm_spec": "RPM Spec",
"rscript": "R",
"rss": "XML",
"rst": "reStructuredText",
"ruby": "Ruby",
"runoff": "RUNOFF",
"rust": "Rust",
"rusthon": "Python",
"sage": "Sage",
"salt": "SaltStack",
"saltstack": "SaltStack",
"saltstate": "SaltStack",
"sas": "SAS",
"sass": "Sass",
"scala": "Scala",
"scaml": "Scaml",
"scheme": "Scheme",
"scilab": "Scilab",
"scss": "SCSS",
"self": "Self",
"sh": "Shell",
"shaderlab": "ShaderLab",
"shell": "Shell",
"shell-script": "Shell",
"shellsession": "ShellSession",
"shen": "Shen",
"slash": "Slash",
"slim": "Slim",
"smali": "Smali",
"smalltalk": "Smalltalk",
"smarty": "Smarty",
"sml": "Standard ML",
"smt": "SMT",
"sourcemod": "SourcePawn",
"sourcepawn": "SourcePawn",
"sparql": "SPARQL",
"specfile": "RPM Spec",
"spline_font_database": "Spline Font Database",
"splus": "R",
"sqf": "SQF",
"sql": "SQL",
"sqlpl": "SQLPL",
"squeak": "Smalltalk",
"squirrel": "Squirrel",
"srecode_template": "SRecode Template",
"stan": "Stan",
"standard_ml": "Standard ML",
"stata": "Stata",
"ston": "STON",
"stylus": "Stylus",
"sublime_text_config": "Sublime Text Config",
"subrip_text": "SubRip Text",
"supercollider": "SuperCollider",
"svg": "SVG",
"swift": "Swift",
"systemverilog": "SystemVerilog",
"tcl": "Tcl",
"tcsh": "Tcsh",
"tea": "Tea",
"terra": "Terra",
"tex": "TeX",
"text": "Text",
"textile": "Textile",
"thrift": "Thrift",
"ti_program": "TI Program",
"tl": "Type Language",
"tla": "TLA",
"toml": "TOML",
"ts": "TypeScript",
"turing": "Turing",
"turtle": "Turtle",
"twig": "Twig",
"txl": "TXL",
"type_language": "Type Language",
"typescript": "TypeScript",
"udiff": "Diff",
"unified_parallel_c": "Unified Parallel C",
"unity3d_asset": "Unity3D Asset",
"unix_assembly": "Unix Assembly",
"uno": "Uno",
"unrealscript": "UnrealScript",
"ur": "UrWeb",
"ur/web": "UrWeb",
"urweb": "UrWeb",
"vala": "Vala",
"vb.net": "Visual Basic",
"vbnet": "Visual Basic",
"vcl": "VCL",
"verilog": "Verilog",
"vhdl": "VHDL",
"vim": "Vim script",
"vim_script": "Vim script",
"viml": "Vim script",
"visual_basic": "Visual Basic",
"volt": "Volt",
"vue": "Vue",
"wavefront_material": "Wavefront Material",
"wavefront_object": "Wavefront Object",
"web_ontology_language": "Web Ontology Language",
"webidl": "WebIDL",
"winbatch": "Batchfile",
"wisp": "wisp",
"world_of_warcraft_addon_data": "World of Warcraft Addon Data",
"wsdl": "XML",
"x10": "X10",
"xbase": "xBase",
"xc": "XC",
"xcompose": "XCompose",
"xhtml": "HTML",
"xml": "XML",
"xml+genshi": "Genshi",
"xml+kid": "Genshi",
"xojo": "Xojo",
"xpages": "XPages",
"xproc": "XProc",
"xquery": "XQuery",
"xs": "XS",
"xsd": "XML",
"xsl": "XSLT",
"xslt": "XSLT",
"xten": "X10",
"xtend": "Xtend",
"yacc": "Yacc",
"yaml": "YAML",
"yang": "YANG",
"yml": "YAML",
"zephir": "Zephir",
"zimpl": "Zimpl",
"zsh": "Shell",
}

View File

@ -6,38 +6,11 @@ import (
"gopkg.in/src-d/simple-linguist.v1/internal/tokenizer"
)
// GetLanguageByClassifier runs the classifier's Classify method over content and candidates and returns the
// language that obtained the highest score. When classifier is nil, DefaultClassifier is used instead. If the
// classifier produces no scores at all, OtherLanguage is returned.
func GetLanguageByClassifier(content []byte, candidates []string, classifier Classifier) string {
	scorer := classifier
	if scorer == nil {
		scorer = DefaultClassifier
	}

	if scores := scorer.Classify(content, candidates); len(scores) > 0 {
		return getLangugeHigherScore(scores)
	}

	return OtherLanguage
}
// getLangugeHigherScore returns the language holding the highest score in scores. It returns an empty string
// when scores is empty or when no score is greater than -math.MaxFloat64.
//
// Go randomises the iteration order of maps, so ties are broken explicitly: among the languages sharing the
// highest score, the lexicographically smallest name is returned. This keeps the result stable across calls.
func getLangugeHigherScore(scores map[string]float64) string {
	var (
		language string
		found    bool // true once some language has been selected
	)

	higher := -math.MaxFloat64
	for lang, score := range scores {
		switch {
		case higher < score:
			language, higher, found = lang, score, true
		case found && score == higher && lang < language:
			// Same score as the current best: prefer the smaller name, whatever the iteration order.
			language = lang
		}
	}

	return language
}
// Classifier is the interface that contains the method Classify which is in charge to assign scores to the possibles candidates.
// The scores must order the candidates so as the highest score be the most probably language of the content.
// The scores must order the candidates so that the highest score corresponds to the most probable language of the content.
// The candidates argument is a map which can be used to assign weights to languages dynamically.
type Classifier interface {
Classify(content []byte, candidates []string) map[string]float64
Classify(content []byte, candidates map[string]float64) map[string]float64
}
type classifier struct {
@ -46,36 +19,36 @@ type classifier struct {
tokensTotal float64
}
func (c *classifier) Classify(content []byte, candidates []string) map[string]float64 {
func (c *classifier) Classify(content []byte, candidates map[string]float64) map[string]float64 {
if len(content) == 0 {
return nil
}
var languages []string
var languages map[string]float64
if len(candidates) == 0 {
languages = c.knownLangs()
} else {
languages = make([]string, 0, len(candidates))
for _, candidate := range candidates {
languages = make(map[string]float64, len(candidates))
for candidate, weight := range candidates {
if lang, ok := GetLanguageByAlias(candidate); ok {
languages = append(languages, lang)
languages[lang] = weight
}
}
}
tokens := tokenizer.Tokenize(content)
scores := make(map[string]float64, len(languages))
for _, language := range languages {
for language := range languages {
scores[language] = c.tokensLogProbability(tokens, language) + c.languagesLogProbabilities[language]
}
return scores
}
func (c *classifier) knownLangs() []string {
langs := make([]string, 0, len(c.languagesLogProbabilities))
func (c *classifier) knownLangs() map[string]float64 {
langs := make(map[string]float64, len(c.languagesLogProbabilities))
for lang := range c.languagesLogProbabilities {
langs = append(langs, lang)
langs[lang]++
}
return langs

View File

@ -1,32 +0,0 @@
package slinguist
import (
"io/ioutil"
"path/filepath"
. "gopkg.in/check.v1"
)
// TestGetLanguageByClassifier runs the default classifier over linguist sample files,
// with and without an explicit candidate list, and checks the language it selects.
func (s *TSuite) TestGetLanguageByClassifier(c *C) {
	const samples = `.linguist/samples/`

	sample := func(relative string) string { return filepath.Join(samples, relative) }

	cases := []struct {
		filename     string
		candidates   []string
		expectedLang string
	}{
		{sample("C/blob.c"), []string{"python", "ruby", "c", "c++"}, "C"},
		{sample("C/blob.c"), nil, "C"},
		{sample("C/main.c"), nil, "C"},
		{sample("C/blob.c"), []string{"python", "ruby", "c++"}, "C++"},
		{sample("C/blob.c"), []string{"ruby"}, "Ruby"},
		{sample("Python/django-models-base.py"), []string{"python", "ruby", "c", "c++"}, "Python"},
		{sample("Python/django-models-base.py"), nil, "Python"},
	}

	for _, tc := range cases {
		content, err := ioutil.ReadFile(tc.filename)
		c.Assert(err, Equals, nil)

		// A nil classifier selects the package default.
		got := GetLanguageByClassifier(content, tc.candidates, nil)
		c.Assert(got, Equals, tc.expectedLang)
	}
}

View File

@ -21,7 +21,7 @@ func main() {
}
errors := false
o := make(map[string][]string, 0)
out := make(map[string][]string, 0)
err = filepath.Walk(root, func(path string, f os.FileInfo, err error) error {
if err != nil {
errors = true
@ -29,7 +29,23 @@ func main() {
return filepath.SkipDir
}
if slinguist.IsVendor(f.Name()) || slinguist.IsDotFile(f.Name()) {
relativePath, err := filepath.Rel(root, path)
if err != nil {
errors = true
log.Println(err)
return nil
}
if relativePath == "." {
return nil
}
if f.IsDir() {
relativePath = relativePath + "/"
}
if slinguist.IsVendor(relativePath) || slinguist.IsDotFile(relativePath) ||
slinguist.IsDocumentation(relativePath) || slinguist.IsConfiguration(relativePath) {
if f.IsDir() {
return filepath.SkipDir
}
@ -48,16 +64,12 @@ func main() {
return nil
}
l := slinguist.GetLanguage(filepath.Base(path), content)
r, err := filepath.Rel(root, path)
if err != nil {
errors = true
log.Println(err)
language := slinguist.GetLanguage(filepath.Base(path), content)
if language == slinguist.OtherLanguage {
return nil
}
o[l] = append(o[l], r)
out[language] = append(out[language], relativePath)
return nil
})
@ -65,8 +77,8 @@ func main() {
log.Fatal(err)
}
js, _ := json.MarshalIndent(o, "", " ")
fmt.Printf("%s\n", js)
data, _ := json.MarshalIndent(out, "", " ")
fmt.Printf("%s\n", data)
if errors {
os.Exit(2)

210
common.go
View File

@ -1,71 +1,187 @@
package slinguist
const OtherLanguage = "Other"
var (
ExtensionsByLanguage map[string][]string
ignoredExtensions = map[string]bool{
".asc": true, ".cgi": true, ".fcgi": true, ".gml": true, ".fx": true,
".vhost": true,
}
auxiliaryLanguages = map[string]bool{
"Other": true, "XML": true, "YAML": true, "TOML": true, "INI": true,
"JSON": true, "TeX": true, "Public Key": true, "AsciiDoc": true,
"AGS Script": true, "VimL": true, "Diff": true, "CMake": true, "fish": true,
"Awk": true, "Graphviz (DOT)": true, "Markdown": true, "desktop": true,
"XSLT": true, "SQL": true, "RMarkdown": true, "IRC log": true,
"reStructuredText": true, "Twig": true, "CSS": true, "Batchfile": true,
"Text": true, "HTML+ERB": true, "HTML": true, "Gettext Catalog": true,
"Smarty": true, "Raw token data": true,
}
import (
"math"
"path/filepath"
"strings"
)
func init() {
for l, _ := range ignoredExtensions {
languagesByExtension[l] = []string{OtherLanguage}
// OtherLanguage is the fallback value returned when a function cannot determine a specific language.
const OtherLanguage = "Other"
// Strategy is the signature shared by every language-detection strategy: it receives a filename
// and the file content and returns the languages it considers possible.
type Strategy func(filename string, content []byte) (languages []string)
// strategies lists the detection strategies in the order in which GetLanguage applies them.
var strategies = []Strategy{
GetLanguagesByModeline,
GetLanguagesByFilename,
GetLanguagesByShebang,
GetLanguagesByExtension,
GetLanguagesByContent,
}
ExtensionsByLanguage = reverseStringListMap(languagesByExtension)
}
// GetLanguageExtensions returns the different extensions being used by the
// language.
func GetLanguageExtensions(language string) []string {
return ExtensionsByLanguage[language]
}
// GetLanguage return the Language for a given filename and file content.
// GetLanguage applies a sequence of strategies based on the given filename and content
// to find out the most probable language to return.
func GetLanguage(filename string, content []byte) string {
if lang, safe := GetLanguageByModeline(content); safe {
candidates := map[string]float64{}
for _, strategy := range strategies {
languages := strategy(filename, content)
if len(languages) == 1 {
return languages[0]
}
if len(languages) > 0 {
for _, language := range languages {
candidates[language]++
}
}
}
if len(candidates) == 0 {
return OtherLanguage
}
lang := GetLanguageByClassifier(content, candidates, nil)
return lang
}
if lang, safe := GetLanguageByFilename(filename); safe {
return lang
// GetLanguageByModeline detects the language from the modeline found in content.
// lang is OtherLanguage when the modeline strategy yields nothing; safe is true only
// when that strategy yields exactly one candidate.
func GetLanguageByModeline(content []byte) (lang string, safe bool) {
	lang, safe = getLangAndSafe("", content, GetLanguagesByModeline)
	return lang, safe
}
if lang, safe := GetLanguageByShebang(content); safe {
return lang
// GetLanguageByFilename detects the language from the filename alone.
// lang is OtherLanguage when the filename is not recognized; safe is true only
// when the filename maps to exactly one language.
func GetLanguageByFilename(filename string) (lang string, safe bool) {
	lang, safe = getLangAndSafe(filename, nil, GetLanguagesByFilename)
	return lang, safe
}
if lang, safe := GetLanguageByExtension(filename); safe {
return lang
// GetLanguagesByFilename returns the possible languages for the given filename; content is ignored.
// Only the last path element is looked up, so a bare name ("Makefile") and a path
// ("src/Makefile") resolve to the same entry. It returns nil for an empty or unknown filename.
// Its signature matches the Strategy type.
func GetLanguagesByFilename(filename string, content []byte) []string {
	if filename == "" {
		// filepath.Base("") yields ".", which is not a filename worth looking up.
		return nil
	}

	return languagesByFilename[filepath.Base(filename)]
}
if lang, safe := GetLanguageByContent(filename, content); safe {
return lang
// GetLanguageByShebang detects the language from the shebang line found in content.
// lang is OtherLanguage when the shebang strategy yields nothing; safe is true only
// when that strategy yields exactly one candidate.
func GetLanguageByShebang(content []byte) (lang string, safe bool) {
	lang, safe = getLangAndSafe("", content, GetLanguagesByShebang)
	return lang, safe
}
lang := GetLanguageByClassifier(content, nil, nil)
return lang
// GetLanguageByExtension detects the language from the extension of filename.
// lang is OtherLanguage when the extension is not recognized; when several languages share
// the extension, lang is the first of them and safe is false.
func GetLanguageByExtension(filename string) (lang string, safe bool) {
	lang, safe = getLangAndSafe(filename, nil, GetLanguagesByExtension)
	return lang, safe
}
func reverseStringListMap(i map[string][]string) (o map[string][]string) {
o = map[string][]string{}
for key, set := range i {
for _, value := range set {
o[value] = append(o[value], key)
// GetLanguagesByExtension returns the possible languages for the extension of filename,
// matched case-insensitively; content is ignored. Its signature matches the Strategy type.
func GetLanguagesByExtension(filename string, content []byte) []string {
	extension := filepath.Ext(filename)
	return languagesByExtension[strings.ToLower(extension)]
}
// GetLanguageByContent detects the language by applying content heuristics chosen from the
// extension of filename. lang is OtherLanguage when no heuristic matches; safe is true only
// when the heuristics yield exactly one candidate.
func GetLanguageByContent(filename string, content []byte) (lang string, safe bool) {
	lang, safe = getLangAndSafe(filename, content, GetLanguagesByContent)
	return lang, safe
}
// GetLanguagesByContent returns the possible languages for the given content. The lower-cased
// extension of filename selects the content matcher to run; nil is returned when no matcher
// is registered for that extension. Its signature matches the Strategy type.
func GetLanguagesByContent(filename string, content []byte) []string {
	extension := filepath.Ext(filename)
	if matcher, found := contentMatchers[strings.ToLower(extension)]; found {
		return matcher(content)
	}

	return nil
}
// getLangAndSafe runs a single strategy and condenses its result into one language:
// no candidates gives (OtherLanguage, false), exactly one candidate gives (candidate, true),
// and several candidates give (the first candidate, false).
func getLangAndSafe(filename string, content []byte, getLanguageByStrategy Strategy) (lang string, safe bool) {
	switch languages := getLanguageByStrategy(filename, content); len(languages) {
	case 0:
		return OtherLanguage, false
	case 1:
		return languages[0], true
	default:
		return languages[0], false
	}
}
// GetLanguageByClassifier scores content against the candidates with the classifier's Classify
// method and returns the language with the highest score. A nil classifier means
// DefaultClassifier. OtherLanguage is returned when the classifier produces no scores.
func GetLanguageByClassifier(content []byte, candidates map[string]float64, classifier Classifier) string {
	if scores := GetLanguagesByClassifier(content, candidates, classifier); len(scores) > 0 {
		return getLangugeHigherScore(scores)
	}

	return OtherLanguage
}
// getLangugeHigherScore returns the language with the highest score in scores.
//
// Ties are broken by picking the lexicographically smallest language name, so the result does
// not depend on Go's randomized map iteration order. The running maximum is seeded from the
// first entry seen rather than from a sentinel, so a language whose score is -Inf can still be
// selected. NaN scores are ignored because they cannot be ordered. It returns "" when scores
// holds no comparable score.
func getLangugeHigherScore(scores map[string]float64) string {
	var (
		language string
		higher   float64
		found    bool
	)

	for lang, score := range scores {
		if math.IsNaN(score) {
			continue
		}

		switch {
		case !found, score > higher, score == higher && lang < language:
			language, higher, found = lang, score, true
		}
	}

	return language
}
// GetLanguagesByClassifier returns the scores computed by the classifier's Classify method for
// content and candidates: a map from language to score, where a higher score means a more
// probable language. A nil classifier means DefaultClassifier.
func GetLanguagesByClassifier(content []byte, candidates map[string]float64, classifier Classifier) map[string]float64 {
	chosen := classifier
	if chosen == nil {
		chosen = DefaultClassifier
	}

	return chosen.Classify(content, candidates)
}
// GetLanguageExtensions returns the file extensions registered for the given language,
// or nil when the language has none.
func GetLanguageExtensions(language string) []string {
	extensions := extensionsByLanguage[language]
	return extensions
}
// Type represents a language's type: data, programming, markup, prose, or unknown.
type Type int
// Type's values.
const (
// Unknown is the zero value of Type; GetLanguageType falls back to it for unrecognized languages.
Unknown Type = iota
// Data identifies data languages.
Data
// Programming identifies programming languages.
Programming
// Markup identifies markup languages.
Markup
// Prose identifies prose languages.
Prose
)
// GetLanguageType returns the Type registered for the given language,
// or Unknown when the language is not registered.
func GetLanguageType(language string) (langType Type) {
	if known, ok := languagesType[language]; ok {
		return known
	}

	return Unknown
}
// GetLanguageByAlias returns either the language related to the given alias and ok set to true
// or Otherlanguage and ok set to false if the alias is not recognized.
func GetLanguageByAlias(alias string) (lang string, ok bool) {
a := strings.Split(alias, `,`)[0]
a = strings.ToLower(a)
lang, ok = languagesByAlias[a]
if !ok {
lang = OtherLanguage
}
return

View File

@ -1,19 +1,351 @@
package slinguist
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"testing"
"text/tabwriter"
. "gopkg.in/check.v1"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
)
func Test(t *testing.T) { TestingT(t) }
type TSuite struct{}
var _ = Suite(&TSuite{})
func (s *TSuite) TestGetLanguage(c *C) {
c.Assert(GetLanguage("foo.py", []byte{}), Equals, "Python")
c.Assert(GetLanguage("foo.m", []byte(":- module")), Equals, "Mercury")
c.Assert(GetLanguage("foo.m", []byte{}), Equals, "Other")
// SimpleLinguistTestSuite is the testify suite whose methods hold this package's tests.
type SimpleLinguistTestSuite struct {
suite.Suite
}
func TestSimpleLinguistTestSuite(t *testing.T) {
suite.Run(t, new(SimpleLinguistTestSuite))
}
// TestGetLanguage checks the language GetLanguage reports for filename/content pairs.
func (s *SimpleLinguistTestSuite) TestGetLanguage() {
	cases := []struct {
		name     string
		filename string
		content  []byte
		expected string
	}{
		{"TestGetLanguage_1", "foo.py", []byte{}, "Python"},
		{"TestGetLanguage_2", "foo.m", []byte(":- module"), "Mercury"},
		{"TestGetLanguage_3", "foo.m", nil, OtherLanguage},
	}

	t := s.T()
	for _, tc := range cases {
		got := GetLanguage(tc.filename, tc.content)
		assert.Equal(t, tc.expected, got, "%v: %v, expected: %v", tc.name, got, tc.expected)
	}
}
// TestGetLanguageByModelineLinguist runs GetLanguageByModeline over linguist's modeline
// fixtures (Emacs and Vim styles) plus one sample file that carries no modeline.
func (s *SimpleLinguistTestSuite) TestGetLanguageByModelineLinguist() {
	const (
		modelinesDir = ".linguist/test/fixtures/Data/Modelines"
		samplesDir   = ".linguist/samples"
	)

	modeline := func(file string) string { return filepath.Join(modelinesDir, file) }

	cases := []struct {
		name         string
		filename     string
		expectedLang string
		expectedSafe bool
	}{
		// Emacs
		{"TestGetLanguageByModelineLinguist_1", modeline("example_smalltalk.md"), "Smalltalk", true},
		{"TestGetLanguageByModelineLinguist_2", modeline("fundamentalEmacs.c"), "Text", true},
		{"TestGetLanguageByModelineLinguist_3", modeline("iamphp.inc"), "PHP", true},
		{"TestGetLanguageByModelineLinguist_4", modeline("seeplusplusEmacs1"), "C++", true},
		{"TestGetLanguageByModelineLinguist_5", modeline("seeplusplusEmacs2"), "C++", true},
		{"TestGetLanguageByModelineLinguist_6", modeline("seeplusplusEmacs3"), "C++", true},
		{"TestGetLanguageByModelineLinguist_7", modeline("seeplusplusEmacs4"), "C++", true},
		{"TestGetLanguageByModelineLinguist_8", modeline("seeplusplusEmacs5"), "C++", true},
		{"TestGetLanguageByModelineLinguist_9", modeline("seeplusplusEmacs6"), "C++", true},
		{"TestGetLanguageByModelineLinguist_10", modeline("seeplusplusEmacs7"), "C++", true},
		{"TestGetLanguageByModelineLinguist_11", modeline("seeplusplusEmacs9"), "C++", true},
		{"TestGetLanguageByModelineLinguist_12", modeline("seeplusplusEmacs10"), "C++", true},
		{"TestGetLanguageByModelineLinguist_13", modeline("seeplusplusEmacs11"), "C++", true},
		{"TestGetLanguageByModelineLinguist_14", modeline("seeplusplusEmacs12"), "C++", true},

		// Vim
		{"TestGetLanguageByModelineLinguist_15", modeline("seeplusplus"), "C++", true},
		{"TestGetLanguageByModelineLinguist_16", modeline("iamjs.pl"), "JavaScript", true},
		{"TestGetLanguageByModelineLinguist_17", modeline("iamjs2.pl"), "JavaScript", true},
		{"TestGetLanguageByModelineLinguist_18", modeline("not_perl.pl"), "Prolog", true},
		{"TestGetLanguageByModelineLinguist_19", modeline("ruby"), "Ruby", true},
		{"TestGetLanguageByModelineLinguist_20", modeline("ruby2"), "Ruby", true},
		{"TestGetLanguageByModelineLinguist_21", modeline("ruby3"), "Ruby", true},
		{"TestGetLanguageByModelineLinguist_22", modeline("ruby4"), "Ruby", true},
		{"TestGetLanguageByModelineLinguist_23", modeline("ruby5"), "Ruby", true},
		{"TestGetLanguageByModelineLinguist_24", modeline("ruby6"), "Ruby", true},
		{"TestGetLanguageByModelineLinguist_25", modeline("ruby7"), "Ruby", true},
		{"TestGetLanguageByModelineLinguist_26", modeline("ruby8"), "Ruby", true},
		{"TestGetLanguageByModelineLinguist_27", modeline("ruby9"), "Ruby", true},
		{"TestGetLanguageByModelineLinguist_28", modeline("ruby10"), "Ruby", true},
		{"TestGetLanguageByModelineLinguist_29", modeline("ruby11"), "Ruby", true},
		{"TestGetLanguageByModelineLinguist_30", modeline("ruby12"), "Ruby", true},

		// No modeline at all
		{"TestGetLanguageByModelineLinguist_31", filepath.Join(samplesDir, "C/main.c"), OtherLanguage, false},
	}

	t := s.T()
	for _, tc := range cases {
		content, err := ioutil.ReadFile(tc.filename)
		assert.NoError(t, err)

		lang, safe := GetLanguageByModeline(content)
		assert.Equal(t, tc.expectedLang, lang, "%v: lang = %v, expected: %v", tc.name, lang, tc.expectedLang)
		assert.Equal(t, tc.expectedSafe, safe, "%v: safe = %v, expected: %v", tc.name, safe, tc.expectedSafe)
	}
}
// TestGetLanguageByModeline checks GetLanguageByModeline against hand-written Vim modelines:
// one naming conflicting languages, one naming a single language, and one naming none.
func (s *SimpleLinguistTestSuite) TestGetLanguageByModeline() {
	const (
		wrongVim  = `# vim: set syntax=ruby ft =python filetype=perl :`
		rightVim  = `/* vim: set syntax=python ft =python filetype=python */`
		noLangVim = `/* vim: set shiftwidth=4 softtabstop=0 cindent cinoptions={1s: */`
	)

	cases := []struct {
		name         string
		content      []byte
		expectedLang string
		expectedSafe bool
	}{
		{"TestGetLanguageByModeline_1", []byte(wrongVim), OtherLanguage, false},
		{"TestGetLanguageByModeline_2", []byte(rightVim), "Python", true},
		{"TestGetLanguageByModeline_3", []byte(noLangVim), OtherLanguage, false},
	}

	t := s.T()
	for _, tc := range cases {
		lang, safe := GetLanguageByModeline(tc.content)
		assert.Equal(t, tc.expectedLang, lang, "%v: lang = %v, expected: %v", tc.name, lang, tc.expectedLang)
		assert.Equal(t, tc.expectedSafe, safe, "%v: safe = %v, expected: %v", tc.name, safe, tc.expectedSafe)
	}
}
// TestGetLanguageByFilename checks the language and safe flag GetLanguageByFilename
// reports for well-known filenames and for an unknown one.
func (s *SimpleLinguistTestSuite) TestGetLanguageByFilename() {
	cases := []struct {
		name         string
		filename     string
		expectedLang string
		expectedSafe bool
	}{
		{"TestGetLanguageByFilename_1", "unknown.interpreter", OtherLanguage, false},
		{"TestGetLanguageByFilename_2", ".bashrc", "Shell", true},
		{"TestGetLanguageByFilename_3", "Dockerfile", "Dockerfile", true},
		{"TestGetLanguageByFilename_4", "Makefile.frag", "Makefile", true},
		{"TestGetLanguageByFilename_5", "makefile", "Makefile", true},
		{"TestGetLanguageByFilename_6", "Vagrantfile", "Ruby", true},
		{"TestGetLanguageByFilename_7", "_vimrc", "Vim script", true},
		{"TestGetLanguageByFilename_8", "pom.xml", "Maven POM", true},
	}

	t := s.T()
	for _, tc := range cases {
		lang, safe := GetLanguageByFilename(tc.filename)
		assert.Equal(t, tc.expectedLang, lang, "%v: lang = %v, expected: %v", tc.name, lang, tc.expectedLang)
		assert.Equal(t, tc.expectedSafe, safe, "%v: safe = %v, expected: %v", tc.name, safe, tc.expectedSafe)
	}
}
// TestGetLanguageByShebang checks GetLanguageByShebang on unknown, missing and well-known
// shebang lines, and on two multi-line scripts whose first line is #!/bin/sh.
func (s *SimpleLinguistTestSuite) TestGetLanguageByShebang() {
	const (
		multilineExecHack = `#!/bin/sh
# Next line is comment in Tcl, but not in sh... \
exec tclsh "$0" ${1+"$@"}`

		multilineNoExecHack = `#!/bin/sh
#<<<#
echo "A shell script in a zkl program ($0)"
echo "Now run zkl <this file> with Hello World as args"
zkl $0 Hello World!
exit
#<<<#
println("The shell script says ",vm.arglist.concat(" "));`
	)

	cases := []struct {
		name         string
		content      []byte
		expectedLang string
		expectedSafe bool
	}{
		{"TestGetLanguageByShebang_1", []byte(`#!/unknown/interpreter`), OtherLanguage, false},
		{"TestGetLanguageByShebang_2", []byte(`no shebang`), OtherLanguage, false},
		{"TestGetLanguageByShebang_3", []byte(`#!/usr/bin/env`), OtherLanguage, false},
		{"TestGetLanguageByShebang_4", []byte(`#!/usr/bin/python -tt`), "Python", true},
		{"TestGetLanguageByShebang_5", []byte(`#!/usr/bin/env python2.6`), "Python", true},
		{"TestGetLanguageByShebang_6", []byte(`#!/usr/bin/env perl`), "Perl", true},
		{"TestGetLanguageByShebang_7", []byte(`#! /bin/sh`), "Shell", true},
		{"TestGetLanguageByShebang_8", []byte(`#!bash`), "Shell", true},
		{"TestGetLanguageByShebang_9", []byte(multilineExecHack), "Tcl", true},
		{"TestGetLanguageByShebang_10", []byte(multilineNoExecHack), "Shell", true},
	}

	t := s.T()
	for _, tc := range cases {
		lang, safe := GetLanguageByShebang(tc.content)
		assert.Equal(t, tc.expectedLang, lang, "%v: lang = %v, expected: %v", tc.name, lang, tc.expectedLang)
		assert.Equal(t, tc.expectedSafe, safe, "%v: safe = %v, expected: %v", tc.name, safe, tc.expectedSafe)
	}
}
// TestGetLanguageByExtension checks GetLanguageByExtension for an unknown extension, an
// unambiguous one, and one shared by several languages.
func (s *SimpleLinguistTestSuite) TestGetLanguageByExtension() {
	cases := []struct {
		name         string
		filename     string
		expectedLang string
		expectedSafe bool
	}{
		{"TestGetLanguageByExtension_1", "foo.foo", OtherLanguage, false},
		{"TestGetLanguageByExtension_2", "foo.go", "Go", true},
		{"TestGetLanguageByExtension_3", "foo.go.php", "Hack", false},
	}

	t := s.T()
	for _, tc := range cases {
		lang, safe := GetLanguageByExtension(tc.filename)
		assert.Equal(t, tc.expectedLang, lang, "%v: lang = %v, expected: %v", tc.name, lang, tc.expectedLang)
		assert.Equal(t, tc.expectedSafe, safe, "%v: safe = %v, expected: %v", tc.name, safe, tc.expectedSafe)
	}
}
// TestGetLanguageByContentLinguistCorpus runs GetLanguageByContent over every linguist sample
// that has a file extension and prints a per-file table followed by summary counters. The
// expected language of a sample is the name of the directory that contains it. Mismatches are
// only reported, not asserted; the test fails only when the corpus cannot be walked or read.
func (s *SimpleLinguistTestSuite) TestGetLanguageByContentLinguistCorpus() {
	var total, failed, ok, other, unsafe int

	w := tabwriter.NewWriter(os.Stdout, 0, 8, 0, '\t', 0)

	walkErr := filepath.Walk(".linguist/samples", func(path string, f os.FileInfo, err error) error {
		if err != nil {
			// f may be nil when Walk reports an error, so stop before touching it.
			return err
		}

		if f.IsDir() {
			if f.Name() == "filenames" {
				return filepath.SkipDir
			}

			return nil
		}

		// Samples without an extension cannot select a content matcher.
		if filepath.Ext(path) == "" {
			return nil
		}

		content, err := ioutil.ReadFile(path)
		if err != nil {
			return err
		}

		expected := filepath.Base(filepath.Dir(path))
		filename := filepath.Base(path)

		total++
		obtained, safe := GetLanguageByContent(filename, content)
		if obtained == OtherLanguage {
			other++
		}

		status := "ok"
		if expected == obtained {
			ok++
		} else {
			status = "failed"
			failed++
			if !safe {
				unsafe++
			}
		}

		fmt.Fprintf(w, "%s\t%s\t%s\t%v\t%s\n", filename, expected, obtained, safe, status)
		return nil
	})
	assert.NoError(s.T(), walkErr)

	fmt.Fprintln(w)
	w.Flush()

	fmt.Printf("total files: %d, ok: %d, failed: %d, unsafe: %d, other: %d\n", total, ok, failed, unsafe, other)
}
// TestGetLanguageByClassifier runs the default classifier over linguist sample files,
// with and without explicit weighted candidates, and checks the language it selects.
func (s *SimpleLinguistTestSuite) TestGetLanguageByClassifier() {
	const samples = `.linguist/samples/`

	sample := func(relative string) string { return filepath.Join(samples, relative) }

	cases := []struct {
		name       string
		filename   string
		candidates map[string]float64
		expected   string
	}{
		{"TestGetLanguageByClassifier_1", sample("C/blob.c"), map[string]float64{"python": 1.00, "ruby": 1.00, "c": 1.00, "c++": 1.00}, "C"},
		{"TestGetLanguageByClassifier_2", sample("C/blob.c"), nil, "C"},
		{"TestGetLanguageByClassifier_3", sample("C/main.c"), nil, "C"},
		{"TestGetLanguageByClassifier_4", sample("C/blob.c"), map[string]float64{"python": 1.00, "ruby": 1.00, "c++": 1.00}, "C++"},
		{"TestGetLanguageByClassifier_5", sample("C/blob.c"), map[string]float64{"ruby": 1.00}, "Ruby"},
		{"TestGetLanguageByClassifier_6", sample("Python/django-models-base.py"), map[string]float64{"python": 1.00, "ruby": 1.00, "c": 1.00, "c++": 1.00}, "Python"},
		{"TestGetLanguageByClassifier_7", sample("Python/django-models-base.py"), nil, "Python"},
	}

	t := s.T()
	for _, tc := range cases {
		content, err := ioutil.ReadFile(tc.filename)
		assert.NoError(t, err)

		// A nil classifier selects the package default.
		lang := GetLanguageByClassifier(content, tc.candidates, nil)
		assert.Equal(t, tc.expected, lang, "%v: lang = %v, expected: %v", tc.name, lang, tc.expected)
	}
}
// TestGetLanguageExtensions checks the extensions GetLanguageExtensions reports for an
// unknown language, a language with extensions, and a language expected to have none.
func (s *SimpleLinguistTestSuite) TestGetLanguageExtensions() {
	cases := []struct {
		name     string
		language string
		expected []string
	}{
		{"TestGetLanguageExtensions_1", "foo", nil},
		{"TestGetLanguageExtensions_2", "COBOL", []string{".cob", ".cbl", ".ccp", ".cobol", ".cpy"}},
		{"TestGetLanguageExtensions_3", "Maven POM", nil},
	}

	t := s.T()
	for _, tc := range cases {
		extensions := GetLanguageExtensions(tc.language)
		assert.EqualValues(t, tc.expected, extensions, "%v: extensions = %v, expected: %v", tc.name, extensions, tc.expected)
	}
}
// TestGetLanguageType checks the Type GetLanguageType reports for an unknown language and for
// two languages of each known type.
func (s *SimpleLinguistTestSuite) TestGetLanguageType() {
	cases := []struct {
		name     string
		language string
		expected Type
	}{
		{"TestGetLanguageType_1", "BestLanguageEver", Unknown},
		{"TestGetLanguageType_2", "JSON", Data},
		{"TestGetLanguageType_3", "COLLADA", Data},
		{"TestGetLanguageType_4", "Go", Programming},
		{"TestGetLanguageType_5", "Brainfuck", Programming},
		{"TestGetLanguageType_6", "HTML", Markup},
		{"TestGetLanguageType_7", "Sass", Markup},
		{"TestGetLanguageType_8", "AsciiDoc", Prose},
		{"TestGetLanguageType_9", "Textile", Prose},
	}

	t := s.T()
	for _, tc := range cases {
		langType := GetLanguageType(tc.language)
		assert.Equal(t, tc.expected, langType, "%v: langType = %v, expected: %v", tc.name, langType, tc.expected)
	}
}
// TestGetLanguageByAlias checks the language and ok flag GetLanguageByAlias reports for an
// unknown alias and for known aliases written in different letter cases.
func (s *SimpleLinguistTestSuite) TestGetLanguageByAlias() {
	cases := []struct {
		name         string
		alias        string
		expectedLang string
		expectedOk   bool
	}{
		{"TestGetLanguageByAlias_1", "BestLanguageEver", OtherLanguage, false},
		{"TestGetLanguageByAlias_2", "aspx-vb", "ASP", true},
		{"TestGetLanguageByAlias_3", "C++", "C++", true},
		{"TestGetLanguageByAlias_4", "c++", "C++", true},
		{"TestGetLanguageByAlias_5", "objc", "Objective-C", true},
		{"TestGetLanguageByAlias_6", "golang", "Go", true},
		{"TestGetLanguageByAlias_7", "GOLANG", "Go", true},
		{"TestGetLanguageByAlias_8", "bsdmake", "Makefile", true},
		{"TestGetLanguageByAlias_9", "xhTmL", "HTML", true},
		{"TestGetLanguageByAlias_10", "python", "Python", true},
	}

	t := s.T()
	for _, tc := range cases {
		lang, ok := GetLanguageByAlias(tc.alias)
		assert.Equal(t, tc.expectedLang, lang, "%v: lang = %v, expected: %v", tc.name, lang, tc.expectedLang)
		assert.Equal(t, tc.expectedOk, ok, "%v: ok = %v, expected: %v", tc.name, ok, tc.expectedOk)
	}
}

View File

@ -2,460 +2,447 @@ package slinguist
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: 60f864a138650dd17fafc94814be9ee2d3aaef8c
// Extracted from github/linguist commit: b6460f8ed6b249281ada099ca28bd8f1230b8892
import (
"path/filepath"
"regexp"
"strings"
)
func GetLanguageByContent(filename string, content []byte) (lang string, safe bool) {
ext := strings.ToLower(filepath.Ext(filename))
if fnMatcher, ok := matchers[ext]; ok {
lang, safe = fnMatcher(content)
} else {
lang = OtherLanguage
}
type languageMatcher func([]byte) []string
return
}
type languageMatcher func([]byte) (string, bool)
var matchers = map[string]languageMatcher{
".asc": func(i []byte) (string, bool) {
var contentMatchers = map[string]languageMatcher{
".asc": func(i []byte) []string {
if asc_PublicKey_Matcher_0.Match(i) {
return "Public Key", true
return []string{"Public Key"}
} else if asc_AsciiDoc_Matcher_0.Match(i) {
return "AsciiDoc", true
return []string{"AsciiDoc"}
} else if asc_AGSScript_Matcher_0.Match(i) {
return "AGS Script", true
return []string{"AGS Script"}
}
return OtherLanguage, false
return nil
},
".bb": func(i []byte) (string, bool) {
".bb": func(i []byte) []string {
if bb_BlitzBasic_Matcher_0.Match(i) || bb_BlitzBasic_Matcher_1.Match(i) {
return "BlitzBasic", true
return []string{"BlitzBasic"}
} else if bb_BitBake_Matcher_0.Match(i) {
return "BitBake", true
return []string{"BitBake"}
}
return OtherLanguage, false
return nil
},
".builds": func(i []byte) (string, bool) {
".builds": func(i []byte) []string {
if builds_XML_Matcher_0.Match(i) {
return "XML", true
return []string{"XML"}
}
return "Text", true
return []string{"Text"}
},
".ch": func(i []byte) (string, bool) {
".ch": func(i []byte) []string {
if ch_xBase_Matcher_0.Match(i) {
return "xBase", true
return []string{"xBase"}
}
return OtherLanguage, false
return nil
},
".cl": func(i []byte) (string, bool) {
".cl": func(i []byte) []string {
if cl_CommonLisp_Matcher_0.Match(i) {
return "Common Lisp", true
return []string{"Common Lisp"}
} else if cl_Cool_Matcher_0.Match(i) {
return "Cool", true
return []string{"Cool"}
} else if cl_OpenCL_Matcher_0.Match(i) {
return "OpenCL", true
return []string{"OpenCL"}
}
return OtherLanguage, false
return nil
},
".cls": func(i []byte) (string, bool) {
".cls": func(i []byte) []string {
if cls_TeX_Matcher_0.Match(i) {
return "TeX", true
return []string{"TeX"}
}
return OtherLanguage, false
return nil
},
".cs": func(i []byte) (string, bool) {
".cs": func(i []byte) []string {
if cs_Smalltalk_Matcher_0.Match(i) {
return "Smalltalk", true
return []string{"Smalltalk"}
} else if cs_CSharp_Matcher_0.Match(i) || cs_CSharp_Matcher_1.Match(i) {
return "C#", true
return []string{"C#"}
}
return OtherLanguage, false
return nil
},
".d": func(i []byte) (string, bool) {
".d": func(i []byte) []string {
if d_D_Matcher_0.Match(i) {
return "D", true
return []string{"D"}
} else if d_DTrace_Matcher_0.Match(i) {
return "DTrace", true
return []string{"DTrace"}
} else if d_Makefile_Matcher_0.Match(i) {
return "Makefile", true
return []string{"Makefile"}
}
return OtherLanguage, false
return nil
},
".ecl": func(i []byte) (string, bool) {
".ecl": func(i []byte) []string {
if ecl_ECLiPSe_Matcher_0.Match(i) {
return "ECLiPSe", true
return []string{"ECLiPSe"}
} else if ecl_ECL_Matcher_0.Match(i) {
return "ECL", true
return []string{"ECL"}
}
return OtherLanguage, false
return nil
},
".es": func(i []byte) (string, bool) {
".es": func(i []byte) []string {
if es_Erlang_Matcher_0.Match(i) {
return "Erlang", true
return []string{"Erlang"}
}
return OtherLanguage, false
return nil
},
".f": func(i []byte) (string, bool) {
".f": func(i []byte) []string {
if f_Forth_Matcher_0.Match(i) {
return "Forth", true
return []string{"Forth"}
} else if f_FilebenchWML_Matcher_0.Match(i) {
return "Filebench WML", true
return []string{"Filebench WML"}
} else if f_Fortran_Matcher_0.Match(i) {
return "Fortran", true
return []string{"Fortran"}
}
return OtherLanguage, false
return nil
},
".for": func(i []byte) (string, bool) {
".for": func(i []byte) []string {
if for_Forth_Matcher_0.Match(i) {
return "Forth", true
return []string{"Forth"}
} else if for_Fortran_Matcher_0.Match(i) {
return "Fortran", true
return []string{"Fortran"}
}
return OtherLanguage, false
return nil
},
".fr": func(i []byte) (string, bool) {
".fr": func(i []byte) []string {
if fr_Forth_Matcher_0.Match(i) {
return "Forth", true
return []string{"Forth"}
} else if fr_Frege_Matcher_0.Match(i) {
return "Frege", true
return []string{"Frege"}
}
return "Text", true
return []string{"Text"}
},
".fs": func(i []byte) (string, bool) {
".fs": func(i []byte) []string {
if fs_Forth_Matcher_0.Match(i) {
return "Forth", true
return []string{"Forth"}
} else if fs_FSharp_Matcher_0.Match(i) {
return "F#", true
return []string{"F#"}
} else if fs_GLSL_Matcher_0.Match(i) {
return "GLSL", true
return []string{"GLSL"}
} else if fs_Filterscript_Matcher_0.Match(i) {
return "Filterscript", true
return []string{"Filterscript"}
}
return OtherLanguage, false
return nil
},
".gs": func(i []byte) (string, bool) {
".gs": func(i []byte) []string {
if gs_Gosu_Matcher_0.Match(i) {
return "Gosu", true
return []string{"Gosu"}
}
return OtherLanguage, false
return nil
},
".h": func(i []byte) (string, bool) {
".h": func(i []byte) []string {
if h_ObjectiveDashC_Matcher_0.Match(i) {
return "Objective-C", true
return []string{"Objective-C"}
} else if h_CPlusPlus_Matcher_0.Match(i) || h_CPlusPlus_Matcher_1.Match(i) || h_CPlusPlus_Matcher_2.Match(i) || h_CPlusPlus_Matcher_3.Match(i) || h_CPlusPlus_Matcher_4.Match(i) || h_CPlusPlus_Matcher_5.Match(i) || h_CPlusPlus_Matcher_6.Match(i) {
return "C++", true
return []string{"C++"}
}
return OtherLanguage, false
return nil
},
".inc": func(i []byte) (string, bool) {
".inc": func(i []byte) []string {
if inc_PHP_Matcher_0.Match(i) {
return "PHP", true
return []string{"PHP"}
} else if inc_POVDashRaySDL_Matcher_0.Match(i) {
return "POV-Ray SDL", true
return []string{"POV-Ray SDL"}
}
return OtherLanguage, false
return nil
},
".l": func(i []byte) (string, bool) {
".l": func(i []byte) []string {
if l_CommonLisp_Matcher_0.Match(i) {
return "Common Lisp", true
return []string{"Common Lisp"}
} else if l_Lex_Matcher_0.Match(i) {
return "Lex", true
return []string{"Lex"}
} else if l_Roff_Matcher_0.Match(i) {
return "Roff", true
return []string{"Roff"}
} else if l_PicoLisp_Matcher_0.Match(i) {
return "PicoLisp", true
return []string{"PicoLisp"}
}
return OtherLanguage, false
return nil
},
".ls": func(i []byte) (string, bool) {
".ls": func(i []byte) []string {
if ls_LoomScript_Matcher_0.Match(i) {
return "LoomScript", true
return []string{"LoomScript"}
}
return "LiveScript", true
return []string{"LiveScript"}
},
".lsp": func(i []byte) (string, bool) {
".lsp": func(i []byte) []string {
if lsp_CommonLisp_Matcher_0.Match(i) {
return "Common Lisp", true
return []string{"Common Lisp"}
} else if lsp_NewLisp_Matcher_0.Match(i) {
return "NewLisp", true
return []string{"NewLisp"}
}
return OtherLanguage, false
return nil
},
".lisp": func(i []byte) (string, bool) {
".lisp": func(i []byte) []string {
if lisp_CommonLisp_Matcher_0.Match(i) {
return "Common Lisp", true
return []string{"Common Lisp"}
} else if lisp_NewLisp_Matcher_0.Match(i) {
return "NewLisp", true
return []string{"NewLisp"}
}
return OtherLanguage, false
return nil
},
".m": func(i []byte) (string, bool) {
".m": func(i []byte) []string {
if m_ObjectiveDashC_Matcher_0.Match(i) {
return "Objective-C", true
return []string{"Objective-C"}
} else if m_Mercury_Matcher_0.Match(i) {
return "Mercury", true
return []string{"Mercury"}
} else if m_MUF_Matcher_0.Match(i) {
return "MUF", true
return []string{"MUF"}
} else if m_M_Matcher_0.Match(i) {
return "M", true
return []string{"M"}
} else if m_Mathematica_Matcher_0.Match(i) {
return "Mathematica", true
return []string{"Mathematica"}
} else if m_Matlab_Matcher_0.Match(i) {
return "Matlab", true
return []string{"Matlab"}
} else if m_Limbo_Matcher_0.Match(i) {
return "Limbo", true
return []string{"Limbo"}
}
return OtherLanguage, false
return nil
},
".md": func(i []byte) (string, bool) {
".md": func(i []byte) []string {
if md_Markdown_Matcher_0.Match(i) || md_Markdown_Matcher_1.Match(i) {
return "Markdown", true
return []string{"Markdown"}
} else if md_GCCMachineDescription_Matcher_0.Match(i) {
return "GCC Machine Description", true
return []string{"GCC Machine Description"}
}
return "Markdown", true
return []string{"Markdown"}
},
".ml": func(i []byte) (string, bool) {
".ml": func(i []byte) []string {
if ml_OCaml_Matcher_0.Match(i) {
return "OCaml", true
return []string{"OCaml"}
} else if ml_StandardML_Matcher_0.Match(i) {
return "Standard ML", true
return []string{"Standard ML"}
}
return OtherLanguage, false
return nil
},
".mod": func(i []byte) (string, bool) {
".mod": func(i []byte) []string {
if mod_XML_Matcher_0.Match(i) {
return "XML", true
return []string{"XML"}
} else if mod_ModulaDash2_Matcher_0.Match(i) || mod_ModulaDash2_Matcher_1.Match(i) {
return "Modula-2", true
return []string{"Modula-2"}
}
return "Linux Kernel Module", false
return []string{"Linux Kernel Module", "AMPL"}
},
".ms": func(i []byte) (string, bool) {
".ms": func(i []byte) []string {
if ms_Roff_Matcher_0.Match(i) {
return "Roff", true
return []string{"Roff"}
}
return "MAXScript", true
return []string{"MAXScript"}
},
".n": func(i []byte) (string, bool) {
".n": func(i []byte) []string {
if n_Roff_Matcher_0.Match(i) {
return "Roff", true
return []string{"Roff"}
} else if n_Nemerle_Matcher_0.Match(i) {
return "Nemerle", true
return []string{"Nemerle"}
}
return OtherLanguage, false
return nil
},
".ncl": func(i []byte) (string, bool) {
".ncl": func(i []byte) []string {
if ncl_Text_Matcher_0.Match(i) {
return "Text", true
return []string{"Text"}
}
return OtherLanguage, false
return nil
},
".nl": func(i []byte) (string, bool) {
".nl": func(i []byte) []string {
if nl_NL_Matcher_0.Match(i) {
return "NL", true
return []string{"NL"}
}
return "NewLisp", true
return []string{"NewLisp"}
},
".php": func(i []byte) (string, bool) {
".php": func(i []byte) []string {
if php_Hack_Matcher_0.Match(i) {
return "Hack", true
return []string{"Hack"}
} else if php_PHP_Matcher_0.Match(i) {
return "PHP", true
return []string{"PHP"}
}
return OtherLanguage, false
return nil
},
".pl": func(i []byte) (string, bool) {
".pl": func(i []byte) []string {
if pl_Prolog_Matcher_0.Match(i) {
return "Prolog", true
return []string{"Prolog"}
} else if pl_Perl_Matcher_0.Match(i) {
return "Perl", true
return []string{"Perl"}
} else if pl_Perl6_Matcher_0.Match(i) {
return "Perl6", true
return []string{"Perl6"}
}
return OtherLanguage, false
return nil
},
".pm": func(i []byte) (string, bool) {
".pm": func(i []byte) []string {
if pm_Perl6_Matcher_0.Match(i) {
return "Perl6", true
return []string{"Perl6"}
} else if pm_Perl_Matcher_0.Match(i) {
return "Perl", true
return []string{"Perl"}
}
return OtherLanguage, false
return nil
},
".pod": func(i []byte) (string, bool) {
".pod": func(i []byte) []string {
if pod_Pod_Matcher_0.Match(i) {
return "Pod", true
return []string{"Pod"}
}
return "Perl", true
return []string{"Perl"}
},
".pro": func(i []byte) (string, bool) {
".pro": func(i []byte) []string {
if pro_Prolog_Matcher_0.Match(i) {
return "Prolog", true
return []string{"Prolog"}
} else if pro_INI_Matcher_0.Match(i) {
return "INI", true
return []string{"INI"}
} else if pro_QMake_Matcher_0.Match(i) && pro_QMake_Matcher_1.Match(i) {
return "QMake", true
return []string{"QMake"}
} else if pro_IDL_Matcher_0.Match(i) {
return "IDL", true
return []string{"IDL"}
}
return OtherLanguage, false
return nil
},
".props": func(i []byte) (string, bool) {
".props": func(i []byte) []string {
if props_XML_Matcher_0.Match(i) {
return "XML", true
return []string{"XML"}
} else if props_INI_Matcher_0.Match(i) {
return "INI", true
return []string{"INI"}
}
return OtherLanguage, false
return nil
},
".r": func(i []byte) (string, bool) {
".r": func(i []byte) []string {
if r_Rebol_Matcher_0.Match(i) {
return "Rebol", true
return []string{"Rebol"}
} else if r_R_Matcher_0.Match(i) {
return "R", true
return []string{"R"}
}
return OtherLanguage, false
return nil
},
".rno": func(i []byte) (string, bool) {
".rno": func(i []byte) []string {
if rno_RUNOFF_Matcher_0.Match(i) {
return "RUNOFF", true
return []string{"RUNOFF"}
} else if rno_Roff_Matcher_0.Match(i) {
return "Roff", true
return []string{"Roff"}
}
return OtherLanguage, false
return nil
},
".rpy": func(i []byte) (string, bool) {
".rpy": func(i []byte) []string {
if rpy_Python_Matcher_0.Match(i) {
return "Python", true
return []string{"Python"}
}
return "Ren'Py", true
return []string{"Ren'Py"}
},
".rs": func(i []byte) (string, bool) {
".rs": func(i []byte) []string {
if rs_Rust_Matcher_0.Match(i) {
return "Rust", true
return []string{"Rust"}
} else if rs_RenderScript_Matcher_0.Match(i) {
return "RenderScript", true
return []string{"RenderScript"}
}
return OtherLanguage, false
return nil
},
".sc": func(i []byte) (string, bool) {
".sc": func(i []byte) []string {
if sc_SuperCollider_Matcher_0.Match(i) || sc_SuperCollider_Matcher_1.Match(i) || sc_SuperCollider_Matcher_2.Match(i) {
return "SuperCollider", true
return []string{"SuperCollider"}
} else if sc_Scala_Matcher_0.Match(i) || sc_Scala_Matcher_1.Match(i) || sc_Scala_Matcher_2.Match(i) {
return "Scala", true
return []string{"Scala"}
}
return OtherLanguage, false
return nil
},
".sql": func(i []byte) (string, bool) {
".sql": func(i []byte) []string {
if sql_PLpgSQL_Matcher_0.Match(i) || sql_PLpgSQL_Matcher_1.Match(i) || sql_PLpgSQL_Matcher_2.Match(i) {
return "PLpgSQL", true
return []string{"PLpgSQL"}
} else if sql_SQLPL_Matcher_0.Match(i) || sql_SQLPL_Matcher_1.Match(i) {
return "SQLPL", true
return []string{"SQLPL"}
} else if sql_PLSQL_Matcher_0.Match(i) || sql_PLSQL_Matcher_1.Match(i) {
return "PLSQL", true
return []string{"PLSQL"}
} else if sql_SQL_Matcher_0.Match(i) {
return "SQL", true
return []string{"SQL"}
}
return OtherLanguage, false
return nil
},
".srt": func(i []byte) (string, bool) {
".srt": func(i []byte) []string {
if srt_SubRipText_Matcher_0.Match(i) {
return "SubRip Text", true
return []string{"SubRip Text"}
}
return OtherLanguage, false
return nil
},
".t": func(i []byte) (string, bool) {
".t": func(i []byte) []string {
if t_Turing_Matcher_0.Match(i) {
return "Turing", true
return []string{"Turing"}
} else if t_Perl6_Matcher_0.Match(i) {
return "Perl6", true
return []string{"Perl6"}
} else if t_Perl_Matcher_0.Match(i) {
return "Perl", true
return []string{"Perl"}
}
return OtherLanguage, false
return nil
},
".toc": func(i []byte) (string, bool) {
".toc": func(i []byte) []string {
if toc_WorldofWarcraftAddonData_Matcher_0.Match(i) {
return "World of Warcraft Addon Data", true
return []string{"World of Warcraft Addon Data"}
} else if toc_TeX_Matcher_0.Match(i) {
return "TeX", true
return []string{"TeX"}
}
return OtherLanguage, false
return nil
},
".ts": func(i []byte) (string, bool) {
".ts": func(i []byte) []string {
if ts_XML_Matcher_0.Match(i) {
return "XML", true
return []string{"XML"}
}
return "TypeScript", true
return []string{"TypeScript"}
},
".tst": func(i []byte) (string, bool) {
".tst": func(i []byte) []string {
if tst_GAP_Matcher_0.Match(i) {
return "GAP", true
return []string{"GAP"}
}
return "Scilab", true
return []string{"Scilab"}
},
".tsx": func(i []byte) (string, bool) {
".tsx": func(i []byte) []string {
if tsx_TypeScript_Matcher_0.Match(i) {
return "TypeScript", true
return []string{"TypeScript"}
} else if tsx_XML_Matcher_0.Match(i) {
return "XML", true
return []string{"XML"}
}
return OtherLanguage, false
return nil
},
}

View File

@ -1,65 +0,0 @@
package slinguist
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"text/tabwriter"
. "gopkg.in/check.v1"
)
// TestGetLanguageByContentLinguistCorpus walks the ".linguist/samples" tree and, for every
// sample file that has an extension, compares the language returned by GetLanguageByContent
// with the name of the directory that contains the file. One tab-separated row is printed per
// file, followed by a line with the summary counters. Directories named "filenames" are skipped.
// The only hard assertion is that the walk itself finishes without an error.
func (s *TSuite) TestGetLanguageByContentLinguistCorpus(c *C) {
	var total, failed, ok, other, unsafe int

	w := new(tabwriter.Writer)
	w.Init(os.Stdout, 0, 8, 0, '\t', 0)

	walkErr := filepath.Walk(".linguist/samples", func(path string, f os.FileInfo, err error) error {
		// Walk reports failures to stat or list an entry through err, and f can be nil
		// in that case, so err has to be checked before f is touched.
		if err != nil {
			return err
		}

		if f.IsDir() {
			if f.Name() == "filenames" {
				return filepath.SkipDir
			}

			return nil
		}

		// Files without extension are not counted, so skip them before reading anything.
		extension := filepath.Ext(path)
		if extension == "" {
			return nil
		}

		content, err := ioutil.ReadFile(path)
		if err != nil {
			return err
		}

		expected := filepath.Base(filepath.Dir(path))
		filename := filepath.Base(path)

		total++
		obtained, safe := GetLanguageByContent(filename, content)
		if obtained == OtherLanguage {
			other++
		}

		var status string
		if expected == obtained {
			status = "ok"
			ok++
		} else {
			status = "failed"
			failed++
			if !safe {
				unsafe++
			}
		}

		fmt.Fprintf(w, "%s\t%s\t%s\t%v\t%s\n", filename, expected, obtained, safe, status)
		return nil
	})
	c.Assert(walkErr, IsNil)

	fmt.Fprintln(w)
	w.Flush()
	fmt.Printf("total files: %d, ok: %d, failed: %d, unsafe: %d, other: %d\n", total, ok, failed, unsafe, other)
}

View File

@ -2,7 +2,7 @@ package slinguist
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: 60f864a138650dd17fafc94814be9ee2d3aaef8c
// Extracted from github/linguist commit: b6460f8ed6b249281ada099ca28bd8f1230b8892
import "gopkg.in/toqueteos/substring.v1"

File diff suppressed because it is too large Load Diff

View File

@ -1,22 +0,0 @@
package slinguist
import . "gopkg.in/check.v1"
// TestGetLanguageByExtension checks the language and the safe flag that
// GetLanguageByExtension reports for an unknown extension, a known one and a
// file name carrying two extensions.
func (s *TSuite) TestGetLanguageByExtension(c *C) {
	cases := []struct {
		filename string
		wantLang string
		wantSafe bool
	}{
		{filename: "foo.foo", wantLang: "Other", wantSafe: false},
		{filename: "foo.go", wantLang: "Go", wantSafe: true},
		{filename: "foo.go.php", wantLang: "Hack", wantSafe: false},
	}

	for _, tc := range cases {
		gotLang, gotSafe := GetLanguageByExtension(tc.filename)
		c.Assert(gotLang, Equals, tc.wantLang)
		c.Assert(gotSafe, Equals, tc.wantSafe)
	}
}
// TestGetLanguageExtensions checks that GetLanguageExtensions returns nothing
// for the name "foo" and at least one extension for "C".
func (s *TSuite) TestGetLanguageExtensions(c *C) {
	forUnknown := GetLanguageExtensions("foo")
	c.Assert(forUnknown, HasLen, 0)

	forC := GetLanguageExtensions("C")
	c.Assert(forC, Not(HasLen), 0)
}

File diff suppressed because it is too large Load Diff

View File

@ -1,10 +1,143 @@
package slinguist
func GetLanguageByFilename(filename string) (lang string, safe bool) {
lang, safe = languagesByFilename[filename]
if lang == "" {
lang = OtherLanguage
}
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: b6460f8ed6b249281ada099ca28bd8f1230b8892
return
var languagesByFilename = map[string][]string{
".Rprofile": {"R"},
".XCompose": {"XCompose"},
".abbrev_defs": {"Emacs Lisp"},
".arcconfig": {"JSON"},
".babelrc": {"JSON5"},
".bash_history": {"Shell"},
".bash_logout": {"Shell"},
".bash_profile": {"Shell"},
".bashrc": {"Shell"},
".clang-format": {"YAML"},
".classpath": {"XML"},
".emacs": {"Emacs Lisp"},
".emacs.desktop": {"Emacs Lisp"},
".factor-boot-rc": {"Factor"},
".factor-rc": {"Factor"},
".gclient": {"Python"},
".gnus": {"Emacs Lisp"},
".irbrc": {"Ruby"},
".jshintrc": {"JSON"},
".nvimrc": {"Vim script"},
".php_cs": {"PHP"},
".php_cs.dist": {"PHP"},
".project": {"XML"},
".pryrc": {"Ruby"},
".spacemacs": {"Emacs Lisp"},
".vimrc": {"Vim script"},
".viper": {"Emacs Lisp"},
"APKBUILD": {"Alpine Abuild"},
"App.config": {"XML"},
"Appraisals": {"Ruby"},
"BSDmakefile": {"Makefile"},
"BUCK": {"Python"},
"BUILD": {"Python"},
"Berksfile": {"Ruby"},
"Brewfile": {"Ruby"},
"Buildfile": {"Ruby"},
"CMakeLists.txt": {"CMake"},
"COPYING": {"Text"},
"COPYRIGHT.regex": {"Text"},
"Cakefile": {"CoffeeScript"},
"Cask": {"Emacs Lisp"},
"Dangerfile": {"Ruby"},
"Deliverfile": {"Ruby"},
"Dockerfile": {"Dockerfile"},
"Emakefile": {"Erlang"},
"FONTLOG": {"Text"},
"Fakefile": {"Fancy"},
"Fastfile": {"Ruby"},
"GNUmakefile": {"Makefile"},
"Gemfile": {"Ruby"},
"Gemfile.lock": {"Ruby"},
"Guardfile": {"Ruby"},
"INSTALL": {"Text"},
"INSTALL.mysql": {"Text"},
"Jakefile": {"JavaScript"},
"Jarfile": {"Ruby"},
"Jenkinsfile": {"Groovy"},
"Kbuild": {"Makefile"},
"LICENSE": {"Text"},
"LICENSE.mysql": {"Text"},
"Makefile": {"Makefile"},
"Makefile.am": {"Makefile"},
"Makefile.boot": {"Makefile"},
"Makefile.frag": {"Makefile"},
"Makefile.in": {"Makefile"},
"Makefile.inc": {"Makefile"},
"Makefile.wat": {"Makefile"},
"Mavenfile": {"Ruby"},
"Modulefile": {"Puppet"},
"NEWS": {"Text"},
"Notebook": {"Jupyter Notebook"},
"NuGet.config": {"XML"},
"Nukefile": {"Nu"},
"PKGBUILD": {"Shell"},
"Phakefile": {"PHP"},
"Podfile": {"Ruby"},
"Project.ede": {"Emacs Lisp"},
"Puppetfile": {"Ruby"},
"README.1ST": {"Text"},
"README.me": {"Text"},
"README.mysql": {"Text"},
"ROOT": {"Isabelle ROOT"},
"Rakefile": {"Ruby"},
"Rexfile": {"Perl6"},
"SConscript": {"Python"},
"SConstruct": {"Python"},
"Settings.StyleCop": {"XML"},
"Slakefile": {"LiveScript"},
"Snakefile": {"Python"},
"Snapfile": {"Ruby"},
"Thorfile": {"Ruby"},
"Vagrantfile": {"Ruby"},
"WORKSPACE": {"Python"},
"Web.Debug.config": {"XML"},
"Web.Release.config": {"XML"},
"Web.config": {"XML"},
"XCompose": {"XCompose"},
"_emacs": {"Emacs Lisp"},
"_vimrc": {"Vim script"},
"abbrev_defs": {"Emacs Lisp"},
"ant.xml": {"Ant Build System"},
"build.xml": {"Ant Build System"},
"buildfile": {"Ruby"},
"click.me": {"Text"},
"composer.lock": {"JSON"},
"configure.ac": {"M4Sugar"},
"delete.me": {"Text"},
"descrip.mmk": {"Module Management System"},
"descrip.mms": {"Module Management System"},
"gradlew": {"Shell"},
"gvimrc": {"Vim script"},
"keep.me": {"Text"},
"ld.script": {"Linker Script"},
"makefile": {"Makefile"},
"makefile.sco": {"Makefile"},
"mcmod.info": {"JSON"},
"meson.build": {"Meson"},
"meson_options.txt": {"Meson"},
"mix.lock": {"Elixir"},
"mkfile": {"Makefile"},
"mmn": {"Roff"},
"mmt": {"Roff"},
"nginx.conf": {"Nginx"},
"nvimrc": {"Vim script"},
"packages.config": {"XML"},
"pom.xml": {"Maven POM"},
"read.me": {"Text"},
"rebar.config": {"Erlang"},
"rebar.config.lock": {"Erlang"},
"rebar.lock": {"Erlang"},
"riemann.config": {"Clojure"},
"test.me": {"Text"},
"vimrc": {"Vim script"},
"wscript": {"Python"},
"xcompose": {"XCompose"},
}

View File

@ -1,37 +0,0 @@
package slinguist
import . "gopkg.in/check.v1"
// TestGetLanguageByFilename checks the language and the safe flag that
// GetLanguageByFilename reports for one unknown file name and several known ones.
func (s *TSuite) TestGetLanguageByFilename(c *C) {
	cases := []struct {
		filename string
		wantLang string
		wantSafe bool
	}{
		{filename: `unknown.interpreter`, wantLang: OtherLanguage, wantSafe: false},
		{filename: `.bashrc`, wantLang: "Shell", wantSafe: true},
		{filename: `Dockerfile`, wantLang: "Dockerfile", wantSafe: true},
		{filename: `Makefile.frag`, wantLang: "Makefile", wantSafe: true},
		{filename: `makefile`, wantLang: "Makefile", wantSafe: true},
		{filename: `Vagrantfile`, wantLang: "Ruby", wantSafe: true},
		{filename: `_vimrc`, wantLang: "Vim script", wantSafe: true},
		{filename: `pom.xml`, wantLang: "Maven POM", wantSafe: true},
	}

	for _, tc := range cases {
		gotLang, gotSafe := GetLanguageByFilename(tc.filename)
		c.Assert(gotLang, Equals, tc.wantLang)
		c.Assert(gotSafe, Equals, tc.wantSafe)
	}
}

View File

@ -1,142 +0,0 @@
package slinguist
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: 60f864a138650dd17fafc94814be9ee2d3aaef8c
var languagesByFilename = map[string]string{
".Rprofile": "R",
".XCompose": "XCompose",
".abbrev_defs": "Emacs Lisp",
".arcconfig": "JSON",
".babelrc": "JSON5",
".bash_history": "Shell",
".bash_logout": "Shell",
".bash_profile": "Shell",
".bashrc": "Shell",
".clang-format": "YAML",
".classpath": "XML",
".emacs": "Emacs Lisp",
".emacs.desktop": "Emacs Lisp",
".factor-boot-rc": "Factor",
".factor-rc": "Factor",
".gclient": "Python",
".gnus": "Emacs Lisp",
".irbrc": "Ruby",
".jshintrc": "JSON",
".nvimrc": "Vim script",
".php_cs": "PHP",
".php_cs.dist": "PHP",
".project": "XML",
".pryrc": "Ruby",
".spacemacs": "Emacs Lisp",
".vimrc": "Vim script",
".viper": "Emacs Lisp",
"APKBUILD": "Alpine Abuild",
"App.config": "XML",
"Appraisals": "Ruby",
"BSDmakefile": "Makefile",
"BUCK": "Python",
"BUILD": "Python",
"Berksfile": "Ruby",
"Brewfile": "Ruby",
"Buildfile": "Ruby",
"CMakeLists.txt": "CMake",
"COPYING": "Text",
"COPYRIGHT.regex": "Text",
"Cakefile": "CoffeeScript",
"Cask": "Emacs Lisp",
"Dangerfile": "Ruby",
"Deliverfile": "Ruby",
"Dockerfile": "Dockerfile",
"Emakefile": "Erlang",
"FONTLOG": "Text",
"Fakefile": "Fancy",
"Fastfile": "Ruby",
"GNUmakefile": "Makefile",
"Gemfile": "Ruby",
"Gemfile.lock": "Ruby",
"Guardfile": "Ruby",
"INSTALL": "Text",
"INSTALL.mysql": "Text",
"Jakefile": "JavaScript",
"Jarfile": "Ruby",
"Jenkinsfile": "Groovy",
"Kbuild": "Makefile",
"LICENSE": "Text",
"LICENSE.mysql": "Text",
"Makefile": "Makefile",
"Makefile.am": "Makefile",
"Makefile.boot": "Makefile",
"Makefile.frag": "Makefile",
"Makefile.in": "Makefile",
"Makefile.inc": "Makefile",
"Mavenfile": "Ruby",
"Modulefile": "Puppet",
"NEWS": "Text",
"Notebook": "Jupyter Notebook",
"NuGet.config": "XML",
"Nukefile": "Nu",
"PKGBUILD": "Shell",
"Phakefile": "PHP",
"Podfile": "Ruby",
"Project.ede": "Emacs Lisp",
"Puppetfile": "Ruby",
"README.1ST": "Text",
"README.me": "Text",
"README.mysql": "Text",
"ROOT": "Isabelle ROOT",
"Rakefile": "Ruby",
"Rexfile": "Perl6",
"SConscript": "Python",
"SConstruct": "Python",
"Settings.StyleCop": "XML",
"Slakefile": "LiveScript",
"Snakefile": "Python",
"Snapfile": "Ruby",
"Thorfile": "Ruby",
"Vagrantfile": "Ruby",
"WORKSPACE": "Python",
"Web.Debug.config": "XML",
"Web.Release.config": "XML",
"Web.config": "XML",
"XCompose": "XCompose",
"_emacs": "Emacs Lisp",
"_vimrc": "Vim script",
"abbrev_defs": "Emacs Lisp",
"ant.xml": "Ant Build System",
"build.xml": "Ant Build System",
"buildfile": "Ruby",
"click.me": "Text",
"composer.lock": "JSON",
"configure.ac": "M4Sugar",
"delete.me": "Text",
"descrip.mmk": "Module Management System",
"descrip.mms": "Module Management System",
"gradlew": "Shell",
"gvimrc": "Vim script",
"keep.me": "Text",
"ld.script": "Linker Script",
"makefile": "Makefile",
"makefile.sco": "Makefile",
"mcmod.info": "JSON",
"meson.build": "Meson",
"meson_options.txt": "Meson",
"mix.lock": "Elixir",
"mkfile": "Makefile",
"mmn": "Roff",
"mmt": "Roff",
"nginx.conf": "Nginx",
"nvimrc": "Vim script",
"packages.config": "XML",
"pom.xml": "Maven POM",
"read.me": "Text",
"rebar.config": "Erlang",
"rebar.config.lock": "Erlang",
"rebar.lock": "Erlang",
"riemann.config": "Clojure",
"test.me": "Text",
"vimrc": "Vim script",
"wscript": "Python",
"xcompose": "XCompose",
}

File diff suppressed because it is too large Load Diff

View File

@ -4,8 +4,8 @@ package slinguist
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: {{ getCommit }}
// languagesByAlias keeps alias for different languages and use the name of the languages as a alias too. All the
// keys (alias or not) are written in lower case and the whitespaces has been replaced by underscores.
// languagesByAlias maps every alias to its language; the name of each language is also used as an alias.
// All the keys (alias or not) are written in lower case and whitespace has been replaced by underscores.
var languagesByAlias = map[string]string{
{{range $alias, $language := . -}}
"{{ $alias }}": {{ printf "%q" $language -}},

View File

@ -5,27 +5,14 @@ package slinguist
// Extracted from github/linguist commit: {{ getCommit }}
import (
"path/filepath"
"regexp"
"strings"
)
func GetLanguageByContent(filename string, content []byte) (lang string, safe bool) {
ext := strings.ToLower(filepath.Ext(filename))
if fnMatcher, ok := matchers[ext]; ok {
lang, safe = fnMatcher(content)
} else {
lang = OtherLanguage
}
type languageMatcher func ([]byte) []string
return
}
type languageMatcher func ([]byte) (string, bool)
var matchers = map[string]languageMatcher{
var contentMatchers = map[string]languageMatcher{
{{ range $index, $disambiguator := . -}}
{{ printf "%q" $disambiguator.Extension }}: func(i []byte) (string, bool) {
{{ printf "%q" $disambiguator.Extension }}: func(i []byte) []string {
{{ range $i, $language := $disambiguator.Languages -}}
{{- if not (avoidLanguage $language) }}
@ -33,14 +20,14 @@ var matchers = map[string]languageMatcher{
{{- if gt $i 0 }} else {{ end -}}
if {{- range $j, $heuristic := $language.Heuristics }} {{ $heuristic.Name }}.Match(i)
{{- if lt $j (len $language.LogicRelations) }} {{index $language.LogicRelations $j}} {{- end -}} {{ end }} {
return {{ printf "%q" $language.Language }}, true
return []string{ {{- printf "%q" $language.Language -}} }
}
{{- end -}}
{{- end -}}
{{- end}}
return {{ returnLanguage $disambiguator.Languages }}, {{ safeLanguage $disambiguator.Languages }}
return {{ returnLanguages $disambiguator.Languages | returnStringSlice }}
},
{{ end -}}
}

View File

@ -5,7 +5,13 @@ package slinguist
// Extracted from github/linguist commit: {{ getCommit }}
var languagesByExtension = map[string][]string{
{{range $extension, $languages := . -}}
{{range $extension, $languages := .LanguagesByExtension -}}
"{{ $extension }}": { {{- $languages | formatStringSlice -}} },
{{end -}}
}
var extensionsByLanguage = map[string][]string{
{{range $language, $extensions := .ExtensionsByLanguage -}}
"{{ $language }}": { {{- $extensions | formatStringSlice -}} },
{{end -}}
}

View File

@ -4,8 +4,8 @@ package slinguist
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: {{ getCommit }}
var languagesByFilename = map[string]string{
{{range $filename, $language := . -}}
"{{ $filename }}": {{- printf "%q" $language -}},
var languagesByFilename = map[string][]string{
{{range $filename, $languages := . -}}
"{{ $filename }}": { {{- formatStringSlice $languages -}} },
{{end -}}
}

View File

@ -3,14 +3,13 @@ package generator
import (
"bytes"
"io"
"text/template"
"strings"
"text/template"
yaml "gopkg.in/yaml.v2"
)
// Aliases reads from buf and builds aliases_map.go file from aliasesTmplPath.
// Aliases parses data and builds the source file from the template at aliasesTmplPath.
func Aliases(data []byte, aliasesTmplPath, aliasesTmplName, commit string) ([]byte, error) {
languages := make(map[string]*languageInfo)
if err := yaml.Unmarshal(data, &languages); err != nil {

View File

@ -2,13 +2,13 @@ package generator
import (
"bytes"
"html/template"
"io"
"text/template"
yaml "gopkg.in/yaml.v2"
)
// Documentation reads from buf and builds documentation_matchers.go file from documentationTmplPath.
// Documentation parses data and builds the source file from the template at documentationTmplPath.
func Documentation(data []byte, documentationTmplPath, documentationTmplName, commit string) ([]byte, error) {
var regexpList []string
if err := yaml.Unmarshal(data, &regexpList); err != nil {

View File

@ -9,18 +9,25 @@ import (
yaml "gopkg.in/yaml.v2"
)
// Extensions reads from buf and builds extensions_map.go file from extensionsTmplPath.
// extensionsInfo groups the two maps that are handed to the extensions template.
type extensionsInfo struct {
	// LanguagesByExtension is filled with the result of buildExtensionLanguageMap.
	LanguagesByExtension map[string][]string
	// ExtensionsByLanguage is filled with the result of buildLanguageExtensionsMap.
	ExtensionsByLanguage map[string][]string
}
// Extensions parses data and builds the source file from the template at extensionsTmplPath.
func Extensions(data []byte, extensionsTmplPath, extensionsTmplName, commit string) ([]byte, error) {
languages := make(map[string]*languageInfo)
if err := yaml.Unmarshal(data, &languages); err != nil {
return nil, err
}
extInfo := &extensionsInfo{}
orderedKeyList := getAlphabeticalOrderedKeys(languages)
languagesByExtension := buildExtensionLanguageMap(languages, orderedKeyList)
extInfo.LanguagesByExtension = buildExtensionLanguageMap(languages, orderedKeyList)
extInfo.ExtensionsByLanguage = buildLanguageExtensionsMap(languages)
buf := &bytes.Buffer{}
if err := executeExtensionsTemplate(buf, languagesByExtension, extensionsTmplPath, extensionsTmplName, commit); err != nil {
if err := executeExtensionsTemplate(buf, extInfo, extensionsTmplPath, extensionsTmplName, commit); err != nil {
return nil, err
}
@ -39,14 +46,25 @@ func buildExtensionLanguageMap(languages map[string]*languageInfo, orderedKeyLis
return extensionLangsMap
}
func executeExtensionsTemplate(out io.Writer, languagesByExtension map[string][]string, extensionsTmplPath, extensionsTmpl, commit string) error {
// buildLanguageExtensionsMap returns a map from language name to that language's
// extensions. Languages that declare no extension are left out of the result.
func buildLanguageExtensionsMap(languages map[string]*languageInfo) map[string][]string {
	result := make(map[string][]string, len(languages))
	for name, li := range languages {
		if len(li.Extensions) == 0 {
			continue
		}

		result[name] = li.Extensions
	}

	return result
}
func executeExtensionsTemplate(out io.Writer, extInfo *extensionsInfo, extensionsTmplPath, extensionsTmpl, commit string) error {
fmap := template.FuncMap{
"getCommit": func() string { return commit },
"formatStringSlice": func(slice []string) string { return `"` + strings.Join(slice, `","`) + `"` },
}
t := template.Must(template.New(extensionsTmpl).Funcs(fmap).ParseFiles(extensionsTmplPath))
if err := t.Execute(out, languagesByExtension); err != nil {
if err := t.Execute(out, extInfo); err != nil {
return err
}

View File

@ -3,12 +3,13 @@ package generator
import (
"bytes"
"io"
"strings"
"text/template"
yaml "gopkg.in/yaml.v2"
)
// Filenames reads from buf and builds filenames_map.go file from filenamesTmplPath.
// Filenames reads from data and builds a source file from filenamesTmplPath.
func Filenames(data []byte, filenamesTmplPath, filenamesTmplName, commit string) ([]byte, error) {
languages := make(map[string]*languageInfo)
if err := yaml.Unmarshal(data, &languages); err != nil {
@ -25,20 +26,21 @@ func Filenames(data []byte, filenamesTmplPath, filenamesTmplName, commit string)
return buf.Bytes(), nil
}
func buildFilenameLanguageMap(languages map[string]*languageInfo) map[string]string {
filenameLangMap := make(map[string]string)
func buildFilenameLanguageMap(languages map[string]*languageInfo) map[string][]string {
filenameLangMap := make(map[string][]string)
for lang, langInfo := range languages {
for _, filename := range langInfo.Filenames {
filenameLangMap[filename] = lang
filenameLangMap[filename] = append(filenameLangMap[filename], lang)
}
}
return filenameLangMap
}
func executeFilenamesTemplate(out io.Writer, languagesByFilename map[string]string, filenamesTmplPath, filenamesTmpl, commit string) error {
func executeFilenamesTemplate(out io.Writer, languagesByFilename map[string][]string, filenamesTmplPath, filenamesTmpl, commit string) error {
fmap := template.FuncMap{
"getCommit": func() string { return commit },
"formatStringSlice": func(slice []string) string { return `"` + strings.Join(slice, `","`) + `"` },
}
t := template.Must(template.New(filenamesTmpl).Funcs(fmap).ParseFiles(filenamesTmplPath))

View File

@ -5,7 +5,7 @@ import (
"io/ioutil"
)
// Func is the function's type that generate the files from templates.
// Func is the type of the functions that generate a source file from the data to be parsed and a template.
type Func func(dataToParse []byte, templatePath string, template string, commit string) ([]byte, error)
// FromFile read data to parse from a file named fileToParse and write the generated source code to a file named outPath. The generated

View File

@ -15,7 +15,7 @@ import (
const (
lingustURL = "https://github.com/github/linguist.git"
commitTree = "60f864a138650dd17fafc94814be9ee2d3aaef8c"
commitTest = "fe8b44ab8a225b1ffa75b983b916ea22fee5b6f7"
commitTest = "0123456789abcdef0123456789abcdef01234567"
// Extensions test
extensionsTestFile = "test_files/extensions.test.yml"
@ -77,6 +77,10 @@ type GeneratorTestSuite struct {
tmpLinguist string
}
// TestGeneratorTestSuite is the go test entry point for the suite: it hands a
// new GeneratorTestSuite to suite.Run.
func TestGeneratorTestSuite(t *testing.T) {
suite.Run(t, new(GeneratorTestSuite))
}
func (g *GeneratorTestSuite) SetupSuite() {
tmpLinguist, err := ioutil.TempDir("", "linguist-")
assert.NoError(g.T(), err)
@ -116,7 +120,7 @@ func (g *GeneratorTestSuite) TestFromFile() {
wantOut string
}{
{
name: "TestFromFile_Language",
name: "TestFromFile_Extensions",
fileToParse: extensionsTestFile,
tmplPath: extensionsTestTmplPath,
tmplName: extensionsTestTmplName,
@ -201,7 +205,7 @@ func (g *GeneratorTestSuite) TestFromFile() {
assert.NoError(g.T(), err)
out, err := ioutil.ReadFile(outPath.Name())
assert.NoError(g.T(), err)
assert.EqualValues(g.T(), gold, out, fmt.Sprintf("FromFile() = %v, want %v", string(out), string(test.wantOut)))
assert.EqualValues(g.T(), gold, out, fmt.Sprintf("%v: %v, expected: %v", test.name, string(out), string(test.wantOut)))
}
}
@ -236,10 +240,6 @@ func (g *GeneratorTestSuite) TestFrequencies() {
assert.NoError(g.T(), err)
out, err := ioutil.ReadFile(outPath.Name())
assert.NoError(g.T(), err)
assert.EqualValues(g.T(), gold, out, fmt.Sprintf("Frequencies() = %v, want %v", string(out), string(test.wantOut)))
assert.EqualValues(g.T(), gold, out, fmt.Sprintf("%v: %v, expected: %v", test.name, string(out), string(test.wantOut)))
}
}
func TestGeneratorTestSuite(t *testing.T) {
suite.Run(t, new(GeneratorTestSuite))
}

View File

@ -11,7 +11,7 @@ import (
"text/template"
)
// Heuristics reads from buf and builds content.go file from contentTmplPath.
// Heuristics reads the heuristics data and builds a source file from contentTmplPath.
func Heuristics(heuristics []byte, contentTmplPath, contentTmplName, commit string) ([]byte, error) {
disambiguators, err := getDisambiguators(heuristics)
if err != nil {
@ -24,6 +24,8 @@ func Heuristics(heuristics []byte, contentTmplPath, contentTmplName, commit stri
}
return buf.Bytes(), nil
// fmt.Println(string(buf.Bytes()))
// return nil, nil
}
const unknownLanguage = "OtherLanguage"
@ -417,8 +419,14 @@ func executeContentTemplate(out io.Writer, disambiguators []*disambiguator, cont
fmap := template.FuncMap{
"getCommit": func() string { return commit },
"getAllHeuristics": getAllHeuristics,
"returnLanguage": returnLanguage,
"safeLanguage": safeLanguage,
"returnStringSlice": func(slice []string) string {
if len(slice) == 0 {
return "nil"
}
return `[]string{` + strings.Join(slice, `, `) + `}`
},
"returnLanguages": returnLanguages,
"avoidLanguage": avoidLanguage,
}
@ -458,18 +466,7 @@ func containsInvalidRegexp(reg string) bool {
return strings.Contains(reg, `(?<`) || strings.Contains(reg, `\1`)
}
func returnLanguage(langsHeuristics []*languageHeuristics) string {
lang, _ := returnLangAndSafe(langsHeuristics)
return lang
}
func safeLanguage(langsHeuristics []*languageHeuristics) bool {
_, safe := returnLangAndSafe(langsHeuristics)
return safe
}
func returnLangAndSafe(langsHeuristics []*languageHeuristics) (string, bool) {
// at the moment, only returns one string although might be exists several language to return as a []string.
func returnLanguages(langsHeuristics []*languageHeuristics) []string {
langs := make([]string, 0)
for _, langHeu := range langsHeuristics {
if len(langHeu.Heuristics) == 0 {
@ -477,12 +474,5 @@ func returnLangAndSafe(langsHeuristics []*languageHeuristics) (string, bool) {
}
}
lang := unknownLanguage
safe := false
if len(langs) != 0 {
lang = langs[0]
safe = len(langs) == 1
}
return lang, safe
return langs
}

View File

@ -9,7 +9,7 @@ import (
"gopkg.in/yaml.v2"
)
// Interpreters reads from buf and builds interpreters_map.go file from interpretersTmplPath.
// Interpreters reads from data and builds a source file from interpretersTmplPath.
func Interpreters(data []byte, interpretersTmplPath, interpretersTmplName, commit string) ([]byte, error) {
languages := make(map[string]*languageInfo)
if err := yaml.Unmarshal(data, &languages); err != nil {

View File

@ -2,10 +2,10 @@ package slinguist
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: fe8b44ab8a225b1ffa75b983b916ea22fee5b6f7
// Extracted from github/linguist commit: 0123456789abcdef0123456789abcdef01234567
// languagesByAlias keeps alias for different languages and use the name of the languages as a alias too. All the
// keys (alias or not) are written in lower case and the whitespaces has been replaced by underscores.
// languagesByAlias keeps alias for different languages and use the name of the languages as an alias too.
// All the keys (alias or not) are written in lower case and the whitespaces has been replaced by underscores.
var languagesByAlias = map[string]string{
"asp": "ASP",
"aspx": "ASP",

View File

@ -2,121 +2,108 @@ package slinguist
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: fe8b44ab8a225b1ffa75b983b916ea22fee5b6f7
// Extracted from github/linguist commit: 0123456789abcdef0123456789abcdef01234567
import (
"path/filepath"
"regexp"
"strings"
)
func GetLanguageByContent(filename string, content []byte) (lang string, safe bool) {
ext := strings.ToLower(filepath.Ext(filename))
if fnMatcher, ok := matchers[ext]; ok {
lang, safe = fnMatcher(content)
} else {
lang = OtherLanguage
}
type languageMatcher func([]byte) []string
return
}
type languageMatcher func([]byte) (string, bool)
var matchers = map[string]languageMatcher{
".asc": func(i []byte) (string, bool) {
var contentMatchers = map[string]languageMatcher{
".asc": func(i []byte) []string {
if asc_PublicKey_Matcher_0.Match(i) {
return "Public Key", true
return []string{"Public Key"}
} else if asc_AsciiDoc_Matcher_0.Match(i) {
return "AsciiDoc", true
return []string{"AsciiDoc"}
} else if asc_AGSScript_Matcher_0.Match(i) {
return "AGS Script", true
return []string{"AGS Script"}
}
return OtherLanguage, false
return nil
},
".f": func(i []byte) (string, bool) {
".f": func(i []byte) []string {
if f_Forth_Matcher_0.Match(i) {
return "Forth", true
return []string{"Forth"}
} else if f_FilebenchWML_Matcher_0.Match(i) {
return "Filebench WML", true
return []string{"Filebench WML"}
} else if f_FORTRAN_Matcher_0.Match(i) {
return "FORTRAN", true
return []string{"FORTRAN"}
}
return OtherLanguage, false
return nil
},
".h": func(i []byte) (string, bool) {
".h": func(i []byte) []string {
if h_ObjectiveDashC_Matcher_0.Match(i) {
return "Objective-C", true
return []string{"Objective-C"}
} else if h_CPlusPlus_Matcher_0.Match(i) || h_CPlusPlus_Matcher_1.Match(i) || h_CPlusPlus_Matcher_2.Match(i) || h_CPlusPlus_Matcher_3.Match(i) || h_CPlusPlus_Matcher_4.Match(i) || h_CPlusPlus_Matcher_5.Match(i) || h_CPlusPlus_Matcher_6.Match(i) {
return "C++", true
return []string{"C++"}
}
return OtherLanguage, false
return nil
},
".lsp": func(i []byte) (string, bool) {
".lsp": func(i []byte) []string {
if lsp_CommonLisp_Matcher_0.Match(i) {
return "Common Lisp", true
return []string{"Common Lisp"}
} else if lsp_NewLisp_Matcher_0.Match(i) {
return "NewLisp", true
return []string{"NewLisp"}
}
return OtherLanguage, false
return nil
},
".lisp": func(i []byte) (string, bool) {
".lisp": func(i []byte) []string {
if lisp_CommonLisp_Matcher_0.Match(i) {
return "Common Lisp", true
return []string{"Common Lisp"}
} else if lisp_NewLisp_Matcher_0.Match(i) {
return "NewLisp", true
return []string{"NewLisp"}
}
return OtherLanguage, false
return nil
},
".md": func(i []byte) (string, bool) {
".md": func(i []byte) []string {
if md_Markdown_Matcher_0.Match(i) || md_Markdown_Matcher_1.Match(i) {
return "Markdown", true
return []string{"Markdown"}
} else if md_GCCmachinedescription_Matcher_0.Match(i) {
return "GCC machine description", true
return []string{"GCC machine description"}
}
return "Markdown", true
return []string{"Markdown"}
},
".ms": func(i []byte) (string, bool) {
".ms": func(i []byte) []string {
if ms_Groff_Matcher_0.Match(i) {
return "Groff", true
return []string{"Groff"}
}
return "MAXScript", true
return []string{"MAXScript"}
},
".mod": func(i []byte) (string, bool) {
".mod": func(i []byte) []string {
if mod_XML_Matcher_0.Match(i) {
return "XML", true
return []string{"XML"}
} else if mod_ModulaDash2_Matcher_0.Match(i) || mod_ModulaDash2_Matcher_1.Match(i) {
return "Modula-2", true
return []string{"Modula-2"}
}
return "Linux Kernel Module", false
return []string{"Linux Kernel Module", "AMPL"}
},
".pro": func(i []byte) (string, bool) {
".pro": func(i []byte) []string {
if pro_Prolog_Matcher_0.Match(i) {
return "Prolog", true
return []string{"Prolog"}
} else if pro_INI_Matcher_0.Match(i) {
return "INI", true
return []string{"INI"}
} else if pro_QMake_Matcher_0.Match(i) && pro_QMake_Matcher_1.Match(i) {
return "QMake", true
return []string{"QMake"}
} else if pro_IDL_Matcher_0.Match(i) {
return "IDL", true
return []string{"IDL"}
}
return OtherLanguage, false
return nil
},
".rpy": func(i []byte) (string, bool) {
".rpy": func(i []byte) []string {
if rpy_Python_Matcher_0.Match(i) {
return "Python", true
return []string{"Python"}
}
return "Ren'Py", true
return []string{"Ren'Py"}
},
}

View File

@ -2,7 +2,7 @@ package slinguist
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: fe8b44ab8a225b1ffa75b983b916ea22fee5b6f7
// Extracted from github/linguist commit: 0123456789abcdef0123456789abcdef01234567
import "gopkg.in/toqueteos/substring.v1"

View File

@ -2,7 +2,7 @@ package slinguist
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: fe8b44ab8a225b1ffa75b983b916ea22fee5b6f7
// Extracted from github/linguist commit: 0123456789abcdef0123456789abcdef01234567
var languagesByExtension = map[string][]string{
".abap": {"ABAP"},
@ -10,3 +10,9 @@ var languagesByExtension = map[string][]string{
".bsl": {"1C Enterprise"},
".os": {"1C Enterprise"},
}
var extensionsByLanguage = map[string][]string{
"1C Enterprise": {".bsl", ".os"},
"ABAP": {".abap"},
"ABNF": {".abnf"},
}

View File

@ -2,11 +2,11 @@ package slinguist
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: fe8b44ab8a225b1ffa75b983b916ea22fee5b6f7
// Extracted from github/linguist commit: 0123456789abcdef0123456789abcdef01234567
var languagesByFilename = map[string]string{
"APKBUILD": "Alpine Abuild",
"CMakeLists.txt": "CMake",
"Cakefile": "CoffeeScript",
"mix.lock": "Elixir",
var languagesByFilename = map[string][]string{
"APKBUILD": {"Alpine Abuild"},
"CMakeLists.txt": {"CMake"},
"Cakefile": {"CoffeeScript"},
"mix.lock": {"Elixir"},
}

File diff suppressed because it is too large Load Diff

View File

@ -2,7 +2,7 @@ package slinguist
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: fe8b44ab8a225b1ffa75b983b916ea22fee5b6f7
// Extracted from github/linguist commit: 0123456789abcdef0123456789abcdef01234567
var languagesByInterpreter = map[string][]string{
"bash": {"Shell"},

View File

@ -2,7 +2,7 @@ package slinguist
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: fe8b44ab8a225b1ffa75b983b916ea22fee5b6f7
// Extracted from github/linguist commit: 0123456789abcdef0123456789abcdef01234567
var languagesType = map[string]Type{
"Scaml": Markup,

View File

@ -2,7 +2,7 @@ package slinguist
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: fe8b44ab8a225b1ffa75b983b916ea22fee5b6f7
// Extracted from github/linguist commit: 0123456789abcdef0123456789abcdef01234567
import "gopkg.in/toqueteos/substring.v1"

View File

@ -15,7 +15,7 @@ var typeToTypeConst = map[string]string{
"prose": "Prose",
}
// Types reads from buf and builds type.go file from typeTmplPath.
// Types reads from data and builds a source file from typeTmplPath.
func Types(data []byte, typeTmplPath, typeTmplName, commit string) ([]byte, error) {
languages := make(map[string]*languageInfo)
if err := yaml.Unmarshal(data, &languages); err != nil {

View File

@ -2,13 +2,13 @@ package generator
import (
"bytes"
"html/template"
"io"
"text/template"
yaml "gopkg.in/yaml.v2"
)
// Vendor reads from buf and builds vendor_matchers.go file from vendorTmplPath.
// Vendor reads from data and builds a source file from vendorTmplPath.
func Vendor(data []byte, vendorTmplPath, vendorTmplName, commit string) ([]byte, error) {
var regexpList []string
if err := yaml.Unmarshal(data, &regexpList); err != nil {

View File

@ -11,8 +11,8 @@ const (
// languages info file
languagesYAML = ".linguist/lib/linguist/languages.yml"
// extensions_map.go generation
extensionsFile = "extensions_map.go"
// extension.go generation
extensionsFile = "extension.go"
extensionsTmplPath = "internal/code-generator/assets/extensions.go.tmpl"
extensionsTmpl = "extensions.go.tmpl"
@ -22,35 +22,35 @@ const (
contentTmplPath = "internal/code-generator/assets/content.go.tmpl"
contentTmpl = "content.go.tmpl"
// vendor_matchers.go generation
// vendor.go generation
vendorYAML = ".linguist/lib/linguist/vendor.yml"
vendorFile = "vendor_matchers.go"
vendorFile = "vendor.go"
vendorTmplPath = "internal/code-generator/assets/vendor.go.tmpl"
vendorTmpl = "vendor.go.tmpl"
// documentation_matchers.go generation
// documentation.go generation
documentationYAML = ".linguist/lib/linguist/documentation.yml"
documentationFile = "documentation_matchers.go"
documentationFile = "documentation.go"
documentationTmplPath = "internal/code-generator/assets/documentation.go.tmpl"
documentationTmpl = "documentation.go.tmpl"
// type.go generation
typeFile = "types_map.go"
typeFile = "type.go"
typeTmplPath = "internal/code-generator/assets/types.go.tmpl"
typeTmpl = "types.go.tmpl"
// interpreters_map.go generation
interpretersFile = "interpreters_map.go"
// interpreter.go generation
interpretersFile = "interpreter.go"
interpretersTmplPath = "internal/code-generator/assets/interpreters.go.tmpl"
interpretersTmpl = "interpreters.go.tmpl"
// filenames_map.go generation
filenamesFile = "filenames_map.go"
// filename.go generation
filenamesFile = "filename.go"
filenamesTmplPath = "internal/code-generator/assets/filenames.go.tmpl"
filenamesTmpl = "filenames.go.tmpl"
// aliases_map.go generation
aliasesFile = "aliases_map.go"
// alias.go generation
aliasesFile = "alias.go"
aliasesTmplPath = "internal/code-generator/assets/aliases.go.tmpl"
aliasesTmpl = "aliases.go.tmpl"

View File

@ -5,7 +5,13 @@ import (
"regexp"
)
const byteLimit = 100000
func Tokenize(content []byte) []string {
if len(content) > byteLimit {
content = content[:byteLimit]
}
tokens := make([][]byte, 0, 50)
for _, extract := range extractTokens {
var extractedTokens [][]byte

View File

@ -2,7 +2,7 @@ package slinguist
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: 60f864a138650dd17fafc94814be9ee2d3aaef8c
// Extracted from github/linguist commit: b6460f8ed6b249281ada099ca28bd8f1230b8892
var languagesByInterpreter = map[string][]string{
"Rscript": {"R"},

View File

@ -5,40 +5,60 @@ import (
"regexp"
)
// GetLanguageByModeline returns the language of the given content looking for the modeline,
// and safe to indicate the sureness of returned language.
func GetLanguageByModeline(content []byte) (lang string, safe bool) {
const (
searchScope = 5
)
// GetLanguagesByModeline returns a slice of possible languages for the given content; filename is ignored.
// It complies with the signature required by the Strategy type.
func GetLanguagesByModeline(filename string, content []byte) []string {
headFoot := getHeaderAndFooter(content)
var languages []string
for _, getLang := range modelinesFunc {
lang, safe = getLang(headFoot)
if safe {
languages = getLang("", headFoot)
if len(languages) > 0 {
break
}
}
return
return languages
}
func getHeaderAndFooter(content []byte) []byte {
const (
searchScope = 5
eol = "\n"
)
if bytes.Count(content, []byte(eol)) < 2*searchScope {
if bytes.Count(content, []byte("\n")) < 2*searchScope {
return content
}
splitted := bytes.Split(content, []byte(eol))
header := splitted[:searchScope]
footer := splitted[len(splitted)-searchScope:]
headerAndFooter := append(header, footer...)
return bytes.Join(headerAndFooter, []byte(eol))
header := headScope(content, searchScope)
footer := footScope(content, searchScope)
headerAndFooter := make([]byte, 0, len(content[:header])+len(content[footer:]))
headerAndFooter = append(headerAndFooter, content[:header]...)
headerAndFooter = append(headerAndFooter, content[footer:]...)
return headerAndFooter
}
var modelinesFunc = []func(content []byte) (string, bool){
GetLanguageByEmacsModeline,
GetLanguageByVimModeline,
// headScope returns the index in content of the newline that terminates the
// scope-th line, so that content[:index] holds the first scope lines without
// the trailing newline of the last one.
//
// If content has fewer than scope newlines, the whole content is the head and
// len(content) is returned. A non-positive scope yields 0.
func headScope(content []byte, scope int) (index int) {
	for i := 0; i < scope; i++ {
		eol := bytes.IndexByte(content[index:], '\n')
		if eol < 0 {
			// Ran out of lines before reaching scope: everything is head.
			return len(content)
		}

		// Step past the newline so the next search starts on the following line.
		index += eol + 1
	}

	if index == 0 {
		// No line was requested (scope <= 0): the head is empty.
		return 0
	}

	// index points just past the last newline found; report the newline itself.
	return index - 1
}
// footScope returns the index in content just after the scope-th newline
// counted from the end, so that content[index:] holds the trailing part of
// content that follows that newline.
//
// If content has fewer than scope newlines, the whole content is the foot and
// 0 is returned instead of slicing out of range. A non-positive scope yields
// len(content), i.e. an empty foot.
func footScope(content []byte, scope int) (index int) {
	if scope <= 0 {
		return len(content)
	}

	for i := 0; i < scope; i++ {
		index = bytes.LastIndexByte(content, '\n')
		if index < 0 {
			// Ran out of lines before reaching scope: everything is foot.
			return 0
		}

		// Drop the newline and everything after it before searching again.
		content = content[:index]
	}

	// index is the position of the newline; the foot starts right after it.
	return index + 1
}
var modelinesFunc = []func(filename string, content []byte) []string{
GetLanguagesByEmacsModeline,
GetLanguagesByVimModeline,
}
var (
@ -51,9 +71,20 @@ var (
// GetLanguageByEmacsModeline reports the language named by an Emacs modeline
// found in content. It returns the first language given by
// GetLanguagesByEmacsModeline together with true, or OtherLanguage and false
// when no language could be retrieved.
func GetLanguageByEmacsModeline(content []byte) (string, bool) {
	if langs := GetLanguagesByEmacsModeline("", content); len(langs) > 0 {
		return langs[0], true
	}

	return OtherLanguage, false
}
// GetLanguagesByEmacsModeline returns a slice of possible languages for the given content; filename is ignored.
// It complies with the signature required by the Strategy type.
func GetLanguagesByEmacsModeline(filename string, content []byte) []string {
matched := reEmacsModeline.FindAllSubmatch(content, -1)
if matched == nil {
return OtherLanguage, false
return nil
}
// only take the last matched line, discard previous lines
@ -66,22 +97,38 @@ func GetLanguageByEmacsModeline(content []byte) (string, bool) {
alias = string(lastLineMatched)
}
return GetLanguageByAlias(alias)
language, ok := GetLanguageByAlias(alias)
if !ok {
return nil
}
return []string{language}
}
// GetLanguageByVimModeline reports the language named by a Vim modeline found
// in content. It returns the first language given by
// GetLanguagesByVimModeline together with true, or OtherLanguage and false
// when no language could be retrieved.
func GetLanguageByVimModeline(content []byte) (string, bool) {
	if langs := GetLanguagesByVimModeline("", content); len(langs) > 0 {
		return langs[0], true
	}

	return OtherLanguage, false
}
// GetLanguagesByVimModeline returns a slice of possible languages for the given content; filename is ignored.
// It complies with the signature required by the Strategy type.
func GetLanguagesByVimModeline(filename string, content []byte) []string {
matched := reVimModeline.FindAllSubmatch(content, -1)
if matched == nil {
return OtherLanguage, false
return nil
}
// only take the last matched line, discard previous lines
lastLineMatched := matched[len(matched)-1][1]
matchedAlias := reVimLang.FindAllSubmatch(lastLineMatched, -1)
if matchedAlias == nil {
return OtherLanguage, false
return nil
}
alias := string(matchedAlias[0][1])
@ -92,11 +139,15 @@ func GetLanguageByVimModeline(content []byte) (string, bool) {
for _, match := range matchedAlias {
otherAlias := string(match[1])
if otherAlias != alias {
alias = OtherLanguage
break
return nil
}
}
}
return GetLanguageByAlias(alias)
language, ok := GetLanguageByAlias(alias)
if !ok {
return nil
}
return []string{language}
}

View File

@ -1,87 +0,0 @@
package slinguist
import (
"io/ioutil"
"path/filepath"
. "gopkg.in/check.v1"
)
const (
modelinesDir = ".linguist/test/fixtures/Data/Modelines"
samplesDir = ".linguist/samples"
)
func (s *TSuite) TestGetLanguageByModeline(c *C) {
linguistTests := []struct {
filename string
expectedLang string
expectedSafe bool
}{
// Emacs
{filename: filepath.Join(modelinesDir, "example_smalltalk.md"), expectedLang: "Smalltalk", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "fundamentalEmacs.c"), expectedLang: "Text", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "iamphp.inc"), expectedLang: "PHP", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "seeplusplusEmacs1"), expectedLang: "C++", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "seeplusplusEmacs2"), expectedLang: "C++", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "seeplusplusEmacs3"), expectedLang: "C++", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "seeplusplusEmacs4"), expectedLang: "C++", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "seeplusplusEmacs5"), expectedLang: "C++", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "seeplusplusEmacs6"), expectedLang: "C++", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "seeplusplusEmacs7"), expectedLang: "C++", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "seeplusplusEmacs9"), expectedLang: "C++", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "seeplusplusEmacs10"), expectedLang: "C++", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "seeplusplusEmacs11"), expectedLang: "C++", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "seeplusplusEmacs12"), expectedLang: "C++", expectedSafe: true},
// Vim
{filename: filepath.Join(modelinesDir, "seeplusplus"), expectedLang: "C++", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "iamjs.pl"), expectedLang: "JavaScript", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "iamjs2.pl"), expectedLang: "JavaScript", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "not_perl.pl"), expectedLang: "Prolog", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "ruby"), expectedLang: "Ruby", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "ruby2"), expectedLang: "Ruby", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "ruby3"), expectedLang: "Ruby", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "ruby4"), expectedLang: "Ruby", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "ruby5"), expectedLang: "Ruby", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "ruby6"), expectedLang: "Ruby", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "ruby7"), expectedLang: "Ruby", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "ruby8"), expectedLang: "Ruby", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "ruby9"), expectedLang: "Ruby", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "ruby10"), expectedLang: "Ruby", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "ruby11"), expectedLang: "Ruby", expectedSafe: true},
{filename: filepath.Join(modelinesDir, "ruby12"), expectedLang: "Ruby", expectedSafe: true},
{filename: filepath.Join(samplesDir, "C/main.c"), expectedLang: OtherLanguage, expectedSafe: false},
}
for _, test := range linguistTests {
content, err := ioutil.ReadFile(test.filename)
c.Assert(err, Equals, nil)
lang, safe := GetLanguageByModeline(content)
c.Assert(lang, Equals, test.expectedLang)
c.Assert(safe, Equals, test.expectedSafe)
}
const (
wrongVim = `# vim: set syntax=ruby ft =python filetype=perl :`
rightVim = `/* vim: set syntax=python ft =python filetype=python */`
noLangVim = `/* vim: set shiftwidth=4 softtabstop=0 cindent cinoptions={1s: */`
)
tests := []struct {
content []byte
expectedLang string
expectedSafe bool
}{
{content: []byte(wrongVim), expectedLang: OtherLanguage, expectedSafe: false},
{content: []byte(rightVim), expectedLang: "Python", expectedSafe: true},
{content: []byte(noLangVim), expectedLang: OtherLanguage, expectedSafe: false},
}
for _, test := range tests {
lang, safe := GetLanguageByModeline(test.content)
c.Assert(lang, Equals, test.expectedLang)
c.Assert(safe, Equals, test.expectedSafe)
}
}

View File

@ -14,17 +14,11 @@ var (
pythonVersion = regexp.MustCompile(`python\d\.\d+`)
)
// GetLanguageByShebang returns the language of the given content looking for the shebang line,
// and safe to indicate the sureness of returned language.
func GetLanguageByShebang(content []byte) (lang string, safe bool) {
// GetLanguagesByShebang returns a slice of possible languages for the given content; filename is ignored.
// It complies with the signature required by the Strategy type.
func GetLanguagesByShebang(filename string, content []byte) (languages []string) {
interpreter := getInterpreter(content)
lang = OtherLanguage
if langs, ok := languagesByInterpreter[interpreter]; ok {
lang = langs[0]
safe = len(langs) == 1
}
return
return languagesByInterpreter[interpreter]
}
func getInterpreter(data []byte) (interpreter string) {

View File

@ -1,60 +0,0 @@
package slinguist
import . "gopkg.in/check.v1"
const (
multilineExecHack = `#!/bin/sh
# Next line is comment in Tcl, but not in sh... \
exec tclsh "$0" ${1+"$@"}`
multilineNoExecHack = `#!/bin/sh
#<<<#
echo "A shell script in a zkl program ($0)"
echo "Now run zkl <this file> with Hello World as args"
zkl $0 Hello World!
exit
#<<<#
println("The shell script says ",vm.arglist.concat(" "));`
)
func (s *TSuite) TestGetLanguageByShebang(c *C) {
lang, safe := GetLanguageByShebang([]byte(`#!/unknown/interpreter`))
c.Assert(lang, Equals, OtherLanguage)
c.Assert(safe, Equals, false)
lang, safe = GetLanguageByShebang([]byte(`no shebang`))
c.Assert(lang, Equals, OtherLanguage)
c.Assert(safe, Equals, false)
lang, safe = GetLanguageByShebang([]byte(`#!/usr/bin/env`))
c.Assert(lang, Equals, OtherLanguage)
c.Assert(safe, Equals, false)
lang, safe = GetLanguageByShebang([]byte(`#!/usr/bin/python -tt`))
c.Assert(lang, Equals, "Python")
c.Assert(safe, Equals, true)
lang, safe = GetLanguageByShebang([]byte(`#!/usr/bin/env python2.6`))
c.Assert(lang, Equals, "Python")
c.Assert(safe, Equals, true)
lang, safe = GetLanguageByShebang([]byte(`#!/usr/bin/env perl`))
c.Assert(lang, Equals, "Perl")
c.Assert(safe, Equals, true)
lang, safe = GetLanguageByShebang([]byte(`#! /bin/sh`))
c.Assert(lang, Equals, "Shell")
c.Assert(safe, Equals, true)
lang, safe = GetLanguageByShebang([]byte(`#!bash`))
c.Assert(lang, Equals, "Shell")
c.Assert(safe, Equals, true)
lang, safe = GetLanguageByShebang([]byte(multilineExecHack))
c.Assert(lang, Equals, "Tcl")
c.Assert(safe, Equals, true)
lang, safe = GetLanguageByShebang([]byte(multilineNoExecHack))
c.Assert(lang, Equals, "Shell")
c.Assert(safe, Equals, true)
}

473
type.go
View File

@ -1,22 +1,459 @@
package slinguist
type Type int
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: b6460f8ed6b249281ada099ca28bd8f1230b8892
const (
// Language's type. Either data, programming, markup, prose, or unknown.
Unknown Type = iota
Data
Programming
Markup
Prose
)
// GetLanguageType returns the given language's type.
func GetLanguageType(language string) (langType Type) {
langType, ok := languagesType[language]
if !ok {
langType = Unknown
}
return langType
var languagesType = map[string]Type{
"1C Enterprise": Programming,
"ABAP": Programming,
"ABNF": Data,
"AGS Script": Programming,
"AMPL": Programming,
"ANTLR": Programming,
"API Blueprint": Markup,
"APL": Programming,
"ASN.1": Data,
"ASP": Programming,
"ATS": Programming,
"ActionScript": Programming,
"Ada": Programming,
"Agda": Programming,
"Alloy": Programming,
"Alpine Abuild": Programming,
"Ant Build System": Data,
"ApacheConf": Markup,
"Apex": Programming,
"Apollo Guidance Computer": Programming,
"AppleScript": Programming,
"Arc": Programming,
"Arduino": Programming,
"AsciiDoc": Prose,
"AspectJ": Programming,
"Assembly": Programming,
"Augeas": Programming,
"AutoHotkey": Programming,
"AutoIt": Programming,
"Awk": Programming,
"Batchfile": Programming,
"Befunge": Programming,
"Bison": Programming,
"BitBake": Programming,
"Blade": Markup,
"BlitzBasic": Programming,
"BlitzMax": Programming,
"Bluespec": Programming,
"Boo": Programming,
"Brainfuck": Programming,
"Brightscript": Programming,
"Bro": Programming,
"C": Programming,
"C#": Programming,
"C++": Programming,
"C-ObjDump": Data,
"C2hs Haskell": Programming,
"CLIPS": Programming,
"CMake": Programming,
"COBOL": Programming,
"COLLADA": Data,
"CSON": Data,
"CSS": Markup,
"CSV": Data,
"CWeb": Programming,
"Cap'n Proto": Programming,
"CartoCSS": Programming,
"Ceylon": Programming,
"Chapel": Programming,
"Charity": Programming,
"ChucK": Programming,
"Cirru": Programming,
"Clarion": Programming,
"Clean": Programming,
"Click": Programming,
"Clojure": Programming,
"Closure Templates": Markup,
"CoffeeScript": Programming,
"ColdFusion": Programming,
"ColdFusion CFC": Programming,
"Common Lisp": Programming,
"Component Pascal": Programming,
"Cool": Programming,
"Coq": Programming,
"Cpp-ObjDump": Data,
"Creole": Prose,
"Crystal": Programming,
"Csound": Programming,
"Csound Document": Programming,
"Csound Score": Programming,
"Cuda": Programming,
"Cycript": Programming,
"Cython": Programming,
"D": Programming,
"D-ObjDump": Data,
"DIGITAL Command Language": Programming,
"DM": Programming,
"DNS Zone": Data,
"DTrace": Programming,
"Darcs Patch": Data,
"Dart": Programming,
"Diff": Data,
"Dockerfile": Data,
"Dogescript": Programming,
"Dylan": Programming,
"E": Programming,
"EBNF": Data,
"ECL": Programming,
"ECLiPSe": Programming,
"EJS": Markup,
"EQ": Programming,
"Eagle": Markup,
"Ecere Projects": Data,
"Eiffel": Programming,
"Elixir": Programming,
"Elm": Programming,
"Emacs Lisp": Programming,
"EmberScript": Programming,
"Erlang": Programming,
"F#": Programming,
"FLUX": Programming,
"Factor": Programming,
"Fancy": Programming,
"Fantom": Programming,
"Filebench WML": Programming,
"Filterscript": Programming,
"Formatted": Data,
"Forth": Programming,
"Fortran": Programming,
"FreeMarker": Programming,
"Frege": Programming,
"G-code": Data,
"GAMS": Programming,
"GAP": Programming,
"GCC Machine Description": Programming,
"GDB": Programming,
"GDScript": Programming,
"GLSL": Programming,
"GN": Data,
"Game Maker Language": Programming,
"Genie": Programming,
"Genshi": Programming,
"Gentoo Ebuild": Programming,
"Gentoo Eclass": Programming,
"Gettext Catalog": Prose,
"Gherkin": Programming,
"Glyph": Programming,
"Gnuplot": Programming,
"Go": Programming,
"Golo": Programming,
"Gosu": Programming,
"Grace": Programming,
"Gradle": Data,
"Grammatical Framework": Programming,
"Graph Modeling Language": Data,
"GraphQL": Data,
"Graphviz (DOT)": Data,
"Groovy": Programming,
"Groovy Server Pages": Programming,
"HCL": Programming,
"HLSL": Programming,
"HTML": Markup,
"HTML+Django": Markup,
"HTML+ECR": Markup,
"HTML+EEX": Markup,
"HTML+ERB": Markup,
"HTML+PHP": Markup,
"HTTP": Data,
"Hack": Programming,
"Haml": Markup,
"Handlebars": Markup,
"Harbour": Programming,
"Haskell": Programming,
"Haxe": Programming,
"Hy": Programming,
"HyPhy": Programming,
"IDL": Programming,
"IGOR Pro": Programming,
"INI": Data,
"IRC log": Data,
"Idris": Programming,
"Inform 7": Programming,
"Inno Setup": Programming,
"Io": Programming,
"Ioke": Programming,
"Isabelle": Programming,
"Isabelle ROOT": Programming,
"J": Programming,
"JFlex": Programming,
"JSON": Data,
"JSON5": Data,
"JSONLD": Data,
"JSONiq": Programming,
"JSX": Programming,
"Jasmin": Programming,
"Java": Programming,
"Java Server Pages": Programming,
"JavaScript": Programming,
"Jison": Programming,
"Jison Lex": Programming,
"Jolie": Programming,
"Julia": Programming,
"Jupyter Notebook": Markup,
"KRL": Programming,
"KiCad": Programming,
"Kit": Markup,
"Kotlin": Programming,
"LFE": Programming,
"LLVM": Programming,
"LOLCODE": Programming,
"LSL": Programming,
"LabVIEW": Programming,
"Lasso": Programming,
"Latte": Markup,
"Lean": Programming,
"Less": Markup,
"Lex": Programming,
"LilyPond": Programming,
"Limbo": Programming,
"Linker Script": Data,
"Linux Kernel Module": Data,
"Liquid": Markup,
"Literate Agda": Programming,
"Literate CoffeeScript": Programming,
"Literate Haskell": Programming,
"LiveScript": Programming,
"Logos": Programming,
"Logtalk": Programming,
"LookML": Programming,
"LoomScript": Programming,
"Lua": Programming,
"M": Programming,
"M4": Programming,
"M4Sugar": Programming,
"MAXScript": Programming,
"MQL4": Programming,
"MQL5": Programming,
"MTML": Markup,
"MUF": Programming,
"Makefile": Programming,
"Mako": Programming,
"Markdown": Prose,
"Marko": Markup,
"Mask": Markup,
"Mathematica": Programming,
"Matlab": Programming,
"Maven POM": Data,
"Max": Programming,
"MediaWiki": Prose,
"Mercury": Programming,
"Meson": Programming,
"Metal": Programming,
"MiniD": Programming,
"Mirah": Programming,
"Modelica": Programming,
"Modula-2": Programming,
"Module Management System": Programming,
"Monkey": Programming,
"Moocode": Programming,
"MoonScript": Programming,
"Myghty": Programming,
"NCL": Programming,
"NL": Data,
"NSIS": Programming,
"Nemerle": Programming,
"NetLinx": Programming,
"NetLinx+ERB": Programming,
"NetLogo": Programming,
"NewLisp": Programming,
"Nginx": Markup,
"Nim": Programming,
"Ninja": Data,
"Nit": Programming,
"Nix": Programming,
"Nu": Programming,
"NumPy": Programming,
"OCaml": Programming,
"ObjDump": Data,
"Objective-C": Programming,
"Objective-C++": Programming,
"Objective-J": Programming,
"Omgrofl": Programming,
"Opa": Programming,
"Opal": Programming,
"OpenCL": Programming,
"OpenEdge ABL": Programming,
"OpenRC runscript": Programming,
"OpenSCAD": Programming,
"OpenType Feature File": Data,
"Org": Prose,
"Ox": Programming,
"Oxygene": Programming,
"Oz": Programming,
"P4": Programming,
"PAWN": Programming,
"PHP": Programming,
"PLSQL": Programming,
"PLpgSQL": Programming,
"POV-Ray SDL": Programming,
"Pan": Programming,
"Papyrus": Programming,
"Parrot": Programming,
"Parrot Assembly": Programming,
"Parrot Internal Representation": Programming,
"Pascal": Programming,
"Pep8": Programming,
"Perl": Programming,
"Perl6": Programming,
"Pic": Markup,
"Pickle": Data,
"PicoLisp": Programming,
"PigLatin": Programming,
"Pike": Programming,
"Pod": Prose,
"PogoScript": Programming,
"Pony": Programming,
"PostScript": Markup,
"PowerBuilder": Programming,
"PowerShell": Programming,
"Processing": Programming,
"Prolog": Programming,
"Propeller Spin": Programming,
"Protocol Buffer": Markup,
"Public Key": Data,
"Pug": Markup,
"Puppet": Programming,
"Pure Data": Programming,
"PureBasic": Programming,
"PureScript": Programming,
"Python": Programming,
"Python console": Programming,
"Python traceback": Data,
"QML": Programming,
"QMake": Programming,
"R": Programming,
"RAML": Markup,
"RDoc": Prose,
"REALbasic": Programming,
"REXX": Programming,
"RHTML": Markup,
"RMarkdown": Prose,
"RPM Spec": Data,
"RUNOFF": Markup,
"Racket": Programming,
"Ragel": Programming,
"Rascal": Programming,
"Raw token data": Data,
"Reason": Programming,
"Rebol": Programming,
"Red": Programming,
"Redcode": Programming,
"Regular Expression": Data,
"Ren'Py": Programming,
"RenderScript": Programming,
"RobotFramework": Programming,
"Roff": Markup,
"Rouge": Programming,
"Ruby": Programming,
"Rust": Programming,
"SAS": Programming,
"SCSS": Markup,
"SMT": Programming,
"SPARQL": Data,
"SQF": Programming,
"SQL": Data,
"SQLPL": Programming,
"SRecode Template": Markup,
"STON": Data,
"SVG": Data,
"Sage": Programming,
"SaltStack": Programming,
"Sass": Markup,
"Scala": Programming,
"Scaml": Markup,
"Scheme": Programming,
"Scilab": Programming,
"Self": Programming,
"ShaderLab": Programming,
"Shell": Programming,
"ShellSession": Programming,
"Shen": Programming,
"Slash": Programming,
"Slim": Markup,
"Smali": Programming,
"Smalltalk": Programming,
"Smarty": Programming,
"SourcePawn": Programming,
"Spline Font Database": Data,
"Squirrel": Programming,
"Stan": Programming,
"Standard ML": Programming,
"Stata": Programming,
"Stylus": Markup,
"SubRip Text": Data,
"Sublime Text Config": Data,
"SuperCollider": Programming,
"Swift": Programming,
"SystemVerilog": Programming,
"TI Program": Programming,
"TLA": Programming,
"TOML": Data,
"TXL": Programming,
"Tcl": Programming,
"Tcsh": Programming,
"TeX": Markup,
"Tea": Markup,
"Terra": Programming,
"Text": Prose,
"Textile": Prose,
"Thrift": Programming,
"Turing": Programming,
"Turtle": Data,
"Twig": Markup,
"Type Language": Data,
"TypeScript": Programming,
"Unified Parallel C": Programming,
"Unity3D Asset": Data,
"Unix Assembly": Programming,
"Uno": Programming,
"UnrealScript": Programming,
"UrWeb": Programming,
"VCL": Programming,
"VHDL": Programming,
"Vala": Programming,
"Verilog": Programming,
"Vim script": Programming,
"Visual Basic": Programming,
"Volt": Programming,
"Vue": Markup,
"Wavefront Material": Data,
"Wavefront Object": Data,
"Web Ontology Language": Markup,
"WebAssembly": Programming,
"WebIDL": Programming,
"World of Warcraft Addon Data": Data,
"X10": Programming,
"XC": Programming,
"XCompose": Data,
"XML": Data,
"XPages": Programming,
"XProc": Programming,
"XQuery": Programming,
"XS": Programming,
"XSLT": Programming,
"Xojo": Programming,
"Xtend": Programming,
"YAML": Data,
"YANG": Data,
"Yacc": Programming,
"Zephir": Programming,
"Zimpl": Programming,
"desktop": Data,
"eC": Programming,
"edn": Data,
"fish": Programming,
"mupad": Programming,
"nesC": Programming,
"ooc": Programming,
"reStructuredText": Prose,
"wisp": Programming,
"xBase": Programming,
}

View File

@ -1,35 +0,0 @@
package slinguist
import . "gopkg.in/check.v1"
// TestGetLanguageType asserts the Type reported by GetLanguageType for two
// sample languages of each known type, plus names that are not languages and
// must therefore map to Unknown.
func (s *TSuite) TestGetLanguageType(c *C) {
	cases := []struct {
		language string
		want     Type
	}{
		{"BestLanguageEver", Unknown},
		{"JSON", Data},
		{"COLLADA", Data},
		{"Go", Programming},
		{"Brainfuck", Programming},
		{"HTML", Markup},
		{"Sass", Markup},
		{"AsciiDoc", Prose},
		{"Textile", Prose},
		{"Whatever", Unknown},
	}

	for _, tc := range cases {
		got := GetLanguageType(tc.language)
		c.Assert(got, Equals, tc.want)
	}
}

View File

@ -1,458 +0,0 @@
package slinguist
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: 60f864a138650dd17fafc94814be9ee2d3aaef8c
var languagesType = map[string]Type{
"1C Enterprise": Programming,
"ABAP": Programming,
"ABNF": Data,
"AGS Script": Programming,
"AMPL": Programming,
"ANTLR": Programming,
"API Blueprint": Markup,
"APL": Programming,
"ASN.1": Data,
"ASP": Programming,
"ATS": Programming,
"ActionScript": Programming,
"Ada": Programming,
"Agda": Programming,
"Alloy": Programming,
"Alpine Abuild": Programming,
"Ant Build System": Data,
"ApacheConf": Markup,
"Apex": Programming,
"Apollo Guidance Computer": Programming,
"AppleScript": Programming,
"Arc": Programming,
"Arduino": Programming,
"AsciiDoc": Prose,
"AspectJ": Programming,
"Assembly": Programming,
"Augeas": Programming,
"AutoHotkey": Programming,
"AutoIt": Programming,
"Awk": Programming,
"Batchfile": Programming,
"Befunge": Programming,
"Bison": Programming,
"BitBake": Programming,
"Blade": Markup,
"BlitzBasic": Programming,
"BlitzMax": Programming,
"Bluespec": Programming,
"Boo": Programming,
"Brainfuck": Programming,
"Brightscript": Programming,
"Bro": Programming,
"C": Programming,
"C#": Programming,
"C++": Programming,
"C-ObjDump": Data,
"C2hs Haskell": Programming,
"CLIPS": Programming,
"CMake": Programming,
"COBOL": Programming,
"COLLADA": Data,
"CSON": Data,
"CSS": Markup,
"CSV": Data,
"CWeb": Programming,
"Cap'n Proto": Programming,
"CartoCSS": Programming,
"Ceylon": Programming,
"Chapel": Programming,
"Charity": Programming,
"ChucK": Programming,
"Cirru": Programming,
"Clarion": Programming,
"Clean": Programming,
"Click": Programming,
"Clojure": Programming,
"Closure Templates": Markup,
"CoffeeScript": Programming,
"ColdFusion": Programming,
"ColdFusion CFC": Programming,
"Common Lisp": Programming,
"Component Pascal": Programming,
"Cool": Programming,
"Coq": Programming,
"Cpp-ObjDump": Data,
"Creole": Prose,
"Crystal": Programming,
"Csound": Programming,
"Csound Document": Programming,
"Csound Score": Programming,
"Cuda": Programming,
"Cycript": Programming,
"Cython": Programming,
"D": Programming,
"D-ObjDump": Data,
"DIGITAL Command Language": Programming,
"DM": Programming,
"DNS Zone": Data,
"DTrace": Programming,
"Darcs Patch": Data,
"Dart": Programming,
"Diff": Data,
"Dockerfile": Data,
"Dogescript": Programming,
"Dylan": Programming,
"E": Programming,
"EBNF": Data,
"ECL": Programming,
"ECLiPSe": Programming,
"EJS": Markup,
"EQ": Programming,
"Eagle": Markup,
"Ecere Projects": Data,
"Eiffel": Programming,
"Elixir": Programming,
"Elm": Programming,
"Emacs Lisp": Programming,
"EmberScript": Programming,
"Erlang": Programming,
"F#": Programming,
"FLUX": Programming,
"Factor": Programming,
"Fancy": Programming,
"Fantom": Programming,
"Filebench WML": Programming,
"Filterscript": Programming,
"Formatted": Data,
"Forth": Programming,
"Fortran": Programming,
"FreeMarker": Programming,
"Frege": Programming,
"G-code": Data,
"GAMS": Programming,
"GAP": Programming,
"GCC Machine Description": Programming,
"GDB": Programming,
"GDScript": Programming,
"GLSL": Programming,
"GN": Data,
"Game Maker Language": Programming,
"Genie": Programming,
"Genshi": Programming,
"Gentoo Ebuild": Programming,
"Gentoo Eclass": Programming,
"Gettext Catalog": Prose,
"Gherkin": Programming,
"Glyph": Programming,
"Gnuplot": Programming,
"Go": Programming,
"Golo": Programming,
"Gosu": Programming,
"Grace": Programming,
"Gradle": Data,
"Grammatical Framework": Programming,
"Graph Modeling Language": Data,
"GraphQL": Data,
"Graphviz (DOT)": Data,
"Groovy": Programming,
"Groovy Server Pages": Programming,
"HCL": Programming,
"HLSL": Programming,
"HTML": Markup,
"HTML+Django": Markup,
"HTML+ECR": Markup,
"HTML+EEX": Markup,
"HTML+ERB": Markup,
"HTML+PHP": Markup,
"HTTP": Data,
"Hack": Programming,
"Haml": Markup,
"Handlebars": Markup,
"Harbour": Programming,
"Haskell": Programming,
"Haxe": Programming,
"Hy": Programming,
"HyPhy": Programming,
"IDL": Programming,
"IGOR Pro": Programming,
"INI": Data,
"IRC log": Data,
"Idris": Programming,
"Inform 7": Programming,
"Inno Setup": Programming,
"Io": Programming,
"Ioke": Programming,
"Isabelle": Programming,
"Isabelle ROOT": Programming,
"J": Programming,
"JFlex": Programming,
"JSON": Data,
"JSON5": Data,
"JSONLD": Data,
"JSONiq": Programming,
"JSX": Programming,
"Jasmin": Programming,
"Java": Programming,
"Java Server Pages": Programming,
"JavaScript": Programming,
"Jison": Programming,
"Jison Lex": Programming,
"Jolie": Programming,
"Julia": Programming,
"Jupyter Notebook": Markup,
"KRL": Programming,
"KiCad": Programming,
"Kit": Markup,
"Kotlin": Programming,
"LFE": Programming,
"LLVM": Programming,
"LOLCODE": Programming,
"LSL": Programming,
"LabVIEW": Programming,
"Lasso": Programming,
"Latte": Markup,
"Lean": Programming,
"Less": Markup,
"Lex": Programming,
"LilyPond": Programming,
"Limbo": Programming,
"Linker Script": Data,
"Linux Kernel Module": Data,
"Liquid": Markup,
"Literate Agda": Programming,
"Literate CoffeeScript": Programming,
"Literate Haskell": Programming,
"LiveScript": Programming,
"Logos": Programming,
"Logtalk": Programming,
"LookML": Programming,
"LoomScript": Programming,
"Lua": Programming,
"M": Programming,
"M4": Programming,
"M4Sugar": Programming,
"MAXScript": Programming,
"MQL4": Programming,
"MQL5": Programming,
"MTML": Markup,
"MUF": Programming,
"Makefile": Programming,
"Mako": Programming,
"Markdown": Prose,
"Marko": Markup,
"Mask": Markup,
"Mathematica": Programming,
"Matlab": Programming,
"Maven POM": Data,
"Max": Programming,
"MediaWiki": Prose,
"Mercury": Programming,
"Meson": Programming,
"Metal": Programming,
"MiniD": Programming,
"Mirah": Programming,
"Modelica": Programming,
"Modula-2": Programming,
"Module Management System": Programming,
"Monkey": Programming,
"Moocode": Programming,
"MoonScript": Programming,
"Myghty": Programming,
"NCL": Programming,
"NL": Data,
"NSIS": Programming,
"Nemerle": Programming,
"NetLinx": Programming,
"NetLinx+ERB": Programming,
"NetLogo": Programming,
"NewLisp": Programming,
"Nginx": Markup,
"Nim": Programming,
"Ninja": Data,
"Nit": Programming,
"Nix": Programming,
"Nu": Programming,
"NumPy": Programming,
"OCaml": Programming,
"ObjDump": Data,
"Objective-C": Programming,
"Objective-C++": Programming,
"Objective-J": Programming,
"Omgrofl": Programming,
"Opa": Programming,
"Opal": Programming,
"OpenCL": Programming,
"OpenEdge ABL": Programming,
"OpenRC runscript": Programming,
"OpenSCAD": Programming,
"OpenType Feature File": Data,
"Org": Prose,
"Ox": Programming,
"Oxygene": Programming,
"Oz": Programming,
"P4": Programming,
"PAWN": Programming,
"PHP": Programming,
"PLSQL": Programming,
"PLpgSQL": Programming,
"POV-Ray SDL": Programming,
"Pan": Programming,
"Papyrus": Programming,
"Parrot": Programming,
"Parrot Assembly": Programming,
"Parrot Internal Representation": Programming,
"Pascal": Programming,
"Pep8": Programming,
"Perl": Programming,
"Perl6": Programming,
"Pic": Markup,
"Pickle": Data,
"PicoLisp": Programming,
"PigLatin": Programming,
"Pike": Programming,
"Pod": Prose,
"PogoScript": Programming,
"Pony": Programming,
"PostScript": Markup,
"PowerBuilder": Programming,
"PowerShell": Programming,
"Processing": Programming,
"Prolog": Programming,
"Propeller Spin": Programming,
"Protocol Buffer": Markup,
"Public Key": Data,
"Pug": Markup,
"Puppet": Programming,
"Pure Data": Programming,
"PureBasic": Programming,
"PureScript": Programming,
"Python": Programming,
"Python console": Programming,
"Python traceback": Data,
"QML": Programming,
"QMake": Programming,
"R": Programming,
"RAML": Markup,
"RDoc": Prose,
"REALbasic": Programming,
"REXX": Programming,
"RHTML": Markup,
"RMarkdown": Prose,
"RPM Spec": Data,
"RUNOFF": Markup,
"Racket": Programming,
"Ragel": Programming,
"Rascal": Programming,
"Raw token data": Data,
"Reason": Programming,
"Rebol": Programming,
"Red": Programming,
"Redcode": Programming,
"Regular Expression": Data,
"Ren'Py": Programming,
"RenderScript": Programming,
"RobotFramework": Programming,
"Roff": Markup,
"Rouge": Programming,
"Ruby": Programming,
"Rust": Programming,
"SAS": Programming,
"SCSS": Markup,
"SMT": Programming,
"SPARQL": Data,
"SQF": Programming,
"SQL": Data,
"SQLPL": Programming,
"SRecode Template": Markup,
"STON": Data,
"SVG": Data,
"Sage": Programming,
"SaltStack": Programming,
"Sass": Markup,
"Scala": Programming,
"Scaml": Markup,
"Scheme": Programming,
"Scilab": Programming,
"Self": Programming,
"ShaderLab": Programming,
"Shell": Programming,
"ShellSession": Programming,
"Shen": Programming,
"Slash": Programming,
"Slim": Markup,
"Smali": Programming,
"Smalltalk": Programming,
"Smarty": Programming,
"SourcePawn": Programming,
"Spline Font Database": Data,
"Squirrel": Programming,
"Stan": Programming,
"Standard ML": Programming,
"Stata": Programming,
"Stylus": Markup,
"SubRip Text": Data,
"Sublime Text Config": Data,
"SuperCollider": Programming,
"Swift": Programming,
"SystemVerilog": Programming,
"TI Program": Programming,
"TLA": Programming,
"TOML": Data,
"TXL": Programming,
"Tcl": Programming,
"Tcsh": Programming,
"TeX": Markup,
"Tea": Markup,
"Terra": Programming,
"Text": Prose,
"Textile": Prose,
"Thrift": Programming,
"Turing": Programming,
"Turtle": Data,
"Twig": Markup,
"Type Language": Data,
"TypeScript": Programming,
"Unified Parallel C": Programming,
"Unity3D Asset": Data,
"Unix Assembly": Programming,
"Uno": Programming,
"UnrealScript": Programming,
"UrWeb": Programming,
"VCL": Programming,
"VHDL": Programming,
"Vala": Programming,
"Verilog": Programming,
"Vim script": Programming,
"Visual Basic": Programming,
"Volt": Programming,
"Vue": Markup,
"Wavefront Material": Data,
"Wavefront Object": Data,
"Web Ontology Language": Markup,
"WebIDL": Programming,
"World of Warcraft Addon Data": Data,
"X10": Programming,
"XC": Programming,
"XCompose": Data,
"XML": Data,
"XPages": Programming,
"XProc": Programming,
"XQuery": Programming,
"XS": Programming,
"XSLT": Programming,
"Xojo": Programming,
"Xtend": Programming,
"YAML": Data,
"YANG": Data,
"Yacc": Programming,
"Zephir": Programming,
"Zimpl": Programming,
"desktop": Data,
"eC": Programming,
"edn": Data,
"fish": Programming,
"mupad": Programming,
"nesC": Programming,
"ooc": Programming,
"reStructuredText": Prose,
"wisp": Programming,
"xBase": Programming,
}

View File

@ -4,15 +4,32 @@ import (
"bytes"
"path/filepath"
"strings"
"gopkg.in/toqueteos/substring.v1"
)
var (
	// auxiliaryLanguages is the set of language names that
	// IsAuxiliaryLanguage reports as auxiliary. Keys are language names as
	// they appear in the generated language tables.
	//
	// "Vim script" is the name the generated languagesType table uses for
	// the language formerly called "VimL"; the older name is kept so that
	// callers still passing it get the same answer as before.
	auxiliaryLanguages = map[string]bool{
		"Other": true, "XML": true, "YAML": true, "TOML": true, "INI": true,
		"JSON": true, "TeX": true, "Public Key": true, "AsciiDoc": true,
		"AGS Script": true, "VimL": true, "Vim script": true, "Diff": true,
		"CMake": true, "fish": true,
		"Awk": true, "Graphviz (DOT)": true, "Markdown": true, "desktop": true,
		"XSLT": true, "SQL": true, "RMarkdown": true, "IRC log": true,
		"reStructuredText": true, "Twig": true, "CSS": true, "Batchfile": true,
		"Text": true, "HTML+ERB": true, "HTML": true, "Gettext Catalog": true,
		"Smarty": true, "Raw token data": true,
	}

	// configurationLanguages is the set of language names that
	// IsConfiguration treats as configuration formats.
	configurationLanguages = map[string]bool{
		"XML": true, "JSON": true, "TOML": true, "YAML": true, "INI": true, "SQL": true,
	}
)
// IsAuxiliaryLanguage reports whether lang has an entry in the
// auxiliaryLanguages set. Only key presence is checked; the stored value is
// ignored.
func IsAuxiliaryLanguage(lang string) bool {
	_, listed := auxiliaryLanguages[lang]

	return listed
}
// IsConfiguration returns whether or not path is using a configuration language.
func IsConfiguration(path string) bool {
lang, _ := GetLanguageByExtension(path)
_, is := configurationLanguages[lang]
@ -20,20 +37,19 @@ func IsConfiguration(path string) bool {
return is
}
// IsDotFile reports whether the last element of path, as returned by
// filepath.Base, starts with a dot.
func IsDotFile(path string) bool {
	base := filepath.Base(path)

	return strings.HasPrefix(base, ".")
}
// IsVendor returns whether or not path is a vendor path.
func IsVendor(path string) bool {
return findIndex(path, vendorMatchers) >= 0
return vendorMatchers.Match(path)
}
// IsDocumentation returns whether or not path is a documentation path.
func IsDocumentation(path string) bool {
return findIndex(path, documentationMatchers) >= 0
}
func findIndex(path string, matchers substring.StringsMatcher) int {
return matchers.MatchIndex(path)
return documentationMatchers.Match(path)
}
const sniffLen = 8000
@ -51,7 +67,3 @@ func IsBinary(data []byte) bool {
return true
}
var configurationLanguages = map[string]bool{
"XML": true, "JSON": true, "TOML": true, "YAML": true, "INI": true, "SQL": true,
}

View File

@ -2,42 +2,83 @@ package slinguist
import (
"bytes"
"fmt"
"testing"
. "gopkg.in/check.v1"
"github.com/stretchr/testify/assert"
)
func (s *TSuite) TestIsVendor(c *C) {
c.Assert(IsVendor("foo/bar"), Equals, false)
c.Assert(IsVendor("foo/vendor/foo"), Equals, true)
c.Assert(IsVendor(".sublime-project"), Equals, true)
c.Assert(IsVendor("leaflet.draw-src.js"), Equals, true)
c.Assert(IsVendor("foo/bar/MochiKit.js"), Equals, true)
c.Assert(IsVendor("foo/bar/dojo.js"), Equals, true)
c.Assert(IsVendor("foo/env/whatever"), Equals, true)
c.Assert(IsVendor("foo/.imageset/bar"), Equals, true)
c.Assert(IsVendor("Vagrantfile"), Equals, true)
func (s *SimpleLinguistTestSuite) TestIsVendor() {
tests := []struct {
name string
path string
expected bool
}{
{name: "TestIsVendor_1", path: "foo/bar", expected: false},
{name: "TestIsVendor_2", path: "foo/vendor/foo", expected: true},
{name: "TestIsVendor_3", path: ".sublime-project", expected: true},
{name: "TestIsVendor_4", path: "leaflet.draw-src.js", expected: true},
{name: "TestIsVendor_5", path: "foo/bar/MochiKit.js", expected: true},
{name: "TestIsVendor_6", path: "foo/bar/dojo.js", expected: true},
{name: "TestIsVendor_7", path: "foo/env/whatever", expected: true},
{name: "TestIsVendor_8", path: "foo/.imageset/bar", expected: true},
{name: "TestIsVendor_9", path: "Vagrantfile", expected: true},
}
func (s *TSuite) TestIsDocumentation(c *C) {
c.Assert(IsDocumentation("foo"), Equals, false)
c.Assert(IsDocumentation("README"), Equals, true)
for _, test := range tests {
is := IsVendor(test.path)
assert.Equal(s.T(), is, test.expected, fmt.Sprintf("%v: is = %v, expected: %v", test.name, is, test.expected))
}
}
func (s *TSuite) TestIsConfiguration(c *C) {
c.Assert(IsConfiguration("foo"), Equals, false)
c.Assert(IsConfiguration("foo.ini"), Equals, true)
c.Assert(IsConfiguration("foo.json"), Equals, true)
func (s *SimpleLinguistTestSuite) TestIsDocumentation() {
tests := []struct {
name string
path string
expected bool
}{
{name: "TestIsDocumentation_1", path: "foo", expected: false},
{name: "TestIsDocumentation_2", path: "README", expected: true},
}
func (s *TSuite) TestIsBinary(c *C) {
c.Assert(IsBinary([]byte("foo")), Equals, false)
for _, test := range tests {
is := IsDocumentation(test.path)
assert.Equal(s.T(), is, test.expected, fmt.Sprintf("%v: is = %v, expected: %v", test.name, is, test.expected))
}
}
binary := []byte{0}
c.Assert(IsBinary(binary), Equals, true)
func (s *SimpleLinguistTestSuite) TestIsConfiguration() {
tests := []struct {
name string
path string
expected bool
}{
{name: "TestIsConfiguration_1", path: "foo", expected: false},
{name: "TestIsConfiguration_2", path: "foo.ini", expected: true},
{name: "TestIsConfiguration_3", path: "foo.json", expected: true},
}
binary = bytes.Repeat([]byte{'o'}, 8000)
binary = append(binary, byte(0))
c.Assert(IsBinary(binary), Equals, false)
for _, test := range tests {
is := IsConfiguration(test.path)
assert.Equal(s.T(), is, test.expected, fmt.Sprintf("%v: is = %v, expected: %v", test.name, is, test.expected))
}
}
// TestIsBinary checks IsBinary against plain text, a lone NUL byte, and a
// buffer whose only NUL byte comes after 8000 bytes of text.
func (s *SimpleLinguistTestSuite) TestIsBinary() {
	// 8000 'o' bytes followed by one NUL. The NUL sits past sniffLen (8000),
	// so IsBinary is expected to report false; without the trailing NUL this
	// case would not exercise that cut-off at all.
	nulPastSniffLen := append(bytes.Repeat([]byte{'o'}, 8000), byte(0))

	tests := []struct {
		name     string
		data     []byte
		expected bool
	}{
		{name: "TestIsBinary_1", data: []byte("foo"), expected: false},
		{name: "TestIsBinary_2", data: []byte{0}, expected: true},
		{name: "TestIsBinary_3", data: nulPastSniffLen, expected: false},
	}

	for _, test := range tests {
		is := IsBinary(test.data)
		// assert.Equal takes (t, expected, actual); passing them in that
		// order keeps testify's failure output labelled correctly.
		assert.Equal(s.T(), test.expected, is, fmt.Sprintf("%v: is = %v, expected: %v", test.name, is, test.expected))
	}
}
const (
@ -45,14 +86,14 @@ const (
jsPath = "some/random/dir/file.js"
)
func (s *TSuite) BenchmarkVendor(c *C) {
for i := 0; i < c.N; i++ {
func BenchmarkVendor(b *testing.B) {
for i := 0; i < b.N; i++ {
_ = IsVendor(htmlPath)
}
}
func (s *TSuite) BenchmarkVendorJS(c *C) {
for i := 0; i < c.N; i++ {
func BenchmarkVendorJS(b *testing.B) {
for i := 0; i < b.N; i++ {
_ = IsVendor(jsPath)
}
}

View File

@ -2,7 +2,7 @@ package slinguist
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/simple-linguist.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: 60f864a138650dd17fafc94814be9ee2d3aaef8c
// Extracted from github/linguist commit: b6460f8ed6b249281ada099ca28bd8f1230b8892
import "gopkg.in/toqueteos/substring.v1"
@ -41,20 +41,20 @@ var vendorMatchers = substring.Or(
substring.Regexp(`3rd[-_]?party/`),
substring.Regexp(`vendors?/`),
substring.Regexp(`extern(al)?/`),
substring.Regexp(`(^|/)[Vv]&#43;endor/`),
substring.Regexp(`(^|/)[Vv]+endor/`),
substring.Regexp(`^debian/`),
substring.Regexp(`run.n$`),
substring.Regexp(`bootstrap-datepicker/`),
substring.Regexp(`(^|/)jquery([^.]*)\.js$`),
substring.Regexp(`(^|/)jquery\-\d\.\d&#43;(\.\d&#43;)?\.js$`),
substring.Regexp(`(^|/)jquery\-ui(\-\d\.\d&#43;(\.\d&#43;)?)?(\.\w&#43;)?\.(js|css)$`),
substring.Regexp(`(^|/)jquery\-\d\.\d+(\.\d+)?\.js$`),
substring.Regexp(`(^|/)jquery\-ui(\-\d\.\d+(\.\d+)?)?(\.\w+)?\.(js|css)$`),
substring.Regexp(`(^|/)jquery\.(ui|effects)\.([^.]*)\.(js|css)$`),
substring.Regexp(`jquery.fn.gantt.js`),
substring.Regexp(`jquery.fancybox.(js|css)`),
substring.Regexp(`fuelux.js`),
substring.Regexp(`(^|/)jquery\.fileupload(-\w&#43;)?\.js$`),
substring.Regexp(`(^|/)slick\.\w&#43;.js$`),
substring.Regexp(`(^|/)Leaflet\.Coordinates-\d&#43;\.\d&#43;\.\d&#43;\.src\.js$`),
substring.Regexp(`(^|/)jquery\.fileupload(-\w+)?\.js$`),
substring.Regexp(`(^|/)slick\.\w+.js$`),
substring.Regexp(`(^|/)Leaflet\.Coordinates-\d+\.\d+\.\d+\.src\.js$`),
substring.Regexp(`leaflet.draw-src.js`),
substring.Regexp(`leaflet.draw.css`),
substring.Regexp(`Control.FullScreen.css`),
@ -68,7 +68,7 @@ var vendorMatchers = substring.Or(
substring.Regexp(`(^|/)controls\.js$`),
substring.Regexp(`(^|/)dragdrop\.js$`),
substring.Regexp(`(.*?)\.d\.ts$`),
substring.Regexp(`(^|/)mootools([^.]*)\d&#43;\.\d&#43;.\d&#43;([^.]*)\.js$`),
substring.Regexp(`(^|/)mootools([^.]*)\d+\.\d+.\d+([^.]*)\.js$`),
substring.Regexp(`(^|/)dojo\.js$`),
substring.Regexp(`(^|/)MochiKit\.js$`),
substring.Regexp(`(^|/)yahoo-([^.]*)\.js$`),
@ -80,16 +80,16 @@ var vendorMatchers = substring.Or(
substring.Regexp(`(^|/)fontello(.*?)\.css$`),
substring.Regexp(`(^|/)MathJax/`),
substring.Regexp(`(^|/)Chart\.js$`),
substring.Regexp(`(^|/)[Cc]ode[Mm]irror/(\d&#43;\.\d&#43;/)?(lib|mode|theme|addon|keymap|demo)`),
substring.Regexp(`(^|/)[Cc]ode[Mm]irror/(\d+\.\d+/)?(lib|mode|theme|addon|keymap|demo)`),
substring.Regexp(`(^|/)shBrush([^.]*)\.js$`),
substring.Regexp(`(^|/)shCore\.js$`),
substring.Regexp(`(^|/)shLegacy\.js$`),
substring.Regexp(`(^|/)angular([^.]*)\.js$`),
substring.Regexp(`(^|\/)d3(\.v\d&#43;)?([^.]*)\.js$`),
substring.Regexp(`(^|\/)d3(\.v\d+)?([^.]*)\.js$`),
substring.Regexp(`(^|/)react(-[^.]*)?\.js$`),
substring.Regexp(`(^|/)modernizr\-\d\.\d&#43;(\.\d&#43;)?\.js$`),
substring.Regexp(`(^|/)modernizr\.custom\.\d&#43;\.js$`),
substring.Regexp(`(^|/)knockout-(\d&#43;\.){3}(debug\.)?js$`),
substring.Regexp(`(^|/)modernizr\-\d\.\d+(\.\d+)?\.js$`),
substring.Regexp(`(^|/)modernizr\.custom\.\d+\.js$`),
substring.Regexp(`(^|/)knockout-(\d+\.){3}(debug\.)?js$`),
substring.Regexp(`(^|/)docs?/_?(build|themes?|templates?|static)/`),
substring.Regexp(`(^|/)admin_media/`),
substring.Regexp(`(^|/)env/`),
@ -117,7 +117,7 @@ var vendorMatchers = substring.Or(
substring.Regexp(`(^|/)jquery([^.]*)\.validate(\.unobtrusive)?\.js$`),
substring.Regexp(`(^|/)jquery([^.]*)\.unobtrusive\-ajax\.js$`),
substring.Regexp(`(^|/)[Mm]icrosoft([Mm]vc)?([Aa]jax|[Vv]alidation)(\.debug)?\.js$`),
substring.Regexp(`^[Pp]ackages\/.&#43;\.\d&#43;\/`),
substring.Regexp(`^[Pp]ackages\/.+\.\d+\/`),
substring.Regexp(`(^|/)extjs/.*?\.js$`),
substring.Regexp(`(^|/)extjs/.*?\.xml$`),
substring.Regexp(`(^|/)extjs/.*?\.txt$`),