diff --git a/common.go b/common.go
index 6659911..a31f308 100644
--- a/common.go
+++ b/common.go
@@ -500,6 +500,16 @@ func GetLanguageExtensions(language string) []string {
 	return data.ExtensionsByLanguage[language]
 }
 
+// GetLanguageID returns the ID GitHub assigned to the given language. The
+// input must be the canonical language name; aliases are not supported.
+//
+// NOTE: 0 is a valid language ID, so this mirrors a Go map lookup: the second
+// return value, not the ID, reports whether the language was found.
+func GetLanguageID(language string) (int, bool) {
+	id, ok := data.IDByLanguage[language]
+	return id, ok
+}
+
 // Type represent language's type. Either data, programming, markup, prose, or unknown.
 type Type int
 
diff --git a/common_test.go b/common_test.go
index 27b324f..f88b274 100644
--- a/common_test.go
+++ b/common_test.go
@@ -576,3 +576,26 @@ func (s *EnryTestSuite) TestLinguistCorpus() {
 
 	s.T().Logf("\t\ttotal files: %d, ok: %d, failed: %d, other: %d\n", total, ok, failed, other)
 }
+
+func (s *EnryTestSuite) TestGetLanguageID() {
+	tests := []struct {
+		name       string
+		language   string
+		expectedID int
+		found      bool
+	}{
+		{name: "TestGetLanguageID_1", language: "1C Enterprise", expectedID: 0, found: true},     // 0 is a real ID here, not "missing"
+		{name: "TestGetLanguageID_2", language: "BestLanguageEver", expectedID: 0, found: false}, // unknown language: zero value and found == false
+		{name: "TestGetLanguageID_3", language: "C++", expectedID: 43, found: true},
+		{name: "TestGetLanguageID_5", language: "Objective-C", expectedID: 257, found: true},
+		{name: "TestGetLanguageID_6", language: "golang", expectedID: 0, found: false}, // aliases are not supported, only canonical names
+		{name: "TestGetLanguageID_7", language: "Go", expectedID: 132, found: true},
+		{name: "TestGetLanguageID_8", language: "Makefile", expectedID: 220, found: true},
+	}
+
+	for _, test := range tests {
+		id, found := GetLanguageID(test.language)
+		assert.Equal(s.T(), test.expectedID, id, fmt.Sprintf("%v: id = %v, expected: %v", test.name, id, test.expectedID))
+		assert.Equal(s.T(), test.found, found, fmt.Sprintf("%v: found = %t, expected: %t", test.name, found, test.found))
+	}
+}
diff --git a/data/id.go b/data/id.go
new file mode 100644
index 0000000..3b623b3
--- /dev/null
+++ b/data/id.go
@@ -0,0 +1,585 @@
+// Code generated by github.com/go-enry/go-enry/v2/internal/code-generator DO NOT EDIT.
+// Extracted from github/linguist commit: 6aed0bd564f55c4f2d987c2e6035eda7aea26375
+
+package data
+
+var IDByLanguage = map[string]int{
+	"1C Enterprise": 0,
+	"4D": 577529595,
+	"ABAP": 1,
+	"ABAP CDS": 452681853,
+	"ABNF": 429,
+	"AGS Script": 2,
+	"AL": 658971832,
+	"AMPL": 3,
+	"ANTLR": 4,
+	"API Blueprint": 5,
+	"APL": 6,
+	"ASL": 124996147,
+	"ASN.1": 7,
+	"ASP.NET": 564186416,
+	"ATS": 9,
+	"ActionScript": 10,
+	"Ada": 11,
+	"Adobe Font Metrics": 147198098,
+	"Agda": 12,
+	"Alloy": 13,
+	"Alpine Abuild": 14,
+	"Altium Designer": 187772328,
+	"AngelScript": 389477596,
+	"Ant Build System": 15,
+	"ApacheConf": 16,
+	"Apex": 17,
+	"Apollo Guidance Computer": 18,
+	"AppleScript": 19,
+	"Arc": 20,
+	"AsciiDoc": 22,
+	"AspectJ": 23,
+	"Assembly": 24,
+	"Asymptote": 591605007,
+	"Augeas": 25,
+	"AutoHotkey": 26,
+	"AutoIt": 27,
+	"Avro IDL": 785497837,
+	"Awk": 28,
+	"Ballerina": 720859680,
+	"Batchfile": 29,
+	"Beef": 545626333,
+	"Befunge": 30,
+	"BibTeX": 982188347,
+	"Bison": 31,
+	"BitBake": 32,
+	"Blade": 33,
+	"BlitzBasic": 34,
+	"BlitzMax": 35,
+	"Bluespec": 36,
+	"Boo": 37,
+	"Boogie": 955017407,
+	"Brainfuck": 38,
+	"Brightscript": 39,
+	"Browserslist": 153503348,
+	"C": 41,
+	"C#": 42,
+	"C++": 43,
+	"C-ObjDump": 44,
+	"C2hs Haskell": 45,
+	"CLIPS": 46,
+	"CMake": 47,
+	"COBOL": 48,
+	"COLLADA": 49,
+	"CSON": 424,
+	"CSS": 50,
+	"CSV": 51,
+	"CWeb": 657332628,
+	"Cabal Config": 677095381,
+	"Cap'n Proto": 52,
+	"CartoCSS": 53,
+	"Ceylon": 54,
+	"Chapel": 55,
+	"Charity": 56,
+	"ChucK": 57,
+	"Cirru": 58,
+	"Clarion": 59,
+	"Classic ASP": 8,
+	"Clean": 60,
+	"Click": 61,
+	"Clojure": 62,
+	"Closure Templates":
357046146, + "Cloud Firestore Security Rules": 407996372, + "CoNLL-U": 421026389, + "CodeQL": 424259634, + "CoffeeScript": 63, + "ColdFusion": 64, + "ColdFusion CFC": 65, + "Common Lisp": 66, + "Common Workflow Language": 988547172, + "Component Pascal": 67, + "Cool": 68, + "Coq": 69, + "Cpp-ObjDump": 70, + "Creole": 71, + "Crystal": 72, + "Csound": 73, + "Csound Document": 74, + "Csound Score": 75, + "Cuda": 77, + "Cycript": 78, + "Cython": 79, + "D": 80, + "D-ObjDump": 81, + "DIGITAL Command Language": 82, + "DM": 83, + "DNS Zone": 84, + "DTrace": 85, + "Dafny": 969323346, + "Darcs Patch": 86, + "Dart": 87, + "DataWeave": 974514097, + "Dhall": 793969321, + "Diff": 88, + "DirectX 3D File": 201049282, + "Dockerfile": 89, + "Dogescript": 90, + "Dylan": 91, + "E": 92, + "EBNF": 430, + "ECL": 93, + "ECLiPSe": 94, + "EJS": 95, + "EML": 529653389, + "EQ": 96, + "Eagle": 97, + "Easybuild": 342840477, + "Ecere Projects": 98, + "EditorConfig": 96139566, + "Edje Data Collection": 342840478, + "Eiffel": 99, + "Elixir": 100, + "Elm": 101, + "Emacs Lisp": 102, + "EmberScript": 103, + "Erlang": 104, + "F#": 105, + "F*": 336943375, + "FIGlet Font": 686129783, + "FLUX": 106, + "Factor": 108, + "Fancy": 109, + "Fantom": 110, + "Faust": 622529198, + "Filebench WML": 111, + "Filterscript": 112, + "Formatted": 113, + "Forth": 114, + "Fortran": 107, + "Fortran Free Form": 761352333, + "FreeMarker": 115, + "Frege": 116, + "Futhark": 97358117, + "G-code": 117, + "GAML": 290345951, + "GAMS": 118, + "GAP": 119, + "GCC Machine Description": 121, + "GDB": 122, + "GDScript": 123, + "GEDCOM": 459577965, + "GLSL": 124, + "GN": 302957008, + "Game Maker Language": 125, + "Genie": 792408528, + "Genshi": 126, + "Gentoo Ebuild": 127, + "Gentoo Eclass": 128, + "Gerber Image": 404627610, + "Gettext Catalog": 129, + "Gherkin": 76, + "Git Attributes": 956324166, + "Git Config": 807968997, + "Glyph": 130, + "Glyph Bitmap Distribution Format": 997665271, + "Gnuplot": 131, + "Go": 132, + "Golo": 133, + 
"Gosu": 134, + "Grace": 135, + "Gradle": 136, + "Grammatical Framework": 137, + "Graph Modeling Language": 138, + "GraphQL": 139, + "Graphviz (DOT)": 140, + "Groovy": 142, + "Groovy Server Pages": 143, + "HAProxy": 366607477, + "HCL": 144, + "HLSL": 145, + "HTML": 146, + "HTML+Django": 147, + "HTML+ECR": 148, + "HTML+EEX": 149, + "HTML+ERB": 150, + "HTML+PHP": 151, + "HTML+Razor": 479039817, + "HTTP": 152, + "HXML": 786683730, + "Hack": 153, + "Haml": 154, + "Handlebars": 155, + "Harbour": 156, + "Haskell": 157, + "Haxe": 158, + "HiveQL": 931814087, + "HolyC": 928121743, + "Hy": 159, + "HyPhy": 160, + "IDL": 161, + "IGOR Pro": 162, + "INI": 163, + "IRC log": 164, + "Idris": 165, + "Ignore List": 74444240, + "ImageJ Macro": 575143428, + "Inform 7": 166, + "Inno Setup": 167, + "Io": 168, + "Ioke": 169, + "Isabelle": 170, + "Isabelle ROOT": 171, + "J": 172, + "JFlex": 173, + "JSON": 174, + "JSON with Comments": 423, + "JSON5": 175, + "JSONLD": 176, + "JSONiq": 177, + "Jasmin": 180, + "Java": 181, + "Java Properties": 519377561, + "Java Server Pages": 182, + "JavaScript": 183, + "JavaScript+ERB": 914318960, + "Jison": 284531423, + "Jison Lex": 406395330, + "Jolie": 998078858, + "Jsonnet": 664885656, + "Julia": 184, + "Jupyter Notebook": 185, + "KRL": 186, + "Kaitai Struct": 818804755, + "KiCad Layout": 187, + "KiCad Legacy Layout": 140848857, + "KiCad Schematic": 622447435, + "Kit": 188, + "Kotlin": 189, + "LFE": 190, + "LLVM": 191, + "LOLCODE": 192, + "LSL": 193, + "LTspice Symbol": 1013566805, + "LabVIEW": 194, + "Lark": 758480799, + "Lasso": 195, + "Latte": 196, + "Lean": 197, + "Less": 198, + "Lex": 199, + "LilyPond": 200, + "Limbo": 201, + "Linker Script": 202, + "Linux Kernel Module": 203, + "Liquid": 204, + "Literate Agda": 205, + "Literate CoffeeScript": 206, + "Literate Haskell": 207, + "LiveScript": 208, + "Logos": 209, + "Logtalk": 210, + "LookML": 211, + "LoomScript": 212, + "Lua": 213, + "M": 214, + "M4": 215, + "M4Sugar": 216, + "MATLAB": 225, + 
"MAXScript": 217, + "MLIR": 448253929, + "MQL4": 426, + "MQL5": 427, + "MTML": 218, + "MUF": 219, + "Macaulay2": 34167825, + "Makefile": 220, + "Mako": 221, + "Markdown": 222, + "Marko": 932782397, + "Mask": 223, + "Mathematica": 224, + "Maven POM": 226, + "Max": 227, + "MediaWiki": 228, + "Mercury": 229, + "Meson": 799141244, + "Metal": 230, + "Microsoft Developer Studio Project": 800983837, + "MiniD": 231, + "Mirah": 232, + "Modelica": 233, + "Modula-2": 234, + "Modula-3": 564743864, + "Module Management System": 235, + "Monkey": 236, + "Moocode": 237, + "MoonScript": 238, + "Motorola 68K Assembly": 477582706, + "Muse": 474864066, + "Mustache": 638334590, + "Myghty": 239, + "NASL": 171666519, + "NCL": 240, + "NEON": 481192983, + "NL": 241, + "NPM Config": 685022663, + "NSIS": 242, + "NWScript": 731233819, + "Nearley": 521429430, + "Nemerle": 243, + "NetLinx": 244, + "NetLinx+ERB": 245, + "NetLogo": 246, + "NewLisp": 247, + "Nextflow": 506780613, + "Nginx": 248, + "Nim": 249, + "Ninja": 250, + "Nit": 251, + "Nix": 252, + "Nu": 253, + "NumPy": 254, + "Nunjucks": 461856962, + "OCaml": 255, + "ObjDump": 256, + "Object Data Instance Notation": 985227236, + "ObjectScript": 202735509, + "Objective-C": 257, + "Objective-C++": 258, + "Objective-J": 259, + "Odin": 889244082, + "Omgrofl": 260, + "Opa": 261, + "Opal": 262, + "Open Policy Agent": 840483232, + "OpenCL": 263, + "OpenEdge ABL": 264, + "OpenQASM": 153739399, + "OpenRC runscript": 265, + "OpenSCAD": 266, + "OpenStep Property List": 598917541, + "OpenType Feature File": 374317347, + "Org": 267, + "Ox": 268, + "Oxygene": 269, + "Oz": 270, + "P4": 348895984, + "PHP": 272, + "PLSQL": 273, + "PLpgSQL": 274, + "POV-Ray SDL": 275, + "Pan": 276, + "Papyrus": 277, + "Parrot": 278, + "Parrot Assembly": 279, + "Parrot Internal Representation": 280, + "Pascal": 281, + "Pawn": 271, + "Pep8": 840372442, + "Perl": 282, + "Pic": 425, + "Pickle": 284, + "PicoLisp": 285, + "PigLatin": 286, + "Pike": 287, + "PlantUML": 833504686, + 
"Pod": 288, + "Pod 6": 155357471, + "PogoScript": 289, + "Pony": 290, + "PostCSS": 262764437, + "PostScript": 291, + "PowerBuilder": 292, + "PowerShell": 293, + "Prisma": 499933428, + "Processing": 294, + "Proguard": 716513858, + "Prolog": 295, + "Propeller Spin": 296, + "Protocol Buffer": 297, + "Public Key": 298, + "Pug": 179, + "Puppet": 299, + "Pure Data": 300, + "PureBasic": 301, + "PureScript": 302, + "Python": 303, + "Python console": 428, + "Python traceback": 304, + "Q#": 697448245, + "QML": 305, + "QMake": 306, + "Qt Script": 558193693, + "Quake": 375265331, + "R": 307, + "RAML": 308, + "RDoc": 309, + "REALbasic": 310, + "REXX": 311, + "RMarkdown": 313, + "RPC": 1031374237, + "RPM Spec": 314, + "RUNOFF": 315, + "Racket": 316, + "Ragel": 317, + "Raku": 283, + "Rascal": 173616037, + "Raw token data": 318, + "ReScript": 501875647, + "Readline Config": 538732839, + "Reason": 869538413, + "Rebol": 319, + "Record Jar": 865765202, + "Red": 320, + "Redcode": 321, + "Regular Expression": 363378884, + "Ren'Py": 322, + "RenderScript": 323, + "Rich Text Format": 51601661, + "Ring": 431, + "Riot": 878396783, + "RobotFramework": 324, + "Roff": 141, + "Roff Manpage": 612669833, + "Rouge": 325, + "Ruby": 326, + "Rust": 327, + "SAS": 328, + "SCSS": 329, + "SMT": 330, + "SPARQL": 331, + "SQF": 332, + "SQL": 333, + "SQLPL": 334, + "SRecode Template": 335, + "SSH Config": 554920715, + "STON": 336, + "SVG": 337, + "SWIG": 1066250075, + "Sage": 338, + "SaltStack": 339, + "Sass": 340, + "Scala": 341, + "Scaml": 342, + "Scheme": 343, + "Scilab": 344, + "Self": 345, + "ShaderLab": 664257356, + "Shell": 346, + "ShellSession": 347, + "Shen": 348, + "Sieve": 208976687, + "Slash": 349, + "Slice": 894641667, + "Slim": 350, + "SmPL": 164123055, + "Smali": 351, + "Smalltalk": 352, + "Smarty": 353, + "Solidity": 237469032, + "SourcePawn": 354, + "Spline Font Database": 767169629, + "Squirrel": 355, + "Stan": 356, + "Standard ML": 357, + "Starlark": 960266174, + "Stata": 358, + "Stylus": 
359, + "SubRip Text": 360, + "SugarSS": 826404698, + "SuperCollider": 361, + "Svelte": 928734530, + "Swift": 362, + "SystemVerilog": 363, + "TI Program": 422, + "TLA": 364, + "TOML": 365, + "TSQL": 918334941, + "TSV": 1035892117, + "TSX": 94901924, + "TXL": 366, + "Tcl": 367, + "Tcsh": 368, + "TeX": 369, + "Tea": 370, + "Terra": 371, + "Texinfo": 988020015, + "Text": 372, + "Textile": 373, + "Thrift": 374, + "Turing": 375, + "Turtle": 376, + "Twig": 377, + "Type Language": 632765617, + "TypeScript": 378, + "Unified Parallel C": 379, + "Unity3D Asset": 380, + "Unix Assembly": 120, + "Uno": 381, + "UnrealScript": 382, + "UrWeb": 383, + "V": 603371597, + "VBA": 399230729, + "VBScript": 408016005, + "VCL": 384, + "VHDL": 385, + "Vala": 386, + "Verilog": 387, + "Vim Help File": 508563686, + "Vim Snippet": 81265970, + "Vim script": 388, + "Visual Basic .NET": 389, + "Volt": 390, + "Vue": 391, + "Wavefront Material": 392, + "Wavefront Object": 393, + "Web Ontology Language": 394, + "WebAssembly": 956556503, + "WebIDL": 395, + "WebVTT": 658679714, + "Wget Config": 668457123, + "Windows Registry Entries": 969674868, + "Wollok": 632745969, + "World of Warcraft Addon Data": 396, + "X BitMap": 782911107, + "X Font Directory Index": 208700028, + "X PixMap": 781846279, + "X10": 397, + "XC": 398, + "XCompose": 225167241, + "XML": 399, + "XML Property List": 75622871, + "XPages": 400, + "XProc": 401, + "XQuery": 402, + "XS": 403, + "XSLT": 404, + "Xojo": 405, + "Xtend": 406, + "YAML": 407, + "YANG": 408, + "YARA": 805122868, + "YASnippet": 378760102, + "Yacc": 409, + "ZAP": 952972794, + "ZIL": 973483626, + "Zeek": 40, + "ZenScript": 494938890, + "Zephir": 410, + "Zig": 646424281, + "Zimpl": 411, + "cURL Config": 992375436, + "desktop": 412, + "dircolors": 691605112, + "eC": 413, + "edn": 414, + "fish": 415, + "mIRC Script": 517654727, + "mcfunction": 462488745, + "mupad": 416, + "nanorc": 775996197, + "nesC": 417, + "ooc": 418, + "q": 970539067, + "reStructuredText": 419, + "sed": 
847830017,
+	"wdl": 374521672,
+	"wisp": 420,
+	"xBase": 421,
+}
diff --git a/internal/code-generator/assets/id.go.tmpl b/internal/code-generator/assets/id.go.tmpl
new file mode 100644
index 0000000..ef639a8
--- /dev/null
+++ b/internal/code-generator/assets/id.go.tmpl
@@ -0,0 +1,7 @@
+package data
+
+var IDByLanguage = map[string]int{
+	{{range $language, $id := . -}}
+	"{{$language}}": {{$id -}},
+	{{end -}}
+}
diff --git a/internal/code-generator/generator/id.go b/internal/code-generator/generator/id.go
new file mode 100644
index 0000000..274af9e
--- /dev/null
+++ b/internal/code-generator/generator/id.go
@@ -0,0 +1,59 @@
+package generator
+
+import (
+	"bytes"
+	"io"
+	"io/ioutil"
+
+	yaml "gopkg.in/yaml.v2"
+)
+
+// ID generates a Go file holding a map of language name -> language ID.
+// It parses the YAML file fileToParse, renders the template tmplName found at
+// tmplPath and hands the result to formatedWrite to be written to outPath.
+// samplesDir is not used; main.go passes an empty string for it.
+// It is of generator.File type.
+func ID(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error {
+	data, err := ioutil.ReadFile(fileToParse)
+	if err != nil {
+		return err
+	}
+
+	languages := make(map[string]*languageInfo)
+	if err := yaml.Unmarshal(data, &languages); err != nil {
+		return err
+	}
+
+	langIDMap := buildLanguageIDMap(languages)
+
+	buf := &bytes.Buffer{}
+	if err := executeIDTemplate(buf, langIDMap, tmplPath, tmplName, commit); err != nil {
+		return err
+	}
+
+	return formatedWrite(outPath, buf.Bytes())
+}
+
+// buildLanguageIDMap returns the language name -> language ID pairs of every
+// entry in languages that declares a language_id. Entries without one, as well
+// as nil entries (e.g. a YAML key with a null value), are left out.
+func buildLanguageIDMap(languages map[string]*languageInfo) map[string]int {
+	langIDMap := make(map[string]int, len(languages))
+	for lang, info := range languages {
+		// NOTE: 0 is a valid language ID, so presence is detected through the nil
+		// pointer; checking for the zero value would skip one language.
+		if info == nil || info.LanguageID == nil {
+			continue
+		}
+
+		langIDMap[lang] = *info.LanguageID
+	}
+
+	return langIDMap
+}
+
+// executeIDTemplate renders the template tmplName located at tmplPath into out
+// by delegating to executeTemplate, with langIDMap as the template data.
+func executeIDTemplate(out io.Writer, langIDMap map[string]int, tmplPath, tmplName, commit string) error {
+	return executeTemplate(out, tmplName, tmplPath, commit, nil, langIDMap)
+}
diff --git a/internal/code-generator/generator/langinfo.go b/internal/code-generator/generator/langinfo.go
index 7d85797..46d23f4 100644
--- a/internal/code-generator/generator/langinfo.go
+++ b/internal/code-generator/generator/langinfo.go
@@ -11,6 +11,7 @@ type languageInfo struct {
 	Interpreters []string `yaml:"interpreters,omitempty,flow"`
 	Filenames []string `yaml:"filenames,omitempty,flow"`
 	MimeType string `yaml:"codemirror_mime_type,omitempty,flow"`
+	LanguageID *int `yaml:"language_id,omitempty"` // pointer: nil means "no ID", since 0 is itself a valid ID
 }
 
 func getAlphabeticalOrderedKeys(languages map[string]*languageInfo) []string {
diff --git a/internal/code-generator/main.go b/internal/code-generator/main.go
index 51fc224..9fd41f1 100644
--- a/internal/code-generator/main.go
+++ b/internal/code-generator/main.go
@@ -85,6 +85,11 @@ var (
 	groupsTmplPath = filepath.Join(assetsDir, "groups.go.tmpl")
 	groupsTmpl = "groups.go.tmpl"
 
+	// id.go generation
+	idFile     = filepath.Join("data", "id.go")
+	idTmplPath = filepath.Join(assetsDir, "id.go.tmpl")
+	idTmpl     = "id.go.tmpl"
+
 	commitPath = filepath.Join(".linguist", ".git", "HEAD")
 )
 
@@ -118,6 +123,7 @@ func main() {
 		{generator.MimeType, languagesYAML, "", mimeTypeFile, mimeTypeTmplPath, mimeTypeTmpl, commit},
 		{generator.Colors, languagesYAML, "", colorsFile, colorsTmplPath, colorsTmpl, commit},
 		{generator.Groups, languagesYAML, "", groupsFile, groupsTmplPath, groupsTmpl, commit},
+		{generator.ID, languagesYAML, "", idFile, idTmplPath, idTmpl, commit},
 	}
 
 	for _, file := range fileList {