diff --git a/.gitignore b/.gitignore index b9838e6..395e764 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ pygments/ shard.lock .vscode/ .crystal/ +venv/ diff --git a/TODO.md b/TODO.md index 51cf8e9..71e22bf 100644 --- a/TODO.md +++ b/TODO.md @@ -9,4 +9,4 @@ * ✅ Implement lexer loader by file extension * ✅ Add --line-numbers to terminal formatter * Implement lexer loader by mime type -* Implement Delegating lexers \ No newline at end of file +* ✅ Implement Delegating lexers diff --git a/lexers/LiquidLexer.xml b/lexers/LiquidLexer.xml new file mode 100644 index 0000000..d4143ce --- /dev/null +++ b/lexers/LiquidLexer.xml @@ -0,0 +1,130 @@ + + + + liquid + liquid + *.liquid + + + + + + + + + + + + + + + + None + None + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + None + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/lexers/TwigLexer.xml b/lexers/TwigLexer.xml new file mode 100644 index 0000000..3413120 --- /dev/null +++ b/lexers/TwigLexer.xml @@ -0,0 +1,47 @@ + + + + Twig + twig + application/x-twig + true + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/lexers/VelocityLexer.xml b/lexers/VelocityLexer.xml new file mode 100644 index 0000000..c22d17e --- /dev/null +++ b/lexers/VelocityLexer.xml @@ -0,0 +1,55 @@ + + + + Velocity + velocity + *.vm + *.fhtml + true + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/lexers/bbcode.xml b/lexers/bbcode.xml new file mode 100644 index 0000000..d72d1a5 --- /dev/null +++ b/lexers/bbcode.xml @@ -0,0 +1,22 @@ + + + + BBCode + bbcode + text/x-bbcode + + + + + + + + + + + + + + + + diff --git a/lexers/markdown.xml b/lexers/markdown.xml new file mode 100644 index 0000000..e415ebf --- /dev/null +++ b/lexers/markdown.xml @@ -0,0 +1,56 @@ + + + + Markdown + markdown + md + *.md + *.markdown + 
text/x-markdown + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/lexers/moinwiki.xml b/lexers/moinwiki.xml new file mode 100644 index 0000000..9a5d660 --- /dev/null +++ b/lexers/moinwiki.xml @@ -0,0 +1,34 @@ + + + + MoinMoin/Trac Wiki markup + trac-wiki + moin + text/x-trac-wiki + true + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/scripts/lexer_metadata.py b/scripts/lexer_metadata.py index 58b5762..29d5f9d 100644 --- a/scripts/lexer_metadata.py +++ b/scripts/lexer_metadata.py @@ -40,15 +40,18 @@ for fname in glob.glob("lexers/*.xml"): with open("src/constants/lexers.cr", "w") as f: f.write("module Tartrazine\n") f.write(" LEXERS_BY_NAME = {\n") - for k, v in lexer_by_name.items(): + for k in sorted(lexer_by_name.keys()): + v = lexer_by_name[k] f.write(f'"{k}" => "{v}", \n') f.write("}\n") f.write(" LEXERS_BY_MIMETYPE = {\n") - for k, v in lexer_by_mimetype.items(): + for k in sorted(lexer_by_mimetype.keys()): + v = lexer_by_mimetype[k] f.write(f'"{k}" => "{v}", \n') f.write("}\n") f.write(" LEXERS_BY_FILENAME = {\n") - for k, v in lexer_by_filename.items(): + for k in sorted(lexer_by_filename.keys()): + v = lexer_by_filename[k] f.write(f'"{k}" => {str(list(v)).replace("'", "\"")}, \n') f.write("}\n") f.write("end\n") diff --git a/spec/tartrazine_spec.cr b/spec/tartrazine_spec.cr index 15011e9..a139af9 100644 --- a/spec/tartrazine_spec.cr +++ b/spec/tartrazine_spec.cr @@ -72,8 +72,7 @@ end # Helper that creates lexer and tokenizes def tokenize(lexer_name, text) - lexer = Tartrazine.lexer(lexer_name) - tokenizer = Tartrazine::Tokenizer.new(lexer, text) + tokenizer = Tartrazine.lexer(lexer_name).tokenizer(text) Tartrazine::Lexer.collapse_tokens(tokenizer.to_a) end diff --git a/src/actions.cr b/src/actions.cr index b626dd2..9ae536d 100644 --- a/src/actions.cr +++ b/src/actions.cr @@ -16,13 +16,16 @@ module Tartrazine Push Token Using + Usingbygroup Usingself end struct Action property actions 
: Array(Action) = [] of Action + @content_index : Int32 = 0 @depth : Int32 = 0 + @lexer_index : Int32 = 0 @lexer_name : String = "" @states : Array(String) = [] of String @states_to_push : Array(String) = [] of String @@ -62,6 +65,9 @@ module Tartrazine @states = xml.attributes.select { |attrib| attrib.name == "state" }.map &.content + when ActionType::Usingbygroup + @lexer_index = xml["lexer"].to_i + @content_index = xml["content"].to_i end end @@ -115,15 +121,13 @@ module Tartrazine when ActionType::Using # Shunt to another lexer entirely return [] of Token if match.empty? - Tokenizer.new( - Tartrazine.lexer(@lexer_name), + Tartrazine.lexer(@lexer_name).tokenizer( String.new(match[match_group].value), secondary: true).to_a when ActionType::Usingself # Shunt to another copy of this lexer return [] of Token if match.empty? - Tokenizer.new( - tokenizer.lexer, + tokenizer.lexer.tokenizer( String.new(match[match_group].value), secondary: true).to_a when ActionType::Combined @@ -136,6 +140,12 @@ module Tartrazine tokenizer.lexer.states[new_state.name] = new_state tokenizer.state_stack << new_state.name [] of Token + when ActionType::Usingbygroup + # Shunt to content-specified lexer + return [] of Token if match.empty? 
+ Tartrazine.lexer(String.new(match[@lexer_index].value)).tokenizer( + String.new(match[@content_index].value), + secondary: true).to_a else raise Exception.new("Unknown action type: #{@type}") end diff --git a/src/constants/lexers.cr b/src/constants/lexers.cr index fb4b198..25a253a 100644 --- a/src/constants/lexers.cr +++ b/src/constants/lexers.cr @@ -1,1160 +1,1176 @@ module Tartrazine LEXERS_BY_NAME = { - "forth" => "forth", - "nginx configuration file" => "nginx_configuration_file", - "nginx" => "nginx_configuration_file", - "ocaml" => "ocaml", - "typoscriptcssdata" => "typoscriptcssdata", - "kotlin" => "kotlin", - "systemverilog" => "systemverilog", - "sv" => "systemverilog", - "mma" => "mathematica", - "mathematica" => "mathematica", - "nb" => "mathematica", + "1s" => "onesenterprise", + "1s:enterprise" => "onesenterprise", + "abap" => "abap", + "abl" => "openedge_abl", + "abnf" => "abnf", + "aconf" => "apacheconf", + "actionscript" => "actionscript", + "actionscript 3" => "actionscript_3", + "actionscript3" => "actionscript_3", + "ada" => "ada", + "ada2005" => "ada", + "ada95" => "ada", + "agda" => "agda", + "ahk" => "autohotkey", + "al" => "al", + "alloy" => "alloy", + "angular2" => "angular2", + "antlr" => "antlr", + "apache" => "apacheconf", + "apacheconf" => "apacheconf", + "apl" => "apl", + "applescript" => "applescript", + "aql" => "arangodb_aql", + "arangodb aql" => "arangodb_aql", + "arduino" => "arduino", + "arexx" => "rexx", + "armasm" => "armasm", + "as" => "actionscript", + "as3" => "actionscript_3", + "asm" => "gas", + "autohotkey" => "autohotkey", + "autoit" => "autoit", + "awk" => "awk", + "b3d" => "blitzbasic", + "ballerina" => "ballerina", + "bash" => "bash", + "bash session" => "bash_session", + "bash-session" => "bash_session", + "basic" => "qbasic", + "bat" => "batchfile", + "batch" => "batchfile", + "batchfile" => "batchfile", + "bbcode" => "bbcode", + "bf" => "brainfuck", + "bib" => "bibtex", + "bibtex" => "bibtex", + "bicep" => 
"bicep", + "bind" => "dns", + "blitzbasic" => "blitzbasic", + "bnf" => "bnf", + "bplus" => "blitzbasic", "bqn" => "bqn", - "v" => "verilog", - "vlang" => "v", - "rego" => "rego", - "matlab" => "matlab", - "tf" => "terraform", - "terraform" => "terraform", + "brainfuck" => "brainfuck", + "bsdmake" => "makefile", + "c" => "c", + "c#" => "c#", + "c++" => "c++", + "cap'n proto" => "cap_n_proto", + "capnp" => "cap_n_proto", + "cassandra" => "cassandra_cql", + "cassandra cql" => "cassandra_cql", + "cassette" => "vhs", + "ceylon" => "ceylon", + "cf3" => "cfengine3", + "cfengine3" => "cfengine3", + "cfg" => "ini", + "cfs" => "cfstatement", + "cfstatement" => "cfstatement", + "chai" => "chaiscript", + "chaiscript" => "chaiscript", + "chapel" => "chapel", + "cheetah" => "cheetah", + "chpl" => "chapel", + "cl" => "common_lisp", + "clj" => "clojure", + "clojure" => "clojure", + "cmake" => "cmake", + "cobol" => "cobol", + "coffee" => "coffeescript", + "coffee-script" => "coffeescript", + "coffeescript" => "coffeescript", + "common lisp" => "common_lisp", + "common-lisp" => "common_lisp", + "console" => "bash_session", + "coq" => "coq", + "cpp" => "c++", + "cql" => "cassandra_cql", + "cr" => "crystal", + "crystal" => "crystal", + "csh" => "tcsh", + "csharp" => "c#", + "css" => "css", + "cucumber" => "gherkin", + "cue" => "cue", + "cython" => "cython", + "d" => "d", + "dart" => "dart", + "dax" => "dax", "desktop" => "desktop_entry", "desktop file" => "desktop_entry", "desktop_entry" => "desktop_entry", - "bicep" => "bicep", - "python3" => "python", - "py" => "python", - "sage" => "python", - "py3" => "python", - "python" => "python", - "morrowind" => "morrowindscript", - "mwscript" => "morrowindscript", - "morrowindscript" => "morrowindscript", - "registry" => "reg", - "reg" => "reg", - "tv" => "tradingview", - "tradingview" => "tradingview", - "bplus" => "blitzbasic", - "b3d" => "blitzbasic", - "blitzbasic" => "blitzbasic", - "sas" => "sas", - "armasm" => "armasm", - "systemd" 
=> "systemd", - "glsl" => "glsl", - "uxntal" => "tal", - "tal" => "tal", - "scheme" => "scheme", - "scm" => "scheme", - "json" => "json", - "prolog" => "prolog", - "prql" => "prql", - "ones" => "onesenterprise", - "1s:enterprise" => "onesenterprise", - "1s" => "onesenterprise", - "onesenterprise" => "onesenterprise", - "coffeescript" => "coffeescript", - "coffee-script" => "coffeescript", - "coffee" => "coffeescript", - "llvm" => "llvm", - "hlsl" => "hlsl", - "fishshell" => "fish", + "diff" => "diff", + "django" => "django_jinja", + "django/jinja" => "django_jinja", + "dns" => "dns", + "docker" => "docker", + "dockerfile" => "docker", + "dosbatch" => "batchfile", + "dosini" => "ini", + "dtd" => "dtd", + "duby" => "ruby", + "dylan" => "dylan", + "ebnf" => "ebnf", + "edn" => "clojure", + "elisp" => "emacslisp", + "elixir" => "elixir", + "elm" => "elm", + "emacs" => "emacslisp", + "emacs-lisp" => "emacslisp", + "emacslisp" => "emacslisp", + "erlang" => "erlang", + "ex" => "elixir", + "exs" => "elixir", + "f90" => "fortran", + "factor" => "factor", + "fennel" => "fennel", "fish" => "fish", - "newspeak" => "newspeak", + "fishshell" => "fish", + "fnl" => "fennel", + "forth" => "forth", + "fortran" => "fortran", + "fortranfixed" => "fortranfixed", + "fsharp" => "fsharp", + "gas" => "gas", + "gawk" => "awk", + "gd" => "gdscript", + "gd3" => "gdscript3", + "gdscript" => "gdscript", + "gdscript3" => "gdscript3", + "gherkin" => "gherkin", + "gleam" => "gleam", + "glsl" => "glsl", "gnuplot" => "gnuplot", - "octave" => "octave", - "nimrod" => "nim", - "nim" => "nim", - "whiley" => "whiley", - "iscdhcpd" => "iscdhcpd", - "cython" => "cython", - "pyx" => "cython", - "pyrex" => "cython", - "lighttpd configuration file" => "lighttpd_configuration_file", - "lighttpd" => "lighttpd_configuration_file", - "lighty" => "lighttpd_configuration_file", - "ndisasm" => "ndisasm", - "lua" => "lua", - "meson" => "meson", - "meson.build" => "meson", - "crystal" => "crystal", - "cr" => "crystal", 
- "qml" => "qml", - "qbs" => "qml", - "tcl" => "tcl", - "xml" => "xml", - "vbnet" => "vb_net", - "vb.net" => "vb_net", - "alloy" => "alloy", - "vhdl" => "vhdl", - "hbs" => "handlebars", - "handlebars" => "handlebars", - "thrift" => "thrift", - "coq" => "coq", - "turing" => "turing", + "go template" => "go_template", + "go-template" => "go_template", "gql" => "graphql", "graphql" => "graphql", "graphqls" => "graphql", - "fortranfixed" => "fortranfixed", - "termcap" => "termcap", - "mako" => "mako", - "dax" => "dax", - "ksh" => "bash", - "sh" => "bash", - "shell" => "bash", - "bash" => "bash", - "zsh" => "bash", - "actionscript" => "actionscript", - "as" => "actionscript", - "c" => "c", - "vshell" => "v_shell", - "vsh" => "v_shell", - "v shell" => "v_shell", - "monkeyc" => "monkeyc", - "spitfire" => "cheetah", - "cheetah" => "cheetah", - "powerquery" => "powerquery", - "pq" => "powerquery", - "mariadb" => "mysql", - "mysql" => "mysql", - "psl" => "psl", - "nasm" => "nasm", - "ts" => "typescript", - "typescript" => "typescript", - "tsx" => "typescript", - "mlir" => "mlir", - "fennel" => "fennel", - "fnl" => "fennel", - "python 2" => "python_2", - "python2" => "python_2", - "py2" => "python_2", - "sourcepawn" => "sourcepawn", - "sp" => "sourcepawn", - "makefile" => "makefile", - "bsdmake" => "makefile", - "mf" => "makefile", - "make" => "makefile", + "groff" => "groff", + "groovy" => "groovy", + "gsed" => "sed", + "handlebars" => "handlebars", "hare" => "hare", - "ahk" => "autohotkey", - "autohotkey" => "autohotkey", - "snobol" => "snobol", - "mcfunction" => "mcfunction", - "promela" => "promela", - "cf3" => "cfengine3", - "cfengine3" => "cfengine3", - "pony" => "pony", - "gleam" => "gleam", - "arexx" => "rexx", - "rexx" => "rexx", - "openedge" => "openedge_abl", - "abl" => "openedge_abl", - "openedgeabl" => "openedge_abl", - "openedge abl" => "openedge_abl", - "progress" => "openedge_abl", - "j" => "j", - "elm" => "elm", - "d" => "d", - "django/jinja" => 
"django_jinja", - "django" => "django_jinja", - "jinja" => "django_jinja", - "autoit" => "autoit", - "plain" => "plaintext", - "no-highlight" => "plaintext", - "text" => "plaintext", - "plaintext" => "plaintext", - "bat" => "batchfile", - "batchfile" => "batchfile", - "batch" => "batchfile", - "dosbatch" => "batchfile", - "winbatch" => "batchfile", - "cobol" => "cobol", - "pl/pgsql" => "pl_pgsql", - "plpgsql" => "pl_pgsql", - "edn" => "clojure", - "clj" => "clojure", - "clojure" => "clojure", - "webgpu shading language" => "webgpu_shading_language", - "wgsl" => "webgpu_shading_language", - "ceylon" => "ceylon", - "cmake" => "cmake", + "haskell" => "haskell", + "hbs" => "handlebars", + "hcl" => "hcl", + "hexdump" => "hexdump", + "hlb" => "hlb", + "hlsl" => "hlsl", + "holyc" => "holyc", + "hs" => "haskell", + "html" => "html", + "hy" => "hy", + "hylang" => "hy", + "idr" => "idris", + "idris" => "idris", + "igor" => "igor", + "igorpro" => "igor", + "ini" => "ini", "io" => "io", - "apl" => "apl", - "erlang" => "erlang", - "modula2" => "modula-2", + "iscdhcpd" => "iscdhcpd", + "j" => "j", + "java" => "java", + "java-properties" => "properties", + "javascript" => "javascript", + "jinja" => "django_jinja", + "jl" => "julia", + "js" => "javascript", + "json" => "json", + "jsonata" => "jsonata", + "jsx" => "react", + "julia" => "julia", + "jungle" => "jungle", + "kotlin" => "kotlin", + "ksh" => "bash", + "latex" => "tex", + "lighttpd" => "lighttpd_configuration_file", + "lighttpd configuration file" => "lighttpd_configuration_file", + "lighty" => "lighttpd_configuration_file", + "liquid" => "LiquidLexer", + "lisp" => "common_lisp", + "llvm" => "llvm", + "lua" => "lua", "m2" => "modula-2", + "make" => "makefile", + "makefile" => "makefile", + "mako" => "mako", + "man" => "groff", + "mariadb" => "mysql", + "markdown" => "markdown", + "mason" => "mason", + "materialize" => "materialize_sql_dialect", + "materialize sql dialect" => "materialize_sql_dialect", + "mathematica" => 
"mathematica", + "matlab" => "matlab", + "mawk" => "awk", + "mcfunction" => "mcfunction", + "md" => "markdown", + "meson" => "meson", + "meson.build" => "meson", + "metal" => "metal", + "mf" => "makefile", + "minizinc" => "minizinc", + "mlir" => "mlir", + "mma" => "mathematica", "modula-2" => "modula-2", - "asm" => "gas", - "gas" => "gas", + "modula2" => "modula-2", + "moin" => "moinwiki", + "moinmoin/trac wiki markup" => "moinwiki", + "monkeyc" => "monkeyc", + "morrowind" => "morrowindscript", + "morrowindscript" => "morrowindscript", + "mwscript" => "morrowindscript", + "myghty" => "myghty", + "mysql" => "mysql", + "mzn" => "minizinc", + "mzsql" => "materialize_sql_dialect", + "nasm" => "nasm", + "natural" => "natural", + "nawk" => "awk", + "nb" => "mathematica", + "ndisasm" => "ndisasm", + "newspeak" => "newspeak", + "ng2" => "angular2", + "nginx" => "nginx_configuration_file", + "nginx configuration file" => "nginx_configuration_file", + "nim" => "nim", + "nimrod" => "nim", + "nix" => "nix", + "nixos" => "nix", + "no-highlight" => "plaintext", + "nroff" => "groff", + "obj-c" => "objective-c", + "objc" => "objective-c", + "objective-c" => "objective-c", + "objectivec" => "objective-c", + "objectpascal" => "objectpascal", + "ocaml" => "ocaml", + "octave" => "octave", + "odin" => "odin", + "ones" => "onesenterprise", + "onesenterprise" => "onesenterprise", + "openedge" => "openedge_abl", + "openedge abl" => "openedge_abl", + "openedgeabl" => "openedge_abl", + "openscad" => "openscad", + "org" => "org_mode", + "org mode" => "org_mode", + "orgmode" => "org_mode", + "pacmanconf" => "pacmanconf", + "perl" => "perl", + "php" => "php", + "php3" => "php", + "php4" => "php", + "php5" => "php", + "pig" => "pig", + "pkgconfig" => "pkgconfig", + "pl" => "perl", + "pl/pgsql" => "pl_pgsql", + "plain" => "plaintext", + "plaintext" => "plaintext", + "plc" => "plutus_core", + "plpgsql" => "pl_pgsql", + "plutus core" => "plutus_core", + "plutus-core" => "plutus_core", + "pony" => 
"pony", + "posh" => "powershell", + "postgres" => "postgresql_sql_dialect", + "postgresql" => "postgresql_sql_dialect", + "postgresql sql dialect" => "postgresql_sql_dialect", + "postscr" => "postscript", + "postscript" => "postscript", + "pov" => "povray", + "povray" => "povray", + "powerquery" => "powerquery", + "powershell" => "powershell", + "pq" => "powerquery", + "progress" => "openedge_abl", + "prolog" => "prolog", + "promela" => "promela", + "promql" => "promql", + "properties" => "properties", "proto" => "protocol_buffer", "protobuf" => "protocol_buffer", "protocol buffer" => "protocol_buffer", - "mzsql" => "materialize_sql_dialect", - "materialize sql dialect" => "materialize_sql_dialect", - "materialize" => "materialize_sql_dialect", - "vue" => "vue", - "vuejs" => "vue", - "reasonml" => "reasonml", - "reason" => "reasonml", - "sql" => "sql", - "gsed" => "sed", - "ssed" => "sed", - "sed" => "sed", - "racket" => "racket", - "rkt" => "racket", - "sass" => "sass", - "hexdump" => "hexdump", - "fsharp" => "fsharp", - "plutus-core" => "plutus_core", - "plutus core" => "plutus_core", - "plc" => "plutus_core", - "react" => "react", - "jsx" => "react", - "zed" => "zed", - "groovy" => "groovy", - "zone" => "dns", - "bind" => "dns", - "dns" => "dns", - "nroff" => "groff", - "man" => "groff", - "groff" => "groff", - "ucode" => "ucode", - "dosini" => "ini", - "ini" => "ini", - "cfg" => "ini", - "typoscripthtmldata" => "typoscripthtmldata", - "org" => "org_mode", - "orgmode" => "org_mode", - "org mode" => "org_mode", - "stylus" => "stylus", - "yang" => "yang", - "dylan" => "dylan", - "bnf" => "bnf", - "agda" => "agda", - "cue" => "cue", - "cpp" => "c++", - "c++" => "c++", - "turtle" => "turtle", - "dtd" => "dtd", - "standard ml" => "standard_ml", - "sml" => "standard_ml", - "hcl" => "hcl", - "openscad" => "openscad", - "holyc" => "holyc", - "gdscript3" => "gdscript3", - "gd3" => "gdscript3", - "puppet" => "puppet", - "obj-c" => "objective-c", - "objective-c" => 
"objective-c", - "objc" => "objective-c", - "objectivec" => "objective-c", - "zig" => "zig", - "postscript" => "postscript", - "postscr" => "postscript", - "abnf" => "abnf", - "ragel" => "ragel", - "r" => "r", - "splus" => "r", - "s" => "r", - "pacmanconf" => "pacmanconf", - "hlb" => "hlb", - "bash session" => "bash_session", - "bash-session" => "bash_session", - "shell-session" => "bash_session", - "console" => "bash_session", - "smali" => "smali", - "idr" => "idris", - "idris" => "idris", - "sol" => "solidity", - "solidity" => "solidity", - "igor" => "igor", - "igorpro" => "igor", - "viml" => "viml", - "vim" => "viml", - "applescript" => "applescript", - "gherkin" => "gherkin", - "cucumber" => "gherkin", - "scss" => "scss", - "jsonata" => "jsonata", - "terminfo" => "terminfo", - "rb" => "ruby", - "ruby" => "ruby", - "duby" => "ruby", - "ballerina" => "ballerina", - "css" => "css", - "rpmspec" => "rpm_spec", - "spec" => "rpm_spec", - "elixir" => "elixir", - "exs" => "elixir", - "ex" => "elixir", - "al" => "al", - "typoscript" => "typoscript", - "stas" => "stas", - "apacheconf" => "apacheconf", - "aconf" => "apacheconf", - "apache" => "apacheconf", - "perl" => "perl", - "pl" => "perl", - "psm1" => "powershell", - "pwsh" => "powershell", - "powershell" => "powershell", - "posh" => "powershell", + "prql" => "prql", "ps1" => "powershell", "psd1" => "powershell", - "docker" => "docker", - "dockerfile" => "docker", + "psl" => "psl", + "psm1" => "powershell", + "puppet" => "puppet", + "pwsh" => "powershell", + "py" => "python", + "py2" => "python_2", + "py3" => "python", + "pyrex" => "cython", + "python" => "python", + "python 2" => "python_2", + "python2" => "python_2", + "python3" => "python", + "pyx" => "cython", + "qbasic" => "qbasic", + "qbs" => "qml", + "qml" => "qml", + "r" => "r", + "racket" => "racket", + "ragel" => "ragel", + "rb" => "ruby", + "react" => "react", + "reason" => "reasonml", + "reasonml" => "reasonml", + "reg" => "reg", + "registry" => "reg", + 
"rego" => "rego", + "rexx" => "rexx", + "rkt" => "racket", + "rpmspec" => "rpm_spec", + "rs" => "rust", + "ruby" => "ruby", + "rust" => "rust", + "s" => "r", + "sage" => "python", + "sas" => "sas", + "sass" => "sass", + "scala" => "scala", + "scheme" => "scheme", + "scilab" => "scilab", + "scm" => "scheme", + "scss" => "scss", + "sed" => "sed", + "sh" => "bash", + "shell" => "bash", + "shell-session" => "bash_session", + "sieve" => "sieve", + "smali" => "smali", "smalltalk" => "smalltalk", - "squeak" => "smalltalk", - "st" => "smalltalk", - "jl" => "julia", - "julia" => "julia", - "hs" => "haskell", - "haskell" => "haskell", - "awk" => "awk", - "nawk" => "awk", - "mawk" => "awk", - "gawk" => "awk", - "mason" => "mason", - "postgresql sql dialect" => "postgresql_sql_dialect", - "postgresql" => "postgresql_sql_dialect", - "postgres" => "postgresql_sql_dialect", - "ada2005" => "ada", - "ada" => "ada", - "ada95" => "ada", - "fortran" => "fortran", - "f90" => "fortran", + "smarty" => "smarty", + "sml" => "standard_ml", + "snobol" => "snobol", + "sol" => "solidity", + "solidity" => "solidity", + "sourcepawn" => "sourcepawn", + "sp" => "sourcepawn", "sparql" => "sparql", - "pig" => "pig", - "verilog" => "verilog", - "common-lisp" => "common_lisp", - "common lisp" => "common_lisp", - "cl" => "common_lisp", - "lisp" => "common_lisp", - "html" => "html", + "spec" => "rpm_spec", + "spitfire" => "cheetah", + "splus" => "r", + "sql" => "sql", + "squeak" => "smalltalk", + "squid" => "squidconf", "squid.conf" => "squidconf", "squidconf" => "squidconf", - "squid" => "squidconf", - "tasm" => "tasm", - "vhs" => "vhs", - "cassette" => "vhs", - "tape" => "vhs", - "minizinc" => "minizinc", - "mzn" => "minizinc", - "pov" => "povray", - "povray" => "povray", - "sieve" => "sieve", - "twig" => "twig", - "basic" => "qbasic", - "qbasic" => "qbasic", - "toml" => "toml", - "scilab" => "scilab", - "natural" => "natural", - "odin" => "odin", - "tablegen" => "tablegen", - "metal" => "metal", - 
"cfs" => "cfstatement", - "cfstatement" => "cfstatement", - "dart" => "dart", - "cql" => "cassandra_cql", - "cassandra cql" => "cassandra_cql", - "cassandra" => "cassandra_cql", - "properties" => "properties", - "java-properties" => "properties", - "chpl" => "chapel", - "chapel" => "chapel", - "chai" => "chaiscript", - "chaiscript" => "chaiscript", - "yaml" => "yaml", - "abap" => "abap", - "udiff" => "diff", - "diff" => "diff", - "brainfuck" => "brainfuck", - "bf" => "brainfuck", - "rust" => "rust", - "rs" => "rust", - "tex" => "tex", - "latex" => "tex", - "hylang" => "hy", - "hy" => "hy", - "ebnf" => "ebnf", - "gd" => "gdscript", - "gdscript" => "gdscript", - "java" => "java", - "xorg.conf" => "xorg", - "xorg" => "xorg", - "nixos" => "nix", - "nix" => "nix", - "myghty" => "myghty", - "c#" => "c#", - "csharp" => "c#", - "tsql" => "transact-sql", - "transact-sql" => "transact-sql", - "t-sql" => "transact-sql", - "z80 assembly" => "z80_assembly", - "z80" => "z80_assembly", - "pkgconfig" => "pkgconfig", - "ng2" => "angular2", - "angular2" => "angular2", - "php5" => "php", - "php4" => "php", - "php" => "php", - "php3" => "php", - "vapi" => "vala", - "vala" => "vala", - "factor" => "factor", - "js" => "javascript", - "javascript" => "javascript", - "arduino" => "arduino", - "bibtex" => "bibtex", - "bib" => "bibtex", - "cap'n proto" => "cap_n_proto", - "capnp" => "cap_n_proto", - "jungle" => "jungle", - "antlr" => "antlr", + "ssed" => "sed", + "st" => "smalltalk", + "standard ml" => "standard_ml", + "stas" => "stas", + "stylus" => "stylus", + "sv" => "systemverilog", "swift" => "swift", - "go-template" => "go_template", - "go template" => "go_template", - "promql" => "promql", - "wdte" => "wdte", - "objectpascal" => "objectpascal", - "emacs" => "emacslisp", - "emacslisp" => "emacslisp", - "emacs-lisp" => "emacslisp", - "elisp" => "emacslisp", - "smarty" => "smarty", - "actionscript 3" => "actionscript_3", - "actionscript3" => "actionscript_3", - "as3" => 
"actionscript_3", - "scala" => "scala", + "systemd" => "systemd", + "systemverilog" => "systemverilog", + "t-sql" => "transact-sql", + "tablegen" => "tablegen", + "tal" => "tal", + "tape" => "vhs", + "tasm" => "tasm", + "tcl" => "tcl", "tcsh" => "tcsh", - "csh" => "tcsh", - "aql" => "arangodb_aql", - "arangodb aql" => "arangodb_aql", + "termcap" => "termcap", + "terminfo" => "terminfo", + "terraform" => "terraform", + "tex" => "tex", + "text" => "plaintext", + "tf" => "terraform", + "thrift" => "thrift", + "toml" => "toml", + "trac-wiki" => "moinwiki", + "tradingview" => "tradingview", + "transact-sql" => "transact-sql", + "ts" => "typescript", + "tsql" => "transact-sql", + "tsx" => "typescript", + "turing" => "turing", + "turtle" => "turtle", + "tv" => "tradingview", + "twig" => "TwigLexer", + "typescript" => "typescript", + "typoscript" => "typoscript", + "typoscriptcssdata" => "typoscriptcssdata", + "typoscripthtmldata" => "typoscripthtmldata", + "ucode" => "ucode", + "udiff" => "diff", + "uxntal" => "tal", + "v" => "verilog", + "v shell" => "v_shell", + "vala" => "vala", + "vapi" => "vala", + "vb.net" => "vb_net", + "vbnet" => "vb_net", + "velocity" => "VelocityLexer", + "verilog" => "verilog", + "vhdl" => "vhdl", + "vhs" => "vhs", + "vim" => "viml", + "viml" => "viml", + "vlang" => "v", + "vsh" => "v_shell", + "vshell" => "v_shell", + "vue" => "vue", + "vuejs" => "vue", + "wdte" => "wdte", + "webgpu shading language" => "webgpu_shading_language", + "wgsl" => "webgpu_shading_language", + "whiley" => "whiley", + "winbatch" => "batchfile", + "xml" => "xml", + "xorg" => "xorg", + "xorg.conf" => "xorg", + "yaml" => "yaml", + "yang" => "yang", + "z80" => "z80_assembly", + "z80 assembly" => "z80_assembly", + "zed" => "zed", + "zig" => "zig", + "zone" => "dns", + "zsh" => "bash", } LEXERS_BY_MIMETYPE = { - "application/x-forth" => "forth", - "text/x-nginx-conf" => "nginx_configuration_file", - "text/x-ocaml" => "ocaml", - "text/x-kotlin" => "kotlin", - 
"text/x-systemverilog" => "systemverilog", - "application/vnd.wolfram.mathematica.package" => "mathematica", - "application/vnd.wolfram.mathematica" => "mathematica", - "application/mathematica" => "mathematica", - "application/vnd.wolfram.cdf" => "mathematica", - "text/x-v" => "v", - "text/matlab" => "matlab", - "application/x-terraform" => "terraform", - "application/x-tf" => "terraform", - "application/x-desktop" => "desktop_entry", - "application/x-python3" => "python", - "text/x-python" => "python", - "application/x-python" => "python", - "text/x-python3" => "python", - "text/x-windows-registry" => "reg", - "text/x-tradingview" => "tradingview", - "text/x-bb" => "blitzbasic", - "application/x-sas" => "sas", - "text/x-sas" => "sas", - "text/sas" => "sas", - "text/x-asm" => "armasm", - "text/x-armasm" => "armasm", - "text/plain" => "plaintext", - "text/x-glslsrc" => "glsl", - "text/x-uxntal" => "tal", - "text/x-scheme" => "scheme", - "application/x-scheme" => "scheme", - "application/json" => "json", - "text/x-prolog" => "prolog", - "application/prql" => "prql", - "application/octet-stream" => "onesenterprise", - "text/coffeescript" => "coffeescript", - "text/x-llvm" => "llvm", - "text/x-hlsl" => "hlsl", - "application/x-fish" => "fish", - "text/x-newspeak" => "newspeak", - "text/x-gnuplot" => "gnuplot", - "text/octave" => "octave", - "text/x-nim" => "nim", - "text/x-whiley" => "whiley", - "application/x-cython" => "cython", - "text/x-cython" => "cython", - "text/x-lighttpd-conf" => "lighttpd_configuration_file", - "text/x-disasm" => "ndisasm", - "text/x-lua" => "lua", - "application/x-lua" => "lua", - "text/x-meson" => "meson", - "text/x-crystal" => "crystal", - "application/x-qt.qbs+qml" => "qml", - "application/x-qml" => "qml", - "application/x-tcl" => "tcl", - "text/x-tcl" => "tcl", - "text/x-script.tcl" => "tcl", - "text/xml" => "xml", "application/atom+xml" => "xml", - "application/xml" => "xml", - "image/svg+xml" => "xml", + "application/edn" => 
"clojure", + "application/javascript" => "javascript", + "application/json" => "json", + "application/mathematica" => "mathematica", + "application/octet-stream" => "onesenterprise", + "application/postscript" => "postscript", + "application/prql" => "prql", "application/rss+xml" => "xml", - "text/x-vbnet" => "vb_net", - "text/x-vba" => "vb_net", - "text/x-alloy" => "alloy", - "text/x-vhdl" => "vhdl", - "application/x-thrift" => "thrift", - "text/x-coq" => "coq", - "text/x-turing" => "turing", - "text/x-fortran" => "fortran", + "application/sparql-query" => "sparql", + "application/vnd.wolfram.cdf" => "mathematica", + "application/vnd.wolfram.mathematica" => "mathematica", + "application/vnd.wolfram.mathematica.package" => "mathematica", + "application/x-actionscript" => "actionscript", + "application/x-actionscript3" => "actionscript_3", + "application/x-awk" => "awk", + "application/x-brainfuck" => "brainfuck", + "application/x-chaiscript" => "chaiscript", + "application/x-cheetah" => "cheetah", + "application/x-clojure" => "clojure", + "application/x-csh" => "tcsh", + "application/x-cython" => "cython", + "application/x-desktop" => "desktop_entry", + "application/x-django-templating" => "django_jinja", + "application/x-dos-batch" => "batchfile", + "application/x-elisp" => "emacslisp", + "application/x-fennel" => "fennel", + "application/x-fish" => "fish", + "application/x-forth" => "forth", + "application/x-gdscript" => "gdscript3", + "application/x-hcl" => "hcl", + "application/x-hy" => "hy", + "application/x-javascript" => "javascript", + "application/x-jinja" => "django_jinja", + "application/x-julia" => "julia", + "application/x-lua" => "lua", "application/x-mako" => "mako", + "application/x-mason" => "mason", + "application/x-myghty" => "myghty", + "application/x-openedge" => "openedge_abl", + "application/x-perl" => "perl", + "application/x-plutus-core" => "plutus_core", + "application/x-python" => "python", + "application/x-python2" => "python_2", + 
"application/x-python3" => "python", + "application/x-qml" => "qml", + "application/x-qt.qbs+qml" => "qml", + "application/x-racket" => "racket", + "application/x-ruby" => "ruby", + "application/x-sas" => "sas", + "application/x-scheme" => "scheme", "application/x-sh" => "bash", "application/x-shellscript" => "bash", - "application/x-actionscript" => "actionscript", - "text/actionscript" => "actionscript", - "text/x-actionscript" => "actionscript", - "text/x-csrc" => "holyc", - "text/x-chdr" => "holyc", + "application/x-smarty" => "smarty", + "application/x-spitfire" => "cheetah", + "application/x-standardml" => "standard_ml", + "application/x-tcl" => "tcl", + "application/x-terraform" => "terraform", + "application/x-tf" => "terraform", + "application/x-thrift" => "thrift", + "application/x-troff" => "groff", + "application/x-turtle" => "turtle", + "application/x-twig" => "TwigLexer", + "application/x-vue" => "vue", + "application/x.ucode" => "ucode", + "application/xhtml+xml" => "html", + "application/xml" => "xml", + "application/xml-dtd" => "dtd", + "application/yang" => "yang", + "image/svg+xml" => "xml", "image/x-xbitmap" => "holyc", "image/x-xpixmap" => "holyc", - "text/x-vsh" => "v_shell", - "text/x-monkeyc" => "monkeyc", - "application/x-cheetah" => "cheetah", - "application/x-spitfire" => "cheetah", - "text/x-powerquery" => "powerquery", - "text/x-mysql" => "mysql", - "text/x-mariadb" => "mysql", - "text/x-psl" => "psl", - "text/x-nasm" => "nasm", - "text/x-typescript" => "typescript", - "text/x-mlir" => "mlir", - "text/x-fennel" => "fennel", - "application/x-fennel" => "fennel", - "text/x-python2" => "python_2", - "application/x-python2" => "python_2", - "text/x-sourcepawn" => "sourcepawn", - "text/x-makefile" => "makefile", - "text/x-hare" => "hare", - "text/x-autohotkey" => "autohotkey", - "text/x-snobol" => "snobol", - "text/x-promela" => "promela", - "text/x-gleam" => "gleam", - "text/x-rexx" => "rexx", - "application/x-openedge" => "openedge_abl", - 
"text/x-openedge" => "openedge_abl", - "text/x-j" => "j", - "text/x-elm" => "elm", - "text/x-d" => "d", - "application/x-django-templating" => "django_jinja", - "application/x-jinja" => "django_jinja", - "text/x-autoit" => "autoit", - "application/x-dos-batch" => "batchfile", - "text/x-cobol" => "cobol", - "text/x-plpgsql" => "pl_pgsql", - "application/edn" => "clojure", - "application/x-clojure" => "clojure", - "text/x-clojure" => "clojure", - "text/wgsl" => "webgpu_shading_language", - "text/x-ceylon" => "ceylon", - "text/x-cmake" => "cmake", - "text/x-iosrc" => "io", - "text/x-erlang" => "erlang", - "text/x-modula2" => "modula-2", - "text/x-gas" => "gas", - "text/x-materializesql" => "materialize_sql_dialect", - "text/x-vue" => "vue", - "application/x-vue" => "vue", - "text/x-reasonml" => "reasonml", - "text/x-sql" => "sql", - "text/x-sed" => "sed", - "application/x-racket" => "racket", - "text/x-racket" => "racket", - "text/x-sass" => "sass", - "text/x-fsharp" => "fsharp", - "application/x-plutus-core" => "plutus_core", - "text/x-plutus-core" => "plutus_core", - "text/jsx" => "react", - "text/typescript-jsx" => "react", - "text/zed" => "zed", - "text/x-groovy" => "groovy", - "text/dns" => "dns", - "application/x-troff" => "groff", - "text/troff" => "groff", - "application/x.ucode" => "ucode", - "text/x.ucode" => "ucode", - "text/x-ini" => "ini", - "text/inf" => "ini", - "text/org" => "org_mode", - "text/x-styl" => "stylus", - "application/yang" => "yang", - "text/x-dylan" => "dylan", - "text/x-bnf" => "bnf", - "text/x-agda" => "agda", - "text/x-cue" => "cue", - "text/x-c++src" => "c++", - "text/x-c++hdr" => "c++", - "text/turtle" => "turtle", - "application/x-turtle" => "turtle", - "application/xml-dtd" => "dtd", - "text/x-standardml" => "standard_ml", - "application/x-standardml" => "standard_ml", - "application/x-hcl" => "hcl", - "text/x-scad" => "openscad", - "application/x-gdscript" => "gdscript", - "text/x-gdscript" => "gdscript", - "text/x-objective-c" => 
"objective-c", - "text/zig" => "zig", - "application/postscript" => "postscript", - "text/x-abnf" => "abnf", - "text/s" => "r", - "text/x-r" => "r", - "text/x-r-profile" => "r", - "text/s-plus" => "r", - "text/x-r-source" => "r", - "text/x-r-history" => "r", - "text/x-sh" => "bash_session", - "text/smali" => "smali", - "text/x-idris" => "idris", - "text/ipf" => "igor", - "text/x-vim" => "viml", - "text/x-gherkin" => "gherkin", - "text/x-scss" => "scss", - "application/x-ruby" => "ruby", - "text/x-ruby" => "ruby", - "text/x-ballerina" => "ballerina", - "text/css" => "css", - "text/x-rpm-spec" => "rpm_spec", - "text/x-elixir" => "elixir", - "text/x-al" => "al", - "text/x-typoscript" => "typoscript", - "text/x-apacheconf" => "apacheconf", - "application/x-perl" => "perl", - "text/x-perl" => "perl", - "text/x-powershell" => "powershell", - "text/x-dockerfile-config" => "docker", - "text/x-smalltalk" => "smalltalk", - "text/x-julia" => "julia", - "application/x-julia" => "julia", - "text/x-haskell" => "haskell", - "application/x-awk" => "awk", - "application/x-mason" => "mason", - "text/x-postgresql" => "postgresql_sql_dialect", - "text/x-ada" => "ada", - "application/sparql-query" => "sparql", - "text/x-pig" => "pig", - "text/x-verilog" => "verilog", - "text/x-common-lisp" => "common_lisp", - "application/xhtml+xml" => "html", - "text/html" => "html", - "text/x-squidconf" => "squidconf", - "text/x-tasm" => "tasm", - "text/minizinc" => "minizinc", - "text/x-povray" => "povray", - "application/x-twig" => "twig", - "text/basic" => "qbasic", - "text/x-toml" => "toml", - "text/scilab" => "scilab", - "text/x-natural" => "natural", - "text/odin" => "odin", - "text/x-tablegen" => "tablegen", - "text/x-metal" => "metal", - "text/x-dart" => "dart", - "text/x-cql" => "cassandra_cql", - "text/x-java-properties" => "properties", - "application/x-chaiscript" => "chaiscript", - "text/x-chaiscript" => "chaiscript", - "text/x-yaml" => "yaml", - "text/x-abap" => "abap", - "text/x-diff" 
=> "diff", - "text/x-patch" => "diff", - "application/x-brainfuck" => "brainfuck", - "text/rust" => "rust", - "text/x-rust" => "rust", - "text/x-latex" => "tex", - "text/x-tex" => "tex", - "text/x-hy" => "hy", - "application/x-hy" => "hy", - "text/x-ebnf" => "ebnf", - "text/x-java" => "java", - "text/x-nix" => "nix", - "application/x-myghty" => "myghty", - "text/x-csharp" => "c#", - "text/x-tsql" => "transact-sql", - "text/x-php" => "php", - "text/x-vala" => "vala", - "text/x-factor" => "factor", - "text/javascript" => "javascript", - "text/x-javascript" => "javascript", - "application/javascript" => "javascript", - "application/x-javascript" => "javascript", - "text/x-arduino" => "arduino", - "text/x-bibtex" => "bibtex", - "text/x-jungle" => "jungle", - "text/x-swift" => "swift", - "text/x-pascal" => "objectpascal", - "application/x-elisp" => "emacslisp", - "text/x-elisp" => "emacslisp", - "application/x-smarty" => "smarty", + "text/actionscript" => "actionscript", "text/actionscript3" => "actionscript_3", - "application/x-actionscript3" => "actionscript_3", + "text/basic" => "qbasic", + "text/coffeescript" => "coffeescript", + "text/css" => "css", + "text/dns" => "dns", + "text/html" => "html", + "text/inf" => "ini", + "text/ipf" => "igor", + "text/javascript" => "javascript", + "text/jsx" => "react", + "text/matlab" => "matlab", + "text/minizinc" => "minizinc", + "text/octave" => "octave", + "text/odin" => "odin", + "text/org" => "org_mode", + "text/plain" => "plaintext", + "text/rust" => "rust", + "text/s" => "r", + "text/s-plus" => "r", + "text/sas" => "sas", + "text/scilab" => "scilab", + "text/smali" => "smali", + "text/troff" => "groff", + "text/turtle" => "turtle", + "text/typescript-jsx" => "react", + "text/wgsl" => "webgpu_shading_language", + "text/x-abap" => "abap", + "text/x-abnf" => "abnf", + "text/x-actionscript" => "actionscript", "text/x-actionscript3" => "actionscript_3", - "text/x-scala" => "scala", - "application/x-csh" => "tcsh", + 
"text/x-ada" => "ada", + "text/x-agda" => "agda", + "text/x-al" => "al", + "text/x-alloy" => "alloy", + "text/x-apacheconf" => "apacheconf", "text/x-aql" => "arangodb_aql", + "text/x-arduino" => "arduino", + "text/x-armasm" => "armasm", + "text/x-asm" => "armasm", + "text/x-autohotkey" => "autohotkey", + "text/x-autoit" => "autoit", + "text/x-ballerina" => "ballerina", + "text/x-bb" => "blitzbasic", + "text/x-bbcode" => "bbcode", + "text/x-bibtex" => "bibtex", + "text/x-bnf" => "bnf", + "text/x-c++hdr" => "c++", + "text/x-c++src" => "c++", + "text/x-ceylon" => "ceylon", + "text/x-chaiscript" => "chaiscript", + "text/x-chdr" => "holyc", + "text/x-clojure" => "clojure", + "text/x-cmake" => "cmake", + "text/x-cobol" => "cobol", + "text/x-common-lisp" => "common_lisp", + "text/x-coq" => "coq", + "text/x-cql" => "cassandra_cql", + "text/x-crystal" => "crystal", + "text/x-csharp" => "c#", + "text/x-csrc" => "holyc", + "text/x-cue" => "cue", + "text/x-cython" => "cython", + "text/x-d" => "d", + "text/x-dart" => "dart", + "text/x-diff" => "diff", + "text/x-disasm" => "ndisasm", + "text/x-dockerfile-config" => "docker", + "text/x-dylan" => "dylan", + "text/x-ebnf" => "ebnf", + "text/x-elisp" => "emacslisp", + "text/x-elixir" => "elixir", + "text/x-elm" => "elm", + "text/x-erlang" => "erlang", + "text/x-factor" => "factor", + "text/x-fennel" => "fennel", + "text/x-fortran" => "fortran", + "text/x-fsharp" => "fsharp", + "text/x-gas" => "gas", + "text/x-gdscript" => "gdscript3", + "text/x-gherkin" => "gherkin", + "text/x-gleam" => "gleam", + "text/x-glslsrc" => "glsl", + "text/x-gnuplot" => "gnuplot", + "text/x-groovy" => "groovy", + "text/x-hare" => "hare", + "text/x-haskell" => "haskell", + "text/x-hlsl" => "hlsl", + "text/x-hy" => "hy", + "text/x-idris" => "idris", + "text/x-ini" => "ini", + "text/x-iosrc" => "io", + "text/x-j" => "j", + "text/x-java" => "java", + "text/x-java-properties" => "properties", + "text/x-javascript" => "javascript", + "text/x-julia" => "julia", + 
"text/x-jungle" => "jungle", + "text/x-kotlin" => "kotlin", + "text/x-latex" => "tex", + "text/x-lighttpd-conf" => "lighttpd_configuration_file", + "text/x-llvm" => "llvm", + "text/x-lua" => "lua", + "text/x-makefile" => "makefile", + "text/x-mariadb" => "mysql", + "text/x-markdown" => "markdown", + "text/x-materializesql" => "materialize_sql_dialect", + "text/x-meson" => "meson", + "text/x-metal" => "metal", + "text/x-mlir" => "mlir", + "text/x-modula2" => "modula-2", + "text/x-monkeyc" => "monkeyc", + "text/x-mysql" => "mysql", + "text/x-nasm" => "nasm", + "text/x-natural" => "natural", + "text/x-newspeak" => "newspeak", + "text/x-nginx-conf" => "nginx_configuration_file", + "text/x-nim" => "nim", + "text/x-nix" => "nix", + "text/x-objective-c" => "objective-c", + "text/x-ocaml" => "ocaml", + "text/x-openedge" => "openedge_abl", + "text/x-pascal" => "objectpascal", + "text/x-patch" => "diff", + "text/x-perl" => "perl", + "text/x-php" => "php", + "text/x-pig" => "pig", + "text/x-plpgsql" => "pl_pgsql", + "text/x-plutus-core" => "plutus_core", + "text/x-postgresql" => "postgresql_sql_dialect", + "text/x-povray" => "povray", + "text/x-powerquery" => "powerquery", + "text/x-powershell" => "powershell", + "text/x-prolog" => "prolog", + "text/x-promela" => "promela", + "text/x-psl" => "psl", + "text/x-python" => "python", + "text/x-python2" => "python_2", + "text/x-python3" => "python", + "text/x-r" => "r", + "text/x-r-history" => "r", + "text/x-r-profile" => "r", + "text/x-r-source" => "r", + "text/x-racket" => "racket", + "text/x-reasonml" => "reasonml", + "text/x-rexx" => "rexx", + "text/x-rpm-spec" => "rpm_spec", + "text/x-ruby" => "ruby", + "text/x-rust" => "rust", + "text/x-sas" => "sas", + "text/x-sass" => "sass", + "text/x-scad" => "openscad", + "text/x-scala" => "scala", + "text/x-scheme" => "scheme", + "text/x-script.tcl" => "tcl", + "text/x-scss" => "scss", + "text/x-sed" => "sed", + "text/x-sh" => "bash_session", + "text/x-smalltalk" => "smalltalk", + 
"text/x-snobol" => "snobol", + "text/x-sourcepawn" => "sourcepawn", + "text/x-sql" => "sql", + "text/x-squidconf" => "squidconf", + "text/x-standardml" => "standard_ml", + "text/x-styl" => "stylus", + "text/x-swift" => "swift", + "text/x-systemverilog" => "systemverilog", + "text/x-tablegen" => "tablegen", + "text/x-tasm" => "tasm", + "text/x-tcl" => "tcl", + "text/x-tex" => "tex", + "text/x-toml" => "toml", + "text/x-trac-wiki" => "moinwiki", + "text/x-tradingview" => "tradingview", + "text/x-tsql" => "transact-sql", + "text/x-turing" => "turing", + "text/x-typescript" => "typescript", + "text/x-typoscript" => "typoscript", + "text/x-uxntal" => "tal", + "text/x-v" => "v", + "text/x-vala" => "vala", + "text/x-vba" => "vb_net", + "text/x-vbnet" => "vb_net", + "text/x-verilog" => "verilog", + "text/x-vhdl" => "vhdl", + "text/x-vim" => "viml", + "text/x-vsh" => "v_shell", + "text/x-vue" => "vue", + "text/x-whiley" => "whiley", + "text/x-windows-registry" => "reg", + "text/x-yaml" => "yaml", + "text/x.ucode" => "ucode", + "text/xml" => "xml", + "text/zed" => "zed", + "text/zig" => "zig", } LEXERS_BY_FILENAME = { - "*.frt" => ["forth"], - "*.fs" => ["forth", "fsharp"], - "*.fth" => ["forth"], - "nginx.conf" => ["nginx_configuration_file"], - "*.ml" => ["ocaml"], - "*.mli" => ["ocaml"], - "*.mly" => ["ocaml"], - "*.mll" => ["ocaml"], - "*.kt" => ["kotlin"], - "*.sv" => ["systemverilog"], - "*.svh" => ["systemverilog"], - "*.mt" => ["mathematica"], - "*.wl" => ["mathematica"], - "*.cdf" => ["mathematica"], - "*.ma" => ["mathematica"], - "*.mx" => ["mathematica"], - "*.nb" => ["mathematica"], - "*.nbp" => ["mathematica"], - "*.m" => ["mathematica", "objective-c", "mason", "octave", "matlab"], - "*.bqn" => ["bqn"], - "*.vv" => ["v"], - "v.mod" => ["v"], - "*.v" => ["verilog", "v", "coq"], - "*.rego" => ["rego"], - "*.tf" => ["terraform"], - "*.desktop" => ["desktop_entry"], - "*.bicep" => ["bicep"], - "*.py" => ["python"], - "sconscript" => ["python"], - "build.bazel" => 
["python"], - "module.bazel" => ["python"], - "workspace.bzlmod" => ["python"], - "repo.bazel" => ["python"], - "*.jy" => ["python"], - "sconstruct" => ["python"], - "buck" => ["python"], - "*.tac" => ["python"], - "workspace.bazel" => ["python"], - "*.bzl" => ["python"], - "build" => ["python"], - "workspace" => ["python"], - "*.sc" => ["python"], - "*.sage" => ["python"], - "*.pyi" => ["python"], - "*.pyw" => ["python"], - "*.reg" => ["reg"], - "*.tv" => ["tradingview"], - "*.decls" => ["blitzbasic"], - "*.bb" => ["blitzbasic"], - "*.sas" => ["sas"], - "*.s" => ["r", "armasm", "gas"], - "*.link" => ["systemd"], - "*.target" => ["systemd"], - "*.path" => ["systemd"], - "*.slice" => ["systemd"], - "*.device" => ["systemd"], - "*.socket" => ["ini", "systemd"], - "*.service" => ["ini", "systemd"], - "*.scope" => ["systemd"], - "*.dnssd" => ["systemd"], - "*.swap" => ["systemd"], - "*.netdev" => ["systemd"], - "*.network" => ["systemd"], - "*.automount" => ["systemd"], - "*.timer" => ["systemd"], - "*.mount" => ["systemd"], - "*.frag" => ["glsl"], - "*.vert" => ["glsl"], - "*.geo" => ["glsl"], - "*.tal" => ["tal"], - "*.scm" => ["scheme"], - "*.ss" => ["scheme"], - "*.avsc" => ["json"], - "*.json" => ["json"], - "*.prolog" => ["prolog"], - "*.ecl" => ["prolog"], - "*.pl" => ["perl", "prolog"], - "*.pro" => ["prolog"], - "*.prql" => ["prql"], - "*.erf" => ["onesenterprise"], - "*.epf" => ["onesenterprise"], - "*.coffee" => ["coffeescript"], - "*.ll" => ["llvm"], - "*.hlsl" => ["hlsl"], - "*.fx" => ["hlsl"], - "*.cginc" => ["hlsl"], - "*.fxh" => ["hlsl"], - "*.hlsli" => ["hlsl"], - "*.fish" => ["fish"], - "*.load" => ["fish"], - "*.ns2" => ["newspeak"], - "*.plot" => ["gnuplot"], - "*.plt" => ["gnuplot"], - "*.nimrod" => ["nim"], - "*.nim" => ["nim"], - "*.whiley" => ["whiley"], - "dhcpd.conf" => ["iscdhcpd"], - "*.pxi" => ["cython"], - "*.pyx" => ["cython"], - "*.pxd" => ["cython"], - "*.wlua" => ["lua"], - "*.lua" => ["lua"], - "meson.build" => ["meson"], - 
"meson_options.txt" => ["meson"], - "*.cr" => ["crystal"], - "*.qbs" => ["qml"], - "*.qml" => ["qml"], - "*.tcl" => ["tcl"], - "*.rvt" => ["tcl"], - "*.wsf" => ["xml"], - "*.svg" => ["xml"], - "*.rss" => ["xml"], - "*.xsl" => ["xml"], - "*.xml" => ["xml"], - "*.xslt" => ["xml", "html"], - "*.csproj" => ["xml"], - "*.fsproj" => ["xml"], - "*.vcxproj" => ["xml"], - "*.wsdl" => ["xml"], - "*.xsd" => ["xml"], - "*.bas" => ["vb_net", "qbasic"], - "*.vb" => ["vb_net"], - "*.als" => ["alloy"], - "*.vhd" => ["vhdl"], - "*.vhdl" => ["vhdl"], - "*.handlebars" => ["handlebars"], - "*.hbs" => ["handlebars"], - "*.thrift" => ["thrift"], - "*.turing" => ["turing"], - "*.tu" => ["turing"], - "*.graphqls" => ["graphql"], - "*.graphql" => ["graphql"], - "*.f" => ["fortranfixed"], - "termcap" => ["termcap"], - "termcap.src" => ["termcap"], - "*.mao" => ["mako"], - "*.dax" => ["dax"], - "*.zsh" => ["bash"], - "*.exlib" => ["bash"], - "*.sh" => ["bash"], - ".env" => ["bash"], - "*.env" => ["bash"], - ".bash_*" => ["bash"], - "bashrc" => ["bash"], - "*.zshrc" => ["bash"], - "*.bash" => ["bash"], - "*.exheres-0" => ["bash"], - "bash_*" => ["bash"], - "zshrc" => ["bash"], - ".zshrc" => ["bash"], - "*.eclass" => ["bash"], - "pkgbuild" => ["bash"], - "*.ksh" => ["bash"], - ".bashrc" => ["bash"], - "*.ebuild" => ["bash"], - "*.as" => ["actionscript", "actionscript_3"], - "*.x[bp]m" => ["c"], - "*.c" => ["c", "c++"], - "*.idc" => ["c"], - "*.h" => ["objective-c", "c", "c++"], - "*.vsh" => ["v_shell"], - "*.mc" => ["monkeyc", "mason"], - "*.spt" => ["cheetah"], - "*.tmpl" => ["cheetah"], - "*.pq" => ["powerquery"], - "*.sql" => ["sql", "mysql"], - "*.psl" => ["psl"], - "*.proc" => ["psl"], - "*.trig" => ["psl"], - "*.batch" => ["psl"], - "*.asm" => ["tasm", "z80_assembly", "nasm"], - "*.nasm" => ["nasm"], - "*.ts" => ["typoscript", "typescript"], - "*.cts" => ["typescript"], - "*.mts" => ["typescript"], - "*.tsx" => ["typescript"], - "*.mlir" => ["mlir"], - "*.fennel" => ["fennel"], - "*.sp" 
=> ["sourcepawn"], - "*.inc" => ["objectpascal", "sourcepawn", "povray", "php"], - "bsdmakefile" => ["makefile"], - "gnumakefile" => ["makefile"], - "*.mak" => ["makefile"], - "*.mk" => ["makefile"], - "makefile.*" => ["makefile"], - "justfile" => ["makefile"], - ".justfile" => ["makefile"], - "makefile" => ["makefile"], - "*.ha" => ["hare"], + "*.1p" => ["groff"], + "*.3pm" => ["groff"], + "*.[1-9]" => ["groff"], + "*.[gs]sed" => ["sed"], + "*.abap" => ["abap"], + "*.abnf" => ["abnf"], + "*.ada" => ["ada"], + "*.adb" => ["ada"], + "*.ads" => ["ada"], + "*.agda" => ["agda"], "*.ahk" => ["autohotkey"], "*.ahkl" => ["autohotkey"], - "*.snobol" => ["snobol"], - "*.mcfunction" => ["mcfunction"], - "*.pml" => ["promela"], - "*.pr" => ["promela"], - "*.promela" => ["promela"], - "*.prm" => ["promela"], - "*.prom" => ["promela"], - "*.pm" => ["perl", "promela"], - "*.cf" => ["cfengine3"], - "*.pony" => ["pony"], - "*.gleam" => ["gleam"], - "*.rex" => ["rexx"], - "*.arexx" => ["rexx"], - "*.rexx" => ["rexx"], - "*.rx" => ["rexx"], - "*.p" => ["openedge_abl"], - "*.i" => ["openedge_abl"], - "*.cls" => ["openedge_abl"], - "*.w" => ["openedge_abl"], - "*.ijs" => ["j"], - "*.elm" => ["elm"], - "*.d" => ["d"], - "*.di" => ["d"], - "*.au3" => ["autoit"], - "*.txt" => ["plaintext"], - "*.bat" => ["batchfile"], - "*.cmd" => ["batchfile"], - "*.cpy" => ["cobol"], - "*.cob" => ["cobol"], - "*.clj" => ["clojure"], - "*.edn" => ["clojure"], - "*.wgsl" => ["webgpu_shading_language"], - "*.ceylon" => ["ceylon"], - "*.cmake" => ["cmake"], - "cmakelists.txt" => ["cmake"], - "*.io" => ["io"], + "*.al" => ["al"], + "*.als" => ["alloy"], "*.apl" => ["apl"], + "*.applescript" => ["applescript"], + "*.aql" => ["arangodb_aql"], + "*.arexx" => ["rexx"], + "*.as" => ["actionscript", "actionscript_3"], + "*.asm" => ["nasm", "z80_assembly", "tasm"], + "*.au3" => ["autoit"], + "*.automount" => ["systemd"], + "*.aux" => ["tex"], + "*.avsc" => ["json"], + "*.awk" => ["awk"], + "*.b" => ["brainfuck"], 
+ "*.bal" => ["ballerina"], + "*.bas" => ["vb_net", "qbasic"], + "*.bash" => ["bash"], + "*.bat" => ["batchfile"], + "*.batch" => ["psl"], + "*.bb" => ["blitzbasic"], + "*.bf" => ["brainfuck"], + "*.bib" => ["bibtex"], + "*.bicep" => ["bicep"], + "*.bnf" => ["bnf"], + "*.bqn" => ["bqn"], + "*.bzl" => ["python"], + "*.c" => ["c", "c++"], + "*.c++" => ["c++"], + "*.capnp" => ["cap_n_proto"], + "*.cc" => ["c++"], + "*.cdf" => ["mathematica"], + "*.ceylon" => ["ceylon"], + "*.cf" => ["cfengine3"], + "*.cfg" => ["ini"], + "*.cginc" => ["hlsl"], + "*.chai" => ["chaiscript"], + "*.chpl" => ["chapel"], + "*.cjs" => ["javascript"], + "*.cl" => ["common_lisp"], + "*.clj" => ["clojure"], + "*.cls" => ["openedge_abl"], + "*.cmake" => ["cmake"], + "*.cmd" => ["batchfile"], + "*.cob" => ["cobol"], + "*.coffee" => ["coffeescript"], + "*.cp" => ["c++"], + "*.cpp" => ["c++"], + "*.cpy" => ["cobol"], + "*.cql" => ["cassandra_cql"], + "*.cr" => ["crystal"], + "*.cs" => ["c#"], + "*.csh" => ["tcsh"], + "*.csproj" => ["xml"], + "*.css" => ["css"], + "*.cts" => ["typescript"], + "*.cue" => ["cue"], + "*.cxx" => ["c++"], + "*.d" => ["d"], + "*.dal" => ["al"], + "*.dart" => ["dart"], + "*.dax" => ["dax"], + "*.decls" => ["blitzbasic"], + "*.def" => ["modula-2"], + "*.desktop" => ["desktop_entry"], + "*.device" => ["systemd"], + "*.di" => ["d"], + "*.diff" => ["diff"], + "*.dnssd" => ["systemd"], + "*.docker" => ["docker"], + "*.dockerfile" => ["docker"], + "*.dpk" => ["objectpascal"], + "*.dpr" => ["objectpascal"], + "*.dtd" => ["dtd"], + "*.duby" => ["ruby"], + "*.dyl" => ["dylan"], + "*.dylan" => ["dylan"], + "*.dzn" => ["minizinc"], + "*.ebnf" => ["ebnf"], + "*.ebuild" => ["bash"], + "*.ecl" => ["prolog"], + "*.eclass" => ["bash"], + "*.edn" => ["clojure"], + "*.eex" => ["elixir"], + "*.el" => ["emacslisp"], + "*.elm" => ["elm"], + "*.env" => ["bash"], + "*.epf" => ["onesenterprise"], + "*.eps" => ["postscript"], + "*.erf" => ["onesenterprise"], + "*.erl" => ["erlang"], "*.es" => 
["erlang"], "*.escript" => ["erlang"], + "*.ex" => ["elixir"], + "*.exheres-0" => ["bash"], + "*.exlib" => ["bash"], + "*.exs" => ["elixir"], + "*.f" => ["fortranfixed"], + "*.f03" => ["fortran"], + "*.f90" => ["fortran"], + "*.f95" => ["fortran"], + "*.factor" => ["factor"], + "*.feature" => ["gherkin"], + "*.fennel" => ["fennel"], + "*.fhtml" => ["VelocityLexer"], + "*.fish" => ["fish"], + "*.frag" => ["glsl"], + "*.frt" => ["forth"], + "*.fs" => ["forth", "fsharp"], + "*.fsi" => ["fsharp"], + "*.fsproj" => ["xml"], + "*.fth" => ["forth"], + "*.fun" => ["standard_ml"], + "*.fx" => ["hlsl"], + "*.fxh" => ["hlsl"], + "*.fzn" => ["minizinc"], + "*.gd" => ["gdscript3", "gdscript"], + "*.gemspec" => ["ruby"], + "*.geo" => ["glsl"], + "*.gleam" => ["gleam"], + "*.go.tmpl" => ["go_template"], + "*.gotmpl" => ["go_template"], + "*.gradle" => ["groovy"], + "*.graphql" => ["graphql"], + "*.graphqls" => ["graphql"], + "*.groovy" => ["groovy"], + "*.h" => ["objective-c", "c", "c++"], + "*.h++" => ["c++"], + "*.ha" => ["hare"], + "*.handlebars" => ["handlebars"], + "*.hbs" => ["handlebars"], + "*.hc" => ["holyc"], + "*.hc.z" => ["holyc"], + "*.hcl" => ["hcl"], + "*.hh" => ["holyc", "c++"], + "*.hlb" => ["hlb"], + "*.hlsl" => ["hlsl"], + "*.hlsli" => ["hlsl"], + "*.hpp" => ["c++"], "*.hrl" => ["erlang"], - "*.erl" => ["erlang"], - "*.def" => ["modula-2"], + "*.hs" => ["haskell"], + "*.htm" => ["html"], + "*.html" => ["html"], + "*.hxx" => ["c++"], + "*.hy" => ["hy"], + "*.i" => ["openedge_abl"], + "*.idc" => ["c"], + "*.idr" => ["idris"], + "*.ijs" => ["j"], + "*.inc" => ["php", "sourcepawn", "objectpascal", "povray"], + "*.inf" => ["ini"], + "*.ini" => ["ini"], + "*.ino" => ["arduino"], + "*.intr" => ["dylan"], + "*.io" => ["io"], + "*.ipf" => ["igor"], + "*.java" => ["java"], + "*.jl" => ["julia"], + "*.js" => ["javascript"], + "*.jsm" => ["javascript"], + "*.json" => ["json"], + "*.jsonata" => ["jsonata"], + "*.jsx" => ["react"], + "*.jungle" => ["jungle"], + "*.jy" => 
["python"], + "*.ksh" => ["bash"], + "*.kt" => ["kotlin"], + "*.link" => ["systemd"], + "*.liquid" => ["LiquidLexer"], + "*.lisp" => ["common_lisp"], + "*.ll" => ["llvm"], + "*.load" => ["fish"], + "*.lpk" => ["objectpascal"], + "*.lpr" => ["objectpascal"], + "*.lua" => ["lua"], + "*.m" => ["mason", "mathematica", "matlab", "octave", "objective-c"], + "*.ma" => ["mathematica"], + "*.mak" => ["makefile"], + "*.man" => ["groff"], + "*.mao" => ["mako"], + "*.markdown" => ["markdown"], + "*.mc" => ["monkeyc", "mason"], + "*.mcfunction" => ["mcfunction"], + "*.md" => ["markdown"], + "*.metal" => ["metal"], + "*.mhtml" => ["mason"], + "*.mi" => ["mason"], + "*.mjs" => ["javascript"], + "*.mk" => ["makefile"], + "*.ml" => ["ocaml"], + "*.mli" => ["ocaml"], + "*.mlir" => ["mlir"], + "*.mll" => ["ocaml"], + "*.mly" => ["ocaml"], "*.mod" => ["modula-2"], + "*.mount" => ["systemd"], + "*.mt" => ["mathematica"], + "*.mts" => ["typescript"], + "*.mx" => ["mathematica"], + "*.myt" => ["myghty"], + "*.mzn" => ["minizinc"], + "*.nasm" => ["nasm"], + "*.nb" => ["mathematica"], + "*.nbp" => ["mathematica"], + "*.netdev" => ["systemd"], + "*.network" => ["systemd"], + "*.nim" => ["nim"], + "*.nimrod" => ["nim"], + "*.nix" => ["nix"], + "*.ns2" => ["newspeak"], + "*.ns7" => ["natural"], + "*.nsa" => ["natural"], + "*.nsc" => ["natural"], + "*.nsg" => ["natural"], + "*.nsh" => ["natural"], + "*.nsl" => ["natural"], + "*.nsm" => ["natural"], + "*.nsn" => ["natural"], + "*.nsp" => ["natural"], + "*.nss" => ["natural"], + "*.odin" => ["odin"], + "*.org" => ["org_mode"], + "*.p" => ["openedge_abl"], + "*.pas" => ["objectpascal"], + "*.patch" => ["diff"], + "*.path" => ["systemd"], + "*.pc" => ["pkgconfig"], + "*.php" => ["php"], + "*.php[345]" => ["php"], + "*.pig" => ["pig"], + "*.pl" => ["prolog", "perl"], + "*.plc" => ["plutus_core"], + "*.plot" => ["gnuplot"], + "*.plt" => ["gnuplot"], + "*.pm" => ["perl", "promela"], + "*.pml" => ["promela"], + "*.pony" => ["pony"], + "*.pov" => 
["povray"], + "*.pp" => ["objectpascal", "puppet"], + "*.pq" => ["powerquery"], + "*.pr" => ["promela"], + "*.prm" => ["promela"], + "*.pro" => ["prolog"], + "*.proc" => ["psl"], + "*.prolog" => ["prolog"], + "*.prom" => ["promela"], + "*.promela" => ["promela"], + "*.promql" => ["promql"], + "*.properties" => ["properties"], "*.proto" => ["protocol_buffer"], - "*.vue" => ["vue"], - "*.rei" => ["reasonml"], + "*.prql" => ["prql"], + "*.ps" => ["postscript"], + "*.ps1" => ["powershell"], + "*.psd1" => ["powershell"], + "*.psl" => ["psl"], + "*.psm1" => ["powershell"], + "*.pxd" => ["cython"], + "*.pxi" => ["cython"], + "*.py" => ["python"], + "*.pyi" => ["python"], + "*.pyw" => ["python"], + "*.pyx" => ["cython"], + "*.qbs" => ["qml"], + "*.qml" => ["qml"], + "*.r" => ["r"], + "*.rake" => ["ruby"], + "*.rb" => ["ruby"], + "*.rbw" => ["ruby"], + "*.rbx" => ["ruby"], "*.re" => ["reasonml"], - "*.[gs]sed" => ["sed"], - "*.sed" => ["sed"], + "*.react" => ["react"], + "*.reg" => ["reg"], + "*.rego" => ["rego"], + "*.rei" => ["reasonml"], + "*.rex" => ["rexx"], + "*.rexx" => ["rexx"], "*.rkt" => ["racket"], "*.rktd" => ["racket"], "*.rktl" => ["racket"], - "*.sass" => ["sass"], - "*.fsi" => ["fsharp"], - "*.plc" => ["plutus_core"], - "*.react" => ["react"], - "*.jsx" => ["react"], - "*.zed" => ["zed"], - "*.gradle" => ["groovy"], - "*.groovy" => ["groovy"], - "*.zone" => ["dns"], - "*.3pm" => ["groff"], - "*.man" => ["groff"], - "*.[1-9]" => ["groff"], - "*.1p" => ["groff"], - "*.uc" => ["ucode"], - ".gitconfig" => ["ini"], - "*.inf" => ["ini"], - "*.ini" => ["ini"], - "pylintrc" => ["ini"], - "*.cfg" => ["ini"], - ".editorconfig" => ["ini"], - ".pylintrc" => ["ini"], - "*.org" => ["org_mode"], - "*.styl" => ["stylus"], - "*.yang" => ["yang"], - "*.intr" => ["dylan"], - "*.dyl" => ["dylan"], - "*.dylan" => ["dylan"], - "*.bnf" => ["bnf"], - "*.agda" => ["agda"], - "*.cue" => ["cue"], - "*.hxx" => ["c++"], - "*.h++" => ["c++"], - "*.cpp" => ["c++"], - "*.hpp" => ["c++"], - 
"*.cxx" => ["c++"], - "*.cc" => ["c++"], - "*.cp" => ["c++"], - "*.c++" => ["c++"], - "*.hh" => ["holyc", "c++"], - "*.tpp" => ["c++"], - "*.ttl" => ["turtle"], - "*.dtd" => ["dtd"], - "*.sig" => ["standard_ml"], - "*.fun" => ["standard_ml"], - "*.sml" => ["standard_ml"], - "*.hcl" => ["hcl"], - "*.scad" => ["openscad"], - "*.hc" => ["holyc"], - "*.hc.z" => ["holyc"], - "*.gd" => ["gdscript3", "gdscript"], - "*.pp" => ["objectpascal", "puppet"], - "*.zig" => ["zig"], - "*.eps" => ["postscript"], - "*.ps" => ["postscript"], - "*.abnf" => ["abnf"], - ".rhistory" => ["r"], - "*.r" => ["r"], - ".renviron" => ["r"], - ".rprofile" => ["r"], - "pacman.conf" => ["pacmanconf"], - "*.hlb" => ["hlb"], - "*.sh-session" => ["bash_session"], - "*.smali" => ["smali"], - "*.idr" => ["idris"], - "*.sol" => ["solidity"], - "*.ipf" => ["igor"], - "gvimrc" => ["viml"], - ".gvimrc" => ["viml"], - "_vimrc" => ["viml"], - ".exrc" => ["viml"], - "_exrc" => ["viml"], - "_gvimrc" => ["viml"], - "*.vim" => ["viml"], - "vimrc" => ["viml"], - ".vimrc" => ["viml"], - "*.applescript" => ["applescript"], - "*.feature" => ["gherkin"], - "*.scss" => ["scss"], - "*.jsonata" => ["jsonata"], - "terminfo.src" => ["terminfo"], - "terminfo" => ["terminfo"], - "*.rbx" => ["ruby"], - "*.rake" => ["ruby"], - "rakefile" => ["ruby"], - "gemfile" => ["ruby"], - "*.duby" => ["ruby"], - "vagrantfile" => ["ruby"], - "*.gemspec" => ["ruby"], - "*.rb" => ["ruby"], - "*.rbw" => ["ruby"], - "*.bal" => ["ballerina"], - "*.css" => ["css"], - "*.spec" => ["rpm_spec"], - "*.exs" => ["elixir"], - "*.eex" => ["elixir"], - "*.ex" => ["elixir"], - "*.dal" => ["al"], - "*.al" => ["al"], - "*.stas" => ["stas"], - "apache.conf" => ["apacheconf"], - ".htaccess" => ["apacheconf"], - "apache2.conf" => ["apacheconf"], - "*.t" => ["perl"], - "*.psm1" => ["powershell"], - "*.ps1" => ["powershell"], - "*.psd1" => ["powershell"], - "*.dockerfile" => ["docker"], - "dockerfile" => ["docker"], - "*.docker" => ["docker"], - "dockerfile.*" 
=> ["docker"], - "*.st" => ["smalltalk"], - "*.jl" => ["julia"], - "*.hs" => ["haskell"], - "*.awk" => ["awk"], - "autohandler" => ["mason"], - "*.mi" => ["mason"], - "*.mhtml" => ["mason"], - "dhandler" => ["mason"], - "*.ads" => ["ada"], - "*.ada" => ["ada"], - "*.adb" => ["ada"], - "*.f03" => ["fortran"], - "*.f95" => ["fortran"], - "*.f90" => ["fortran"], "*.rq" => ["sparql"], - "*.sparql" => ["sparql"], - "*.pig" => ["pig"], - "*.lisp" => ["common_lisp"], - "*.cl" => ["common_lisp"], - "*.htm" => ["html"], - "*.html" => ["html"], - "*.xhtml" => ["html"], - "squid.conf" => ["squidconf"], - "*.tasm" => ["tasm"], - "*.tape" => ["vhs"], - "*.dzn" => ["minizinc"], - "*.fzn" => ["minizinc"], - "*.mzn" => ["minizinc"], - "*.pov" => ["povray"], - "*.siv" => ["sieve"], - "*.sieve" => ["sieve"], - "*.twig" => ["twig"], - "poetry.lock" => ["toml"], - "pipfile" => ["toml"], - "*.toml" => ["toml"], + "*.rs" => ["rust"], + "*.rs.in" => ["rust"], + "*.rss" => ["xml"], + "*.rvt" => ["tcl"], + "*.rx" => ["rexx"], + "*.s" => ["r", "armasm", "gas"], + "*.sage" => ["python"], + "*.sas" => ["sas"], + "*.sass" => ["sass"], + "*.sc" => ["python"], + "*.scad" => ["openscad"], + "*.scala" => ["scala"], "*.sce" => ["scilab"], "*.sci" => ["scilab"], - "*.tst" => ["scilab"], - "*.nsh" => ["natural"], - "*.nss" => ["natural"], - "*.nsm" => ["natural"], - "*.nsg" => ["natural"], - "*.nsc" => ["natural"], - "*.nsp" => ["natural"], - "*.ns7" => ["natural"], - "*.nsa" => ["natural"], - "*.nsl" => ["natural"], - "*.nsn" => ["natural"], - "*.odin" => ["odin"], + "*.scm" => ["scheme"], + "*.scope" => ["systemd"], + "*.scss" => ["scss"], + "*.sed" => ["sed"], + "*.service" => ["systemd", "ini"], + "*.sh" => ["bash"], + "*.sh-session" => ["bash_session"], + "*.sieve" => ["sieve"], + "*.sig" => ["standard_ml"], + "*.siv" => ["sieve"], + "*.slice" => ["systemd"], + "*.smali" => ["smali"], + "*.sml" => ["standard_ml"], + "*.snobol" => ["snobol"], + "*.socket" => ["systemd", "ini"], + "*.sol" => 
["solidity"], + "*.sp" => ["sourcepawn"], + "*.sparql" => ["sparql"], + "*.spec" => ["rpm_spec"], + "*.spt" => ["cheetah"], + "*.sql" => ["mysql", "sql"], + "*.ss" => ["scheme"], + "*.st" => ["smalltalk"], + "*.stas" => ["stas"], + "*.styl" => ["stylus"], + "*.sv" => ["systemverilog"], + "*.svg" => ["xml"], + "*.svh" => ["systemverilog"], + "*.swap" => ["systemd"], + "*.swift" => ["swift"], + "*.t" => ["perl"], + "*.tac" => ["python"], + "*.tal" => ["tal"], + "*.tape" => ["vhs"], + "*.target" => ["systemd"], + "*.tasm" => ["tasm"], + "*.tcl" => ["tcl"], + "*.tcsh" => ["tcsh"], "*.td" => ["tablegen"], - "*.metal" => ["metal"], - "*.dart" => ["dart"], - "*.cql" => ["cassandra_cql"], - "*.properties" => ["properties"], - "*.chpl" => ["chapel"], - "*.chai" => ["chaiscript"], - "*.yaml" => ["yaml"], - "*.yml" => ["yaml"], - "*.abap" => ["abap"], - "*.patch" => ["diff"], - "*.diff" => ["diff"], - "*.bf" => ["brainfuck"], - "*.b" => ["brainfuck"], - "*.rs.in" => ["rust"], - "*.rs" => ["rust"], - "*.toc" => ["tex"], - "*.aux" => ["tex"], "*.tex" => ["tex"], - "*.hy" => ["hy"], - "*.ebnf" => ["ebnf"], - "*.java" => ["java"], - "xorg.conf" => ["xorg"], - "*.nix" => ["nix"], - "autodelegate" => ["myghty"], - "*.myt" => ["myghty"], - "*.cs" => ["c#"], - "*.z80" => ["z80_assembly"], - "*.pc" => ["pkgconfig"], - "*.php[345]" => ["php"], - "*.php" => ["php"], + "*.tf" => ["terraform"], + "*.thrift" => ["thrift"], + "*.timer" => ["systemd"], + "*.tmpl" => ["cheetah"], + "*.toc" => ["tex"], + "*.toml" => ["toml"], + "*.tpl" => ["smarty"], + "*.tpp" => ["c++"], + "*.trig" => ["psl"], + "*.ts" => ["typoscript", "typescript"], + "*.tst" => ["scilab"], + "*.tsx" => ["typescript"], + "*.ttl" => ["turtle"], + "*.tu" => ["turing"], + "*.turing" => ["turing"], + "*.tv" => ["tradingview"], + "*.twig" => ["twig"], + "*.txt" => ["plaintext"], + "*.uc" => ["ucode"], + "*.v" => ["coq", "v", "verilog"], "*.vala" => ["vala"], "*.vapi" => ["vala"], - "*.factor" => ["factor"], - "*.js" => 
["javascript"], - "*.jsm" => ["javascript"], - "*.cjs" => ["javascript"], - "*.mjs" => ["javascript"], - "*.ino" => ["arduino"], - "*.bib" => ["bibtex"], - "*.capnp" => ["cap_n_proto"], - "*.jungle" => ["jungle"], - "*.swift" => ["swift"], - "*.gotmpl" => ["go_template"], - "*.go.tmpl" => ["go_template"], - "*.promql" => ["promql"], + "*.vb" => ["vb_net"], + "*.vcxproj" => ["xml"], + "*.vert" => ["glsl"], + "*.vhd" => ["vhdl"], + "*.vhdl" => ["vhdl"], + "*.vim" => ["viml"], + "*.vm" => ["VelocityLexer"], + "*.vsh" => ["v_shell"], + "*.vue" => ["vue"], + "*.vv" => ["v"], + "*.w" => ["openedge_abl"], "*.wdte" => ["wdte"], - "*.lpk" => ["objectpascal"], - "*.lpr" => ["objectpascal"], - "*.dpk" => ["objectpascal"], - "*.pas" => ["objectpascal"], - "*.dpr" => ["objectpascal"], - "*.el" => ["emacslisp"], - "*.tpl" => ["smarty"], - "*.scala" => ["scala"], - "*.tcsh" => ["tcsh"], - "*.csh" => ["tcsh"], - "*.aql" => ["arangodb_aql"], + "*.wgsl" => ["webgpu_shading_language"], + "*.whiley" => ["whiley"], + "*.wl" => ["mathematica"], + "*.wlua" => ["lua"], + "*.wsdl" => ["xml"], + "*.wsf" => ["xml"], + "*.x[bp]m" => ["c"], + "*.xhtml" => ["html"], + "*.xml" => ["xml"], + "*.xsd" => ["xml"], + "*.xsl" => ["xml"], + "*.xslt" => ["xml", "html"], + "*.yaml" => ["yaml"], + "*.yang" => ["yang"], + "*.yml" => ["yaml"], + "*.z80" => ["z80_assembly"], + "*.zed" => ["zed"], + "*.zig" => ["zig"], + "*.zone" => ["dns"], + "*.zsh" => ["bash"], + "*.zshrc" => ["bash"], + ".bash_*" => ["bash"], + ".bashrc" => ["bash"], + ".editorconfig" => ["ini"], + ".env" => ["bash"], + ".exrc" => ["viml"], + ".gitconfig" => ["ini"], + ".gvimrc" => ["viml"], + ".htaccess" => ["apacheconf"], + ".justfile" => ["makefile"], + ".pylintrc" => ["ini"], + ".renviron" => ["r"], + ".rhistory" => ["r"], + ".rprofile" => ["r"], + ".vimrc" => ["viml"], + ".zshrc" => ["bash"], + "_exrc" => ["viml"], + "_gvimrc" => ["viml"], + "_vimrc" => ["viml"], + "apache.conf" => ["apacheconf"], + "apache2.conf" => ["apacheconf"], 
+ "autodelegate" => ["myghty"], + "autohandler" => ["mason"], + "bash_*" => ["bash"], + "bashrc" => ["bash"], + "bsdmakefile" => ["makefile"], + "buck" => ["python"], + "build" => ["python"], + "build.bazel" => ["python"], + "cmakelists.txt" => ["cmake"], + "dhandler" => ["mason"], + "dhcpd.conf" => ["iscdhcpd"], + "dockerfile" => ["docker"], + "dockerfile.*" => ["docker"], + "gemfile" => ["ruby"], + "gnumakefile" => ["makefile"], + "gvimrc" => ["viml"], + "justfile" => ["makefile"], + "makefile" => ["makefile"], + "makefile.*" => ["makefile"], + "meson.build" => ["meson"], + "meson_options.txt" => ["meson"], + "module.bazel" => ["python"], + "nginx.conf" => ["nginx_configuration_file"], + "pacman.conf" => ["pacmanconf"], + "pipfile" => ["toml"], + "pkgbuild" => ["bash"], + "poetry.lock" => ["toml"], + "pylintrc" => ["ini"], + "rakefile" => ["ruby"], + "repo.bazel" => ["python"], + "sconscript" => ["python"], + "sconstruct" => ["python"], + "squid.conf" => ["squidconf"], + "termcap" => ["termcap"], + "termcap.src" => ["termcap"], + "terminfo" => ["terminfo"], + "terminfo.src" => ["terminfo"], + "v.mod" => ["v"], + "vagrantfile" => ["ruby"], + "vimrc" => ["viml"], + "workspace" => ["python"], + "workspace.bazel" => ["python"], + "workspace.bzlmod" => ["python"], + "xorg.conf" => ["xorg"], + "zshrc" => ["bash"], } end diff --git a/src/formatters/ansi.cr b/src/formatters/ansi.cr index fc9e608..aaf8864 100644 --- a/src/formatters/ansi.cr +++ b/src/formatters/ansi.cr @@ -17,8 +17,8 @@ module Tartrazine outp.to_s end - def format(text : String, lexer : Lexer, outp : IO) : Nil - tokenizer = Tokenizer.new(lexer, text) + def format(text : String, lexer : BaseLexer, outp : IO) : Nil + tokenizer = lexer.tokenizer(text) i = 0 outp << line_label(i) if line_numbers? 
tokenizer.each do |token| diff --git a/src/formatters/html.cr b/src/formatters/html.cr index 6fab641..05079b4 100644 --- a/src/formatters/html.cr +++ b/src/formatters/html.cr @@ -40,7 +40,7 @@ module Tartrazine outp.to_s end - def format(text : String, lexer : Lexer, io : IO) : Nil + def format(text : String, lexer : BaseLexer, io : IO) : Nil pre, post = wrap_standalone io << pre if standalone? format_text(text, lexer, io) @@ -64,8 +64,8 @@ module Tartrazine "#{line_label} " end - def format_text(text : String, lexer : Lexer, outp : IO) - tokenizer = Tokenizer.new(lexer, text) + def format_text(text : String, lexer : BaseLexer, outp : IO) + tokenizer = lexer.tokenizer(text) i = 0 if surrounding_pre? pre_style = wrap_long_lines? ? "style=\"white-space: pre-wrap; word-break: break-word;\"" : "" diff --git a/src/formatters/json.cr b/src/formatters/json.cr index 5ba28ea..2669181 100644 --- a/src/formatters/json.cr +++ b/src/formatters/json.cr @@ -4,14 +4,14 @@ module Tartrazine class Json < Formatter property name = "json" - def format(text : String, lexer : Lexer) : String + def format(text : String, lexer : BaseLexer) : String outp = String::Builder.new("") format(text, lexer, outp) outp.to_s end - def format(text : String, lexer : Lexer, io : IO) : Nil - tokenizer = Tokenizer.new(lexer, text) + def format(text : String, lexer : BaseLexer, io : IO) : Nil + tokenizer = lexer.tokenizer(text) io << Tartrazine::Lexer.collapse_tokens(tokenizer.to_a).to_json end end diff --git a/src/lexer.cr b/src/lexer.cr index 30c82e9..e38a261 100644 --- a/src/lexer.cr +++ b/src/lexer.cr @@ -9,29 +9,46 @@ module Tartrazine # Get the lexer object for a language name # FIXME: support mimetypes - def self.lexer(name : String? = nil, filename : String? = nil) : Lexer - if name.nil? && filename.nil? 
- lexer_file_name = LEXERS_BY_NAME["plaintext"] - elsif name && name != "autodetect" - lexer_file_name = LEXERS_BY_NAME[name.downcase] - else - # Guess by filename - candidates = Set(String).new - LEXERS_BY_FILENAME.each do |k, v| - candidates += v.to_set if File.match?(k, File.basename(filename.to_s)) - end - case candidates.size - when 0 - lexer_file_name = LEXERS_BY_NAME["plaintext"] - when 1 - lexer_file_name = candidates.first - else - raise Exception.new("Multiple lexers match the filename: #{candidates.to_a.join(", ")}") - end - end + def self.lexer(name : String? = nil, filename : String? = nil) : BaseLexer + return lexer_by_name(name) if name && name != "autodetect" + return lexer_by_filename(filename) if filename + + Lexer.from_xml(LexerFiles.get("/#{LEXERS_BY_NAME["plaintext"]}.xml").gets_to_end) + end + + private def self.lexer_by_name(name : String) : BaseLexer + lexer_file_name = LEXERS_BY_NAME.fetch(name.downcase, nil) + return create_delegating_lexer(name) if lexer_file_name.nil? && name.includes? "+" + raise Exception.new("Unknown lexer: #{name}") if lexer_file_name.nil? 
+ Lexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end) end + private def self.lexer_by_filename(filename : String) : BaseLexer + candidates = Set(String).new + LEXERS_BY_FILENAME.each do |k, v| + candidates += v.to_set if File.match?(k, File.basename(filename)) + end + + case candidates.size + when 0 + lexer_file_name = LEXERS_BY_NAME["plaintext"] + when 1 + lexer_file_name = candidates.first + else + raise Exception.new("Multiple lexers match the filename: #{candidates.to_a.join(", ")}") + end + + Lexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end) + end + + private def self.create_delegating_lexer(name : String) : BaseLexer + language, root = name.split("+", 2) + language_lexer = lexer(language) + root_lexer = lexer(root) + DelegatingLexer.new(language_lexer, root_lexer) + end + # Return a list of all lexers def self.lexers : Array(String) LEXERS_BY_NAME.keys.sort! @@ -40,15 +57,18 @@ module Tartrazine # A token, the output of the tokenizer alias Token = NamedTuple(type: String, value: String) - struct Tokenizer + abstract class BaseTokenizer + end + + class Tokenizer < BaseTokenizer include Iterator(Token) - property lexer : Lexer + property lexer : BaseLexer property text : Bytes property pos : Int32 = 0 @dq = Deque(Token).new property state_stack = ["root"] - def initialize(@lexer : Lexer, text : String, secondary = false) + def initialize(@lexer : BaseLexer, text : String, secondary = false) # Respect the `ensure_nl` config option if text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary text += "\n" @@ -106,13 +126,7 @@ module Tartrazine end end - # This implements a lexer for Pygments RegexLexers as expressed - # in Chroma's XML serialization. - # - # For explanations on what actions and states do - # the Pygments documentation is a good place to start. 
- # https://pygments.org/docs/lexerdevelopment/ - struct Lexer + abstract class BaseLexer property config = { name: "", priority: 0.0, @@ -123,6 +137,18 @@ module Tartrazine } property states = {} of String => State + def tokenizer(text : String, secondary = false) : BaseTokenizer + Tokenizer.new(self, text, secondary) + end + end + + # This implements a lexer for Pygments RegexLexers as expressed + # in Chroma's XML serialization. + # + # For explanations on what actions and states do + # the Pygments documentation is a good place to start. + # https://pygments.org/docs/lexerdevelopment/ + class Lexer < BaseLexer # Collapse consecutive tokens of the same type for easier comparison # and smaller output def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token) @@ -204,6 +230,60 @@ module Tartrazine end end + # A lexer that takes two lexers as arguments. A root lexer + # and a language lexer. Everything is scanned using the + # language lexer, afterwards all `Other` tokens are lexed + # using the root lexer. + # + # This is useful for things like template languages, where + # you have Jinja + HTML or Jinja + CSS and so on. + class DelegatingLexer < BaseLexer + property language_lexer : BaseLexer + property root_lexer : BaseLexer + + def initialize(@language_lexer : BaseLexer, @root_lexer : BaseLexer) + end + + def tokenizer(text : String, secondary = false) : DelegatingTokenizer + DelegatingTokenizer.new(self, text, secondary) + end + end + + # This Tokenizer works with a DelegatingLexer. It first tokenizes + # using the language lexer, and "Other" tokens are tokenized using + # the root lexer.
+ class DelegatingTokenizer < BaseTokenizer + include Iterator(Token) + @dq = Deque(Token).new + @language_tokenizer : BaseTokenizer + + def initialize(@lexer : DelegatingLexer, text : String, secondary = false) + # Respect the `ensure_nl` config option + if text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary + text += "\n" + end + @language_tokenizer = @lexer.language_lexer.tokenizer(text, true) + end + + def next : Iterator::Stop | Token + if @dq.size > 0 + return @dq.shift + end + token = @language_tokenizer.next + if token.is_a? Iterator::Stop + return stop + elsif token.as(Token).[:type] == "Other" + root_tokenizer = @lexer.root_lexer.tokenizer(token.as(Token).[:value], true) + root_tokenizer.each do |root_token| + @dq << root_token + end + else + @dq << token.as(Token) + end + self.next + end + end + # A Lexer state. A state has a name and a list of rules. # The state machine has a state stack containing references # to states to decide which rules to apply.