Rst lexer

This commit is contained in:
Roberto Alsina 2024-08-24 19:49:02 -03:00
parent c6cd74e339
commit 38196d6e96
5 changed files with 107 additions and 65 deletions

View File

@ -10,4 +10,6 @@
* ✅ Add --line-numbers to terminal formatter * ✅ Add --line-numbers to terminal formatter
* Implement lexer loader by mime type * Implement lexer loader by mime type
* ✅ Implement Delegating lexers * ✅ Implement Delegating lexers
* Add RstLexer maybe others? * ✅ Add RstLexer
* Add Mako template lexer
* Implement heuristic lexer detection

View File

@ -1,47 +0,0 @@
<lexer>
  <!-- Twig template lexer. Text outside delimiters is emitted as Other; an
       opening double brace pushes the "var" state and an opening brace-percent
       tag pushes the "tag" state. Both states include the shared expression
       rules in "varnames". Rule order inside a state is significant. -->
  <config>
    <name>Twig</name>
    <alias>twig</alias>
    <mime_type>application/x-twig</mime_type>
    <dot_all>true</dot_all>
  </config>
  <rules>
    <state name="root">
      <!-- Plain template text up to the next opening brace. -->
      <rule pattern="[^{]+"><token type="Other"/></rule>
      <!-- Start of an output expression. -->
      <rule pattern="\{\{"><token type="CommentPreproc"/><push state="var"/></rule>
      <!-- Template comment (may span lines because dot_all is enabled). -->
      <rule pattern="\{\#.*?\#\}"><token type="Comment"/></rule>
      <!-- raw ... endraw: the body between the two tags is emitted untouched as Other. -->
      <rule pattern="(\{%)(-?\s*)(raw)(\s*-?)(%\})(.*?)(\{%)(-?\s*)(endraw)(\s*-?)(%\})">
        <bygroups>
          <token type="CommentPreproc"/>
          <token type="Text"/>
          <token type="Keyword"/>
          <token type="Text"/>
          <token type="CommentPreproc"/>
          <token type="Other"/>
          <token type="CommentPreproc"/>
          <token type="Text"/>
          <token type="Keyword"/>
          <token type="Text"/>
          <token type="CommentPreproc"/>
        </bygroups>
      </rule>
      <!-- verbatim ... endverbatim: same treatment as raw. -->
      <rule pattern="(\{%)(-?\s*)(verbatim)(\s*-?)(%\})(.*?)(\{%)(-?\s*)(endverbatim)(\s*-?)(%\})">
        <bygroups>
          <token type="CommentPreproc"/>
          <token type="Text"/>
          <token type="Keyword"/>
          <token type="Text"/>
          <token type="CommentPreproc"/>
          <token type="Other"/>
          <token type="CommentPreproc"/>
          <token type="Text"/>
          <token type="Keyword"/>
          <token type="Text"/>
          <token type="CommentPreproc"/>
        </bygroups>
      </rule>
      <!-- filter tag: the name following "filter" is highlighted as a function. -->
      <rule pattern="(\{%)(-?\s*)(filter)(\s+)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w-]|[^\x00-\x7f])*)">
        <bygroups>
          <token type="CommentPreproc"/>
          <token type="Text"/>
          <token type="Keyword"/>
          <token type="Text"/>
          <token type="NameFunction"/>
        </bygroups>
        <push state="tag"/>
      </rule>
      <!-- Any other tag: the tag name is a keyword, the rest is lexed in "tag". -->
      <rule pattern="(\{%)(-?\s*)([a-zA-Z_]\w*)">
        <bygroups>
          <token type="CommentPreproc"/>
          <token type="Text"/>
          <token type="Keyword"/>
        </bygroups>
        <push state="tag"/>
      </rule>
      <!-- A lone opening brace that starts none of the constructs above. -->
      <rule pattern="\{"><token type="Other"/></rule>
    </state>
    <state name="varnames">
      <!-- Filter application: pipe followed by the filter name. -->
      <rule pattern="(\|)(\s*)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w-]|[^\x00-\x7f])*)">
        <bygroups>
          <token type="Operator"/>
          <token type="Text"/>
          <token type="NameFunction"/>
        </bygroups>
      </rule>
      <!-- Test application: "is", optional "not", then the test name. -->
      <rule pattern="(is)(\s+)(not)?(\s*)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w-]|[^\x00-\x7f])*)">
        <bygroups>
          <token type="Keyword"/>
          <token type="Text"/>
          <token type="Keyword"/>
          <token type="Text"/>
          <token type="NameFunction"/>
        </bygroups>
      </rule>
      <rule pattern="(?i)(true|false|none|null)\b"><token type="KeywordPseudo"/></rule>
      <!-- Word operators, test names and tag keywords. Every word is its own
           alternative; the previous pattern fused neighbouring words into
           "isif", "importconstant" and "sameasmatches", so only those fused
           spellings matched and the individual words fell through to the
           variable rule. -->
      <rule pattern="(in|not|and|b-and|or|b-or|b-xor|is|if|elseif|else|import|constant|defined|divisibleby|empty|even|iterable|odd|sameas|matches|starts\s+with|ends\s+with)\b"><token type="Keyword"/></rule>
      <rule pattern="(loop|block|parent)\b"><token type="NameBuiltin"/></rule>
      <!-- Identifiers, then dotted attribute access. -->
      <rule pattern="(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w-]|[^\x00-\x7f])*"><token type="NameVariable"/></rule>
      <rule pattern="\.(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w-]|[^\x00-\x7f])*"><token type="NameVariable"/></rule>
      <rule pattern="\.[0-9]+"><token type="LiteralNumber"/></rule>
      <!-- Double and single quoted strings with backslash escapes; an optional leading colon is included. -->
      <rule pattern=":?&quot;(\\\\|\\[^\\]|[^&quot;\\])*&quot;"><token type="LiteralStringDouble"/></rule>
      <rule pattern=":?&#x27;(\\\\|\\[^\\]|[^&#x27;\\])*&#x27;"><token type="LiteralStringSingle"/></rule>
      <rule pattern="([{}()\[\]+\-*/,:~%]|\.\.|\?|:|\*\*|\/\/|!=|[&gt;&lt;=]=?)"><token type="Operator"/></rule>
      <!-- Numbers. The hex branch comes first because the decimal branch would
           otherwise consume the leading 0 and leave the rest to the variable
           rule. The decimal branch accepts several digits and an exponent
           introduced by e or E (the previous pattern required the literal
           text "eE" and matched one digit at a time). -->
      <rule pattern="0[xX][0-9a-fA-F]+[Ll]?|[0-9]+(\.[0-9]*)?([eE][+-]?[0-9]+)?[flFLdD]?"><token type="LiteralNumber"/></rule>
    </state>
    <state name="var">
      <rule pattern="\s+"><token type="Text"/></rule>
      <!-- Closing double brace (with optional whitespace-control dash) returns to the previous state. -->
      <rule pattern="(-?)(\}\})">
        <bygroups>
          <token type="Text"/>
          <token type="CommentPreproc"/>
        </bygroups>
        <pop depth="1"/>
      </rule>
      <rule><include state="varnames"/></rule>
    </state>
    <state name="tag">
      <rule pattern="\s+"><token type="Text"/></rule>
      <!-- Closing percent-brace (with optional whitespace-control dash) returns to the previous state. -->
      <rule pattern="(-?)(%\})">
        <bygroups>
          <token type="Text"/>
          <token type="CommentPreproc"/>
        </bygroups>
        <pop depth="1"/>
      </rule>
      <rule><include state="varnames"/></rule>
      <!-- Fallback so that an unrecognised character inside a tag cannot stall the lexer. -->
      <rule pattern="."><token type="Punctuation"/></rule>
    </state>
  </rules>
</lexer>

76
lexers/rst.xml Normal file
View File

@ -0,0 +1,76 @@
<lexer>
  <!-- reStructuredText lexer. "root" holds the line-anchored block rules and
       ends by including "inline"; "inline" handles markup inside a line;
       "literal" is pushed by an opening double backtick. Rule order inside a
       state is significant. -->
  <config>
    <name>reStructuredText</name>
    <alias>restructuredtext</alias>
    <alias>rst</alias>
    <alias>rest</alias>
    <filename>*.rst</filename>
    <filename>*.rest</filename>
    <mime_type>text/x-rst</mime_type>
    <mime_type>text/prs.fallenstein.rst</mime_type>
  </config>
  <rules>
    <state name="root">
      <!-- Heading with overline: a run of one punctuation character, the title
           line, then the same run again (backreference to group 1). -->
      <rule pattern="^(=+|-+|`+|:+|\.+|\&#x27;+|&quot;+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)(.+)(\n)(\1)(\n)">
        <bygroups>
          <token type="GenericHeading"/>
          <token type="Text"/>
          <token type="GenericHeading"/>
          <token type="Text"/>
          <token type="GenericHeading"/>
          <token type="Text"/>
        </bygroups>
      </rule>
      <!-- Plain heading: a title line followed by an underline of three or more identical punctuation characters. -->
      <rule pattern="^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\&#x27;{3,}|&quot;{3,}|~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)">
        <bygroups>
          <token type="GenericHeading"/>
          <token type="Text"/>
          <token type="GenericHeading"/>
          <token type="Text"/>
        </bygroups>
      </rule>
      <!-- Bulleted list item; the item text is re-lexed with the "inline" state. -->
      <rule pattern="^(\s*)([-*+])( .+\n(?:\1 .+\n)*)">
        <bygroups>
          <token type="Text"/>
          <token type="LiteralNumber"/>
          <usingself state="inline"/>
        </bygroups>
      </rule>
      <!-- Enumerated list items: digits, "#" or roman-numeral letters followed by a dot. -->
      <rule pattern="^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1 .+\n)*)">
        <bygroups>
          <token type="Text"/>
          <token type="LiteralNumber"/>
          <usingself state="inline"/>
        </bygroups>
      </rule>
      <!-- Same enumerators closed by a parenthesis, optionally opened by one. -->
      <rule pattern="^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1 .+\n)*)">
        <bygroups>
          <token type="Text"/>
          <token type="LiteralNumber"/>
          <usingself state="inline"/>
        </bygroups>
      </rule>
      <!-- Upper-case letter enumerator with a dot; requires at least one continuation line. -->
      <rule pattern="^(\s*)([A-Z]+\.)( .+\n(?:\1 .+\n)+)">
        <bygroups>
          <token type="Text"/>
          <token type="LiteralNumber"/>
          <usingself state="inline"/>
        </bygroups>
      </rule>
      <!-- Letter enumerator closed by a parenthesis; requires at least one continuation line. -->
      <rule pattern="^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)">
        <bygroups>
          <token type="Text"/>
          <token type="LiteralNumber"/>
          <usingself state="inline"/>
        </bygroups>
      </rule>
      <!-- Line block: lines introduced by a vertical bar. -->
      <rule pattern="^(\s*)(\|)( .+\n(?:\| .+\n)*)">
        <bygroups>
          <token type="Text"/>
          <token type="Operator"/>
          <usingself state="inline"/>
        </bygroups>
      </rule>
      <!-- code / sourcecode / code-block directive. Group 6 (the directive
           argument) names the lexer that is applied to the concatenation of
           groups 9, 10 and 11, i.e. the indented body after the blank line. -->
      <rule pattern="^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*)?\n)+)">
        <bygroups>
          <token type="Punctuation"/>
          <token type="Text"/>
          <token type="OperatorWord"/>
          <token type="Punctuation"/>
          <token type="Text"/>
          <token type="Keyword"/>
          <token type="Text"/>
          <token type="Text"/>
          <UsingByGroup lexer="6" content="9,10,11"/>
        </bygroups>
      </rule>
      <!-- Any other directive: two dots, a name, a double colon and optional arguments. -->
      <rule pattern="^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))">
        <bygroups>
          <token type="Punctuation"/>
          <token type="Text"/>
          <token type="OperatorWord"/>
          <token type="Punctuation"/>
          <token type="Text"/>
          <usingself state="inline"/>
        </bygroups>
      </rule>
      <!-- Reference target: two dots, then an underscore-prefixed name ending in a colon. -->
      <rule pattern="^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$">
        <bygroups>
          <token type="Punctuation"/>
          <token type="Text"/>
          <token type="NameTag"/>
          <usingself state="inline"/>
        </bygroups>
      </rule>
      <!-- Footnote or citation target: two dots, then a bracketed label. -->
      <rule pattern="^( *\.\.)(\s*)(\[.+\])(.*?)$">
        <bygroups>
          <token type="Punctuation"/>
          <token type="Text"/>
          <token type="NameTag"/>
          <usingself state="inline"/>
        </bygroups>
      </rule>
      <!-- Substitution definition: two dots, a bar-delimited name, then a directive. -->
      <rule pattern="^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))">
        <bygroups>
          <token type="Punctuation"/>
          <token type="Text"/>
          <token type="NameTag"/>
          <token type="Text"/>
          <token type="OperatorWord"/>
          <token type="Punctuation"/>
          <token type="Text"/>
          <usingself state="inline"/>
        </bygroups>
      </rule>
      <!-- Anything else starting with two dots is a comment, including its indented or blank continuation lines. -->
      <rule pattern="^ *\.\..*(\n( +.*\n|\n)+)?"><token type="Comment"/></rule>
      <!-- Field list marker: a colon-delimited name followed by whitespace. -->
      <rule pattern="^( *)(:(?:\\\\|\\:|[^:\n])+:(?=\s))([ \t]*)">
        <bygroups>
          <token type="Text"/>
          <token type="NameClass"/>
          <token type="Text"/>
        </bygroups>
      </rule>
      <!-- Definition list: a term line not ending in a double colon, then one or more indented lines. -->
      <rule pattern="^(\S.*(?&lt;!::)\n)((?:(?: +.*)\n)+)">
        <bygroups>
          <usingself state="inline"/>
          <usingself state="inline"/>
        </bygroups>
      </rule>
      <!-- Literal block: a double colon, a blank line, then lines sharing the indentation captured in group 3. -->
      <rule pattern="(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*)?\n)+)">
        <bygroups>
          <token type="LiteralStringEscape"/>
          <token type="Text"/>
          <token type="LiteralString"/>
          <token type="LiteralString"/>
          <token type="Text"/>
          <token type="LiteralString"/>
        </bygroups>
      </rule>
      <!-- Everything that is not a block construct is handled as inline markup. -->
      <rule><include state="inline"/></rule>
    </state>
    <state name="inline">
      <!-- Backslash escape. -->
      <rule pattern="\\."><token type="Text"/></rule>
      <!-- Opening double backtick starts inline literal text. -->
      <rule pattern="``"><token type="LiteralString"/><push state="literal"/></rule>
      <!-- Backquoted reference containing an angle-bracketed target. -->
      <rule pattern="(`.+?)(&lt;.+?&gt;)(`__?)">
        <bygroups>
          <token type="LiteralString"/>
          <token type="LiteralStringInterpol"/>
          <token type="LiteralString"/>
        </bygroups>
      </rule>
      <!-- Backquoted reference ending in one or two underscores. -->
      <rule pattern="`.+?`__?"><token type="LiteralString"/></rule>
      <!-- Backquoted text with an optional role suffix, then the role-prefix form. -->
      <rule pattern="(`.+?`)(:[a-zA-Z0-9:-]+?:)?">
        <bygroups>
          <token type="NameVariable"/>
          <token type="NameAttribute"/>
        </bygroups>
      </rule>
      <rule pattern="(:[a-zA-Z0-9:-]+?:)(`.+?`)">
        <bygroups>
          <token type="NameAttribute"/>
          <token type="NameVariable"/>
        </bygroups>
      </rule>
      <!-- Strong emphasis must be tried before single-asterisk emphasis. -->
      <rule pattern="\*\*.+?\*\*"><token type="GenericStrong"/></rule>
      <rule pattern="\*.+?\*"><token type="GenericEmph"/></rule>
      <!-- Bracketed label followed by an underscore (footnote or citation reference). -->
      <rule pattern="\[.*?\]_"><token type="LiteralString"/></rule>
      <!-- Angle-bracketed text. -->
      <rule pattern="&lt;.+?&gt;"><token type="NameTag"/></rule>
      <!-- Runs of ordinary characters, then a single-character fallback. -->
      <rule pattern="[^\\\n\[*`:]+"><token type="Text"/></rule>
      <rule pattern="."><token type="Text"/></rule>
    </state>
    <state name="literal">
      <rule pattern="[^`]+"><token type="LiteralString"/></rule>
      <!-- A closing double backtick only ends the literal when followed by end of line or one of the listed characters. -->
      <rule pattern="``((?=$)|(?=[-/:.,; \n\x00 &#x27;&quot;\)\]\}&gt;’”»!\?]))"><token type="LiteralString"/><pop depth="1"/></rule>
      <!-- A backtick that does not close the literal stays part of it. -->
      <rule pattern="`"><token type="LiteralString"/></rule>
    </state>
  </rules>
</lexer>

View File

@ -23,7 +23,7 @@ module Tartrazine
struct Action struct Action
property actions : Array(Action) = [] of Action property actions : Array(Action) = [] of Action
@content_index : Int32 = 0 @content_index : Array(Int32) = [] of Int32
@depth : Int32 = 0 @depth : Int32 = 0
@lexer_index : Int32 = 0 @lexer_index : Int32 = 0
@lexer_name : String = "" @lexer_name : String = ""
@ -67,7 +67,7 @@ module Tartrazine
}.map &.content }.map &.content
when ActionType::Usingbygroup when ActionType::Usingbygroup
@lexer_index = xml["lexer"].to_i @lexer_index = xml["lexer"].to_i
@content_index = xml["content"].to_i @content_index = xml["content"].split(",").map(&.to_i)
end end
end end
@ -143,8 +143,12 @@ module Tartrazine
when ActionType::Usingbygroup when ActionType::Usingbygroup
# Shunt to content-specified lexer # Shunt to content-specified lexer
return [] of Token if match.empty? return [] of Token if match.empty?
content = ""
@content_index.each do |i|
content += String.new(match[i].value)
end
Tartrazine.lexer(String.new(match[@lexer_index].value)).tokenizer( Tartrazine.lexer(String.new(match[@lexer_index].value)).tokenizer(
String.new(match[@content_index].value), content,
secondary: true).to_a secondary: true).to_a
else else
raise Exception.new("Unknown action type: #{@type}") raise Exception.new("Unknown action type: #{@type}")

View File

@ -324,10 +324,13 @@ module Tartrazine
"reg" => "reg", "reg" => "reg",
"registry" => "reg", "registry" => "reg",
"rego" => "rego", "rego" => "rego",
"rest" => "rst",
"restructuredtext" => "rst",
"rexx" => "rexx", "rexx" => "rexx",
"rkt" => "racket", "rkt" => "racket",
"rpmspec" => "rpm_spec", "rpmspec" => "rpm_spec",
"rs" => "rust", "rs" => "rust",
"rst" => "rst",
"ruby" => "ruby", "ruby" => "ruby",
"rust" => "rust", "rust" => "rust",
"s" => "r", "s" => "r",
@ -395,7 +398,7 @@ module Tartrazine
"turing" => "turing", "turing" => "turing",
"turtle" => "turtle", "turtle" => "turtle",
"tv" => "tradingview", "tv" => "tradingview",
"twig" => "TwigLexer", "twig" => "twig",
"typescript" => "typescript", "typescript" => "typescript",
"typoscript" => "typoscript", "typoscript" => "typoscript",
"typoscriptcssdata" => "typoscriptcssdata", "typoscriptcssdata" => "typoscriptcssdata",
@ -467,7 +470,7 @@ module Tartrazine
"application/x-fennel" => "fennel", "application/x-fennel" => "fennel",
"application/x-fish" => "fish", "application/x-fish" => "fish",
"application/x-forth" => "forth", "application/x-forth" => "forth",
"application/x-gdscript" => "gdscript3", "application/x-gdscript" => "gdscript",
"application/x-hcl" => "hcl", "application/x-hcl" => "hcl",
"application/x-hy" => "hy", "application/x-hy" => "hy",
"application/x-javascript" => "javascript", "application/x-javascript" => "javascript",
@ -500,7 +503,7 @@ module Tartrazine
"application/x-thrift" => "thrift", "application/x-thrift" => "thrift",
"application/x-troff" => "groff", "application/x-troff" => "groff",
"application/x-turtle" => "turtle", "application/x-turtle" => "turtle",
"application/x-twig" => "TwigLexer", "application/x-twig" => "twig",
"application/x-vue" => "vue", "application/x-vue" => "vue",
"application/x.ucode" => "ucode", "application/x.ucode" => "ucode",
"application/xhtml+xml" => "html", "application/xhtml+xml" => "html",
@ -527,6 +530,7 @@ module Tartrazine
"text/odin" => "odin", "text/odin" => "odin",
"text/org" => "org_mode", "text/org" => "org_mode",
"text/plain" => "plaintext", "text/plain" => "plaintext",
"text/prs.fallenstein.rst" => "rst",
"text/rust" => "rust", "text/rust" => "rust",
"text/s" => "r", "text/s" => "r",
"text/s-plus" => "r", "text/s-plus" => "r",
@ -589,7 +593,7 @@ module Tartrazine
"text/x-fortran" => "fortran", "text/x-fortran" => "fortran",
"text/x-fsharp" => "fsharp", "text/x-fsharp" => "fsharp",
"text/x-gas" => "gas", "text/x-gas" => "gas",
"text/x-gdscript" => "gdscript3", "text/x-gdscript" => "gdscript",
"text/x-gherkin" => "gherkin", "text/x-gherkin" => "gherkin",
"text/x-gleam" => "gleam", "text/x-gleam" => "gleam",
"text/x-glslsrc" => "glsl", "text/x-glslsrc" => "glsl",
@ -657,6 +661,7 @@ module Tartrazine
"text/x-reasonml" => "reasonml", "text/x-reasonml" => "reasonml",
"text/x-rexx" => "rexx", "text/x-rexx" => "rexx",
"text/x-rpm-spec" => "rpm_spec", "text/x-rpm-spec" => "rpm_spec",
"text/x-rst" => "rst",
"text/x-ruby" => "ruby", "text/x-ruby" => "ruby",
"text/x-rust" => "rust", "text/x-rust" => "rust",
"text/x-sas" => "sas", "text/x-sas" => "sas",
@ -725,8 +730,8 @@ module Tartrazine
"*.applescript" => ["applescript"], "*.applescript" => ["applescript"],
"*.aql" => ["arangodb_aql"], "*.aql" => ["arangodb_aql"],
"*.arexx" => ["rexx"], "*.arexx" => ["rexx"],
"*.as" => ["actionscript", "actionscript_3"], "*.as" => ["actionscript_3", "actionscript"],
"*.asm" => ["nasm", "z80_assembly", "tasm"], "*.asm" => ["tasm", "nasm", "z80_assembly"],
"*.au3" => ["autoit"], "*.au3" => ["autoit"],
"*.automount" => ["systemd"], "*.automount" => ["systemd"],
"*.aux" => ["tex"], "*.aux" => ["tex"],
@ -745,7 +750,7 @@ module Tartrazine
"*.bnf" => ["bnf"], "*.bnf" => ["bnf"],
"*.bqn" => ["bqn"], "*.bqn" => ["bqn"],
"*.bzl" => ["python"], "*.bzl" => ["python"],
"*.c" => ["c", "c++"], "*.c" => ["c++", "c"],
"*.c++" => ["c++"], "*.c++" => ["c++"],
"*.capnp" => ["cap_n_proto"], "*.capnp" => ["cap_n_proto"],
"*.cc" => ["c++"], "*.cc" => ["c++"],
@ -844,7 +849,7 @@ module Tartrazine
"*.graphql" => ["graphql"], "*.graphql" => ["graphql"],
"*.graphqls" => ["graphql"], "*.graphqls" => ["graphql"],
"*.groovy" => ["groovy"], "*.groovy" => ["groovy"],
"*.h" => ["objective-c", "c", "c++"], "*.h" => ["c++", "c", "objective-c"],
"*.h++" => ["c++"], "*.h++" => ["c++"],
"*.ha" => ["hare"], "*.ha" => ["hare"],
"*.handlebars" => ["handlebars"], "*.handlebars" => ["handlebars"],
@ -852,7 +857,7 @@ module Tartrazine
"*.hc" => ["holyc"], "*.hc" => ["holyc"],
"*.hc.z" => ["holyc"], "*.hc.z" => ["holyc"],
"*.hcl" => ["hcl"], "*.hcl" => ["hcl"],
"*.hh" => ["holyc", "c++"], "*.hh" => ["c++", "holyc"],
"*.hlb" => ["hlb"], "*.hlb" => ["hlb"],
"*.hlsl" => ["hlsl"], "*.hlsl" => ["hlsl"],
"*.hlsli" => ["hlsl"], "*.hlsli" => ["hlsl"],
@ -867,7 +872,7 @@ module Tartrazine
"*.idc" => ["c"], "*.idc" => ["c"],
"*.idr" => ["idris"], "*.idr" => ["idris"],
"*.ijs" => ["j"], "*.ijs" => ["j"],
"*.inc" => ["php", "sourcepawn", "objectpascal", "povray"], "*.inc" => ["objectpascal", "povray", "php", "sourcepawn"],
"*.inf" => ["ini"], "*.inf" => ["ini"],
"*.ini" => ["ini"], "*.ini" => ["ini"],
"*.ino" => ["arduino"], "*.ino" => ["arduino"],
@ -893,7 +898,7 @@ module Tartrazine
"*.lpk" => ["objectpascal"], "*.lpk" => ["objectpascal"],
"*.lpr" => ["objectpascal"], "*.lpr" => ["objectpascal"],
"*.lua" => ["lua"], "*.lua" => ["lua"],
"*.m" => ["mason", "mathematica", "matlab", "octave", "objective-c"], "*.m" => ["mathematica", "octave", "matlab", "objective-c", "mason"],
"*.ma" => ["mathematica"], "*.ma" => ["mathematica"],
"*.mak" => ["makefile"], "*.mak" => ["makefile"],
"*.man" => ["groff"], "*.man" => ["groff"],
@ -948,7 +953,7 @@ module Tartrazine
"*.php" => ["php"], "*.php" => ["php"],
"*.php[345]" => ["php"], "*.php[345]" => ["php"],
"*.pig" => ["pig"], "*.pig" => ["pig"],
"*.pl" => ["prolog", "perl"], "*.pl" => ["perl", "prolog"],
"*.plc" => ["plutus_core"], "*.plc" => ["plutus_core"],
"*.plot" => ["gnuplot"], "*.plot" => ["gnuplot"],
"*.plt" => ["gnuplot"], "*.plt" => ["gnuplot"],
@ -956,7 +961,7 @@ module Tartrazine
"*.pml" => ["promela"], "*.pml" => ["promela"],
"*.pony" => ["pony"], "*.pony" => ["pony"],
"*.pov" => ["povray"], "*.pov" => ["povray"],
"*.pp" => ["objectpascal", "puppet"], "*.pp" => ["puppet", "objectpascal"],
"*.pq" => ["powerquery"], "*.pq" => ["powerquery"],
"*.pr" => ["promela"], "*.pr" => ["promela"],
"*.prm" => ["promela"], "*.prm" => ["promela"],
@ -992,6 +997,7 @@ module Tartrazine
"*.reg" => ["reg"], "*.reg" => ["reg"],
"*.rego" => ["rego"], "*.rego" => ["rego"],
"*.rei" => ["reasonml"], "*.rei" => ["reasonml"],
"*.rest" => ["rst"],
"*.rex" => ["rexx"], "*.rex" => ["rexx"],
"*.rexx" => ["rexx"], "*.rexx" => ["rexx"],
"*.rkt" => ["racket"], "*.rkt" => ["racket"],
@ -1001,9 +1007,10 @@ module Tartrazine
"*.rs" => ["rust"], "*.rs" => ["rust"],
"*.rs.in" => ["rust"], "*.rs.in" => ["rust"],
"*.rss" => ["xml"], "*.rss" => ["xml"],
"*.rst" => ["rst"],
"*.rvt" => ["tcl"], "*.rvt" => ["tcl"],
"*.rx" => ["rexx"], "*.rx" => ["rexx"],
"*.s" => ["r", "armasm", "gas"], "*.s" => ["armasm", "r", "gas"],
"*.sage" => ["python"], "*.sage" => ["python"],
"*.sas" => ["sas"], "*.sas" => ["sas"],
"*.sass" => ["sass"], "*.sass" => ["sass"],