mirror of
https://github.com/ralsina/tartrazine.git
synced 2024-11-12 22:42:23 +00:00
Merge branch 'more_lexers'
This commit is contained in:
commit
06556877ef
1
.gitignore
vendored
1
.gitignore
vendored
@ -8,3 +8,4 @@ pygments/
|
||||
shard.lock
|
||||
.vscode/
|
||||
.crystal/
|
||||
venv/
|
||||
|
2
TODO.md
2
TODO.md
@ -9,4 +9,4 @@
|
||||
* ✅ Implement lexer loader by file extension
|
||||
* ✅ Add --line-numbers to terminal formatter
|
||||
* Implement lexer loader by mime type
|
||||
* Implement Delegating lexers
|
||||
* ✅ Implement Delegating lexers
|
||||
|
130
lexers/LiquidLexer.xml
Normal file
130
lexers/LiquidLexer.xml
Normal file
@ -0,0 +1,130 @@
|
||||
|
||||
<lexer>
|
||||
<config>
|
||||
<name>liquid</name>
|
||||
<alias>liquid</alias>
|
||||
<filename>*.liquid</filename>
|
||||
</config>
|
||||
<rules>
|
||||
<state name="root">
|
||||
<rule pattern="[^{]+"><token type="Text"/></rule>
|
||||
<rule pattern="(\{%)(\s*)"><bygroups><token type="Punctuation"/><token type="TextWhitespace"/></bygroups><push state="tag-or-block"/></rule>
|
||||
<rule pattern="(\{\{)(\s*)([^\s}]+)"><bygroups><token type="Punctuation"/><token type="TextWhitespace"/><usingself state="generic"/></bygroups><push state="output"/></rule>
|
||||
<rule pattern="\{"><token type="Text"/></rule>
|
||||
</state>
|
||||
<state name="tag-or-block">
|
||||
<rule pattern="(if|unless|elsif|case)(?=\s+)"><token type="KeywordReserved"/><push state="condition"/></rule>
|
||||
<rule pattern="(when)(\s+)"><bygroups><token type="KeywordReserved"/><token type="TextWhitespace"/></bygroups><combined state="end-of-block" state="whitespace" state="generic"/></rule>
|
||||
<rule pattern="(else)(\s*)(%\})"><bygroups><token type="KeywordReserved"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups><pop depth="1"/></rule>
|
||||
<rule pattern="(capture)(\s+)([^\s%]+)(\s*)(%\})"><bygroups><token type="NameTag"/><token type="TextWhitespace"/><usingself state="variable"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups><pop depth="1"/></rule>
|
||||
<rule pattern="(comment)(\s*)(%\})"><bygroups><token type="NameTag"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups><push state="comment"/></rule>
|
||||
<rule pattern="(raw)(\s*)(%\})"><bygroups><token type="NameTag"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups><push state="raw"/></rule>
|
||||
<rule pattern="(end(case|unless|if))(\s*)(%\})"><bygroups><token type="KeywordReserved"/>None<token type="TextWhitespace"/><token type="Punctuation"/></bygroups><pop depth="1"/></rule>
|
||||
<rule pattern="(end([^\s%]+))(\s*)(%\})"><bygroups><token type="NameTag"/>None<token type="TextWhitespace"/><token type="Punctuation"/></bygroups><pop depth="1"/></rule>
|
||||
<rule pattern="(cycle)(\s+)(?:([^\s:]*)(:))?(\s*)"><bygroups><token type="NameTag"/><token type="TextWhitespace"/><usingself state="generic"/><token type="Punctuation"/><token type="TextWhitespace"/></bygroups><push state="variable-tag-markup"/></rule>
|
||||
<rule pattern="([^\s%]+)(\s*)"><bygroups><token type="NameTag"/><token type="TextWhitespace"/></bygroups><push state="tag-markup"/></rule>
|
||||
</state>
|
||||
<state name="output">
|
||||
<rule><include state="whitespace"/></rule>
|
||||
<rule pattern="\}\}"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
<rule pattern="\|"><token type="Punctuation"/><push state="filters"/></rule>
|
||||
</state>
|
||||
<state name="filters">
|
||||
<rule><include state="whitespace"/></rule>
|
||||
<rule pattern="\}\}"><token type="Punctuation"/><push state="#pop" state="#pop"/></rule>
|
||||
<rule pattern="([^\s|:]+)(:?)(\s*)"><bygroups><token type="NameFunction"/><token type="Punctuation"/><token type="TextWhitespace"/></bygroups><push state="filter-markup"/></rule>
|
||||
</state>
|
||||
<state name="filter-markup">
|
||||
<rule pattern="\|"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
<rule><include state="end-of-tag"/></rule>
|
||||
<rule><include state="default-param-markup"/></rule>
|
||||
</state>
|
||||
<state name="condition">
|
||||
<rule><include state="end-of-block"/></rule>
|
||||
<rule><include state="whitespace"/></rule>
|
||||
<rule pattern="([^\s=!><]+)(\s*)([=!><]=?)(\s*)(\S+)(\s*)(%\})"><bygroups><usingself state="generic"/><token type="TextWhitespace"/><token type="Operator"/><token type="TextWhitespace"/><usingself state="generic"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups></rule>
|
||||
<rule pattern="\b!"><token type="Operator"/></rule>
|
||||
<rule pattern="\bnot\b"><token type="OperatorWord"/></rule>
|
||||
<rule pattern="([\w.\'"]+)(\s+)(contains)(\s+)([\w.\'"]+)"><bygroups><usingself state="generic"/><token type="TextWhitespace"/><token type="OperatorWord"/><token type="TextWhitespace"/><usingself state="generic"/></bygroups></rule>
|
||||
<rule><include state="generic"/></rule>
|
||||
<rule><include state="whitespace"/></rule>
|
||||
</state>
|
||||
<state name="generic-value">
|
||||
<rule><include state="generic"/></rule>
|
||||
<rule><include state="end-at-whitespace"/></rule>
|
||||
</state>
|
||||
<state name="operator">
|
||||
<rule pattern="(\s*)((=|!|>|<)=?)(\s*)"><bygroups><token type="TextWhitespace"/><token type="Operator"/>None<token type="TextWhitespace"/></bygroups><pop depth="1"/></rule>
|
||||
<rule pattern="(\s*)(\bcontains\b)(\s*)"><bygroups><token type="TextWhitespace"/><token type="OperatorWord"/><token type="TextWhitespace"/></bygroups><pop depth="1"/></rule>
|
||||
</state>
|
||||
<state name="end-of-tag">
|
||||
<rule pattern="\}\}"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
</state>
|
||||
<state name="end-of-block">
|
||||
<rule pattern="%\}"><token type="Punctuation"/><push state="#pop" state="#pop"/></rule>
|
||||
</state>
|
||||
<state name="end-at-whitespace">
|
||||
<rule pattern="\s+"><token type="TextWhitespace"/><pop depth="1"/></rule>
|
||||
</state>
|
||||
<state name="param-markup">
|
||||
<rule><include state="whitespace"/></rule>
|
||||
<rule pattern="([^\s=:]+)(\s*)(=|:)"><bygroups><token type="NameAttribute"/><token type="TextWhitespace"/><token type="Operator"/></bygroups></rule>
|
||||
<rule pattern="(\{\{)(\s*)([^\s}])(\s*)(\}\})"><bygroups><token type="Punctuation"/><token type="TextWhitespace"/><usingself state="variable"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups></rule>
|
||||
<rule><include state="string"/></rule>
|
||||
<rule><include state="number"/></rule>
|
||||
<rule><include state="keyword"/></rule>
|
||||
<rule pattern=","><token type="Punctuation"/></rule>
|
||||
</state>
|
||||
<state name="default-param-markup">
|
||||
<rule><include state="param-markup"/></rule>
|
||||
<rule pattern="."><token type="Text"/></rule>
|
||||
</state>
|
||||
<state name="variable-param-markup">
|
||||
<rule><include state="param-markup"/></rule>
|
||||
<rule><include state="variable"/></rule>
|
||||
<rule pattern="."><token type="Text"/></rule>
|
||||
</state>
|
||||
<state name="tag-markup">
|
||||
<rule pattern="%\}"><token type="Punctuation"/><push state="#pop" state="#pop"/></rule>
|
||||
<rule><include state="default-param-markup"/></rule>
|
||||
</state>
|
||||
<state name="variable-tag-markup">
|
||||
<rule pattern="%\}"><token type="Punctuation"/><push state="#pop" state="#pop"/></rule>
|
||||
<rule><include state="variable-param-markup"/></rule>
|
||||
</state>
|
||||
<state name="keyword">
|
||||
<rule pattern="\b(false|true)\b"><token type="KeywordConstant"/></rule>
|
||||
</state>
|
||||
<state name="variable">
|
||||
<rule pattern="[a-zA-Z_]\w*"><token type="NameVariable"/></rule>
|
||||
<rule pattern="(?<=\w)\.(?=\w)"><token type="Punctuation"/></rule>
|
||||
</state>
|
||||
<state name="string">
|
||||
<rule pattern="'[^']*'"><token type="LiteralStringSingle"/></rule>
|
||||
<rule pattern=""[^"]*""><token type="LiteralStringDouble"/></rule>
|
||||
</state>
|
||||
<state name="number">
|
||||
<rule pattern="\d+\.\d+"><token type="LiteralNumberFloat"/></rule>
|
||||
<rule pattern="\d+"><token type="LiteralNumberInteger"/></rule>
|
||||
</state>
|
||||
<state name="generic">
|
||||
<rule><include state="keyword"/></rule>
|
||||
<rule><include state="string"/></rule>
|
||||
<rule><include state="number"/></rule>
|
||||
<rule><include state="variable"/></rule>
|
||||
</state>
|
||||
<state name="whitespace">
|
||||
<rule pattern="[ \t]+"><token type="TextWhitespace"/></rule>
|
||||
</state>
|
||||
<state name="comment">
|
||||
<rule pattern="(\{%)(\s*)(endcomment)(\s*)(%\})"><bygroups><token type="Punctuation"/><token type="TextWhitespace"/><token type="NameTag"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups><push state="#pop" state="#pop"/></rule>
|
||||
<rule pattern="."><token type="Comment"/></rule>
|
||||
</state>
|
||||
<state name="raw">
|
||||
<rule pattern="[^{]+"><token type="Text"/></rule>
|
||||
<rule pattern="(\{%)(\s*)(endraw)(\s*)(%\})"><bygroups><token type="Punctuation"/><token type="TextWhitespace"/><token type="NameTag"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups><pop depth="1"/></rule>
|
||||
<rule pattern="\{"><token type="Text"/></rule>
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
||||
|
47
lexers/TwigLexer.xml
Normal file
47
lexers/TwigLexer.xml
Normal file
@ -0,0 +1,47 @@
|
||||
|
||||
<lexer>
|
||||
<config>
|
||||
<name>Twig</name>
|
||||
<alias>twig</alias>
|
||||
<mime_type>application/x-twig</mime_type>
|
||||
<dot_all>true</dot_all>
|
||||
</config>
|
||||
<rules>
|
||||
<state name="root">
|
||||
<rule pattern="[^{]+"><token type="Other"/></rule>
|
||||
<rule pattern="\{\{"><token type="CommentPreproc"/><push state="var"/></rule>
|
||||
<rule pattern="\{\#.*?\#\}"><token type="Comment"/></rule>
|
||||
<rule pattern="(\{%)(-?\s*)(raw)(\s*-?)(%\})(.*?)(\{%)(-?\s*)(endraw)(\s*-?)(%\})"><bygroups><token type="CommentPreproc"/><token type="Text"/><token type="Keyword"/><token type="Text"/><token type="CommentPreproc"/><token type="Other"/><token type="CommentPreproc"/><token type="Text"/><token type="Keyword"/><token type="Text"/><token type="CommentPreproc"/></bygroups></rule>
|
||||
<rule pattern="(\{%)(-?\s*)(verbatim)(\s*-?)(%\})(.*?)(\{%)(-?\s*)(endverbatim)(\s*-?)(%\})"><bygroups><token type="CommentPreproc"/><token type="Text"/><token type="Keyword"/><token type="Text"/><token type="CommentPreproc"/><token type="Other"/><token type="CommentPreproc"/><token type="Text"/><token type="Keyword"/><token type="Text"/><token type="CommentPreproc"/></bygroups></rule>
|
||||
<rule pattern="(\{%)(-?\s*)(filter)(\s+)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w-]|[^\x00-\x7f])*)"><bygroups><token type="CommentPreproc"/><token type="Text"/><token type="Keyword"/><token type="Text"/><token type="NameFunction"/></bygroups><push state="tag"/></rule>
|
||||
<rule pattern="(\{%)(-?\s*)([a-zA-Z_]\w*)"><bygroups><token type="CommentPreproc"/><token type="Text"/><token type="Keyword"/></bygroups><push state="tag"/></rule>
|
||||
<rule pattern="\{"><token type="Other"/></rule>
|
||||
</state>
|
||||
<state name="varnames">
|
||||
<rule pattern="(\|)(\s*)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w-]|[^\x00-\x7f])*)"><bygroups><token type="Operator"/><token type="Text"/><token type="NameFunction"/></bygroups></rule>
|
||||
<rule pattern="(is)(\s+)(not)?(\s*)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w-]|[^\x00-\x7f])*)"><bygroups><token type="Keyword"/><token type="Text"/><token type="Keyword"/><token type="Text"/><token type="NameFunction"/></bygroups></rule>
|
||||
<rule pattern="(?i)(true|false|none|null)\b"><token type="KeywordPseudo"/></rule>
|
||||
<rule pattern="(in|not|and|b-and|or|b-or|b-xor|isif|elseif|else|importconstant|defined|divisibleby|empty|even|iterable|odd|sameasmatches|starts\s+with|ends\s+with)\b"><token type="Keyword"/></rule>
|
||||
<rule pattern="(loop|block|parent)\b"><token type="NameBuiltin"/></rule>
|
||||
<rule pattern="(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w-]|[^\x00-\x7f])*"><token type="NameVariable"/></rule>
|
||||
<rule pattern="\.(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w-]|[^\x00-\x7f])*"><token type="NameVariable"/></rule>
|
||||
<rule pattern="\.[0-9]+"><token type="LiteralNumber"/></rule>
|
||||
<rule pattern=":?"(\\\\|\\[^\\]|[^"\\])*""><token type="LiteralStringDouble"/></rule>
|
||||
<rule pattern=":?'(\\\\|\\[^\\]|[^'\\])*'"><token type="LiteralStringSingle"/></rule>
|
||||
<rule pattern="([{}()\[\]+\-*/,:~%]|\.\.|\?|:|\*\*|\/\/|!=|[><=]=?)"><token type="Operator"/></rule>
|
||||
<rule pattern="[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|0[xX][0-9a-fA-F]+[Ll]?"><token type="LiteralNumber"/></rule>
|
||||
</state>
|
||||
<state name="var">
|
||||
<rule pattern="\s+"><token type="Text"/></rule>
|
||||
<rule pattern="(-?)(\}\})"><bygroups><token type="Text"/><token type="CommentPreproc"/></bygroups><pop depth="1"/></rule>
|
||||
<rule><include state="varnames"/></rule>
|
||||
</state>
|
||||
<state name="tag">
|
||||
<rule pattern="\s+"><token type="Text"/></rule>
|
||||
<rule pattern="(-?)(%\})"><bygroups><token type="Text"/><token type="CommentPreproc"/></bygroups><pop depth="1"/></rule>
|
||||
<rule><include state="varnames"/></rule>
|
||||
<rule pattern="."><token type="Punctuation"/></rule>
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
||||
|
55
lexers/VelocityLexer.xml
Normal file
55
lexers/VelocityLexer.xml
Normal file
@ -0,0 +1,55 @@
|
||||
|
||||
<lexer>
|
||||
<config>
|
||||
<name>Velocity</name>
|
||||
<alias>velocity</alias>
|
||||
<filename>*.vm</filename>
|
||||
<filename>*.fhtml</filename>
|
||||
<dot_all>true</dot_all>
|
||||
</config>
|
||||
<rules>
|
||||
<state name="root">
|
||||
<rule pattern="[^{#$]+"><token type="Other"/></rule>
|
||||
<rule pattern="(#)(\*.*?\*)(#)"><bygroups><token type="CommentPreproc"/><token type="Comment"/><token type="CommentPreproc"/></bygroups></rule>
|
||||
<rule pattern="(##)(.*?$)"><bygroups><token type="CommentPreproc"/><token type="Comment"/></bygroups></rule>
|
||||
<rule pattern="(#\{?)([a-zA-Z_]\w*)(\}?)(\s?\()"><bygroups><token type="CommentPreproc"/><token type="NameFunction"/><token type="CommentPreproc"/><token type="Punctuation"/></bygroups><push state="directiveparams"/></rule>
|
||||
<rule pattern="(#\{?)([a-zA-Z_]\w*)(\}|\b)"><bygroups><token type="CommentPreproc"/><token type="NameFunction"/><token type="CommentPreproc"/></bygroups></rule>
|
||||
<rule pattern="\$!?\{?"><token type="Punctuation"/><push state="variable"/></rule>
|
||||
</state>
|
||||
<state name="variable">
|
||||
<rule pattern="[a-zA-Z_]\w*"><token type="NameVariable"/></rule>
|
||||
<rule pattern="\("><token type="Punctuation"/><push state="funcparams"/></rule>
|
||||
<rule pattern="(\.)([a-zA-Z_]\w*)"><bygroups><token type="Punctuation"/><token type="NameVariable"/></bygroups><push/></rule>
|
||||
<rule pattern="\}"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
<rule><pop depth="1"/></rule>
|
||||
</state>
|
||||
<state name="directiveparams">
|
||||
<rule pattern="(&&|\|\||==?|!=?|[-<>+*%&|^/])|\b(eq|ne|gt|lt|ge|le|not|in)\b"><token type="Operator"/></rule>
|
||||
<rule pattern="\["><token type="Operator"/><push state="rangeoperator"/></rule>
|
||||
<rule pattern="\b[a-zA-Z_]\w*\b"><token type="NameFunction"/></rule>
|
||||
<rule><include state="funcparams"/></rule>
|
||||
</state>
|
||||
<state name="rangeoperator">
|
||||
<rule pattern="\.\."><token type="Operator"/></rule>
|
||||
<rule><include state="funcparams"/></rule>
|
||||
<rule pattern="\]"><token type="Operator"/><pop depth="1"/></rule>
|
||||
</state>
|
||||
<state name="funcparams">
|
||||
<rule pattern="\$!?\{?"><token type="Punctuation"/><push state="variable"/></rule>
|
||||
<rule pattern="\s+"><token type="Text"/></rule>
|
||||
<rule pattern="[,:]"><token type="Punctuation"/></rule>
|
||||
<rule pattern=""(\\\\|\\[^\\]|[^"\\])*""><token type="LiteralStringDouble"/></rule>
|
||||
<rule pattern="'(\\\\|\\[^\\]|[^'\\])*'"><token type="LiteralStringSingle"/></rule>
|
||||
<rule pattern="0[xX][0-9a-fA-F]+[Ll]?"><token type="LiteralNumber"/></rule>
|
||||
<rule pattern="\b[0-9]+\b"><token type="LiteralNumber"/></rule>
|
||||
<rule pattern="(true|false|null)\b"><token type="KeywordConstant"/></rule>
|
||||
<rule pattern="\("><token type="Punctuation"/><push/></rule>
|
||||
<rule pattern="\)"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
<rule pattern="\{"><token type="Punctuation"/><push/></rule>
|
||||
<rule pattern="\}"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
<rule pattern="\["><token type="Punctuation"/><push/></rule>
|
||||
<rule pattern="\]"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
||||
|
22
lexers/bbcode.xml
Normal file
22
lexers/bbcode.xml
Normal file
@ -0,0 +1,22 @@
|
||||
|
||||
<lexer>
|
||||
<config>
|
||||
<name>BBCode</name>
|
||||
<alias>bbcode</alias>
|
||||
<mime_type>text/x-bbcode</mime_type>
|
||||
</config>
|
||||
<rules>
|
||||
<state name="root">
|
||||
<rule pattern="[^[]+"><token type="Text"/></rule>
|
||||
<rule pattern="\[/?\w+"><token type="Keyword"/><push state="tag"/></rule>
|
||||
<rule pattern="\["><token type="Text"/></rule>
|
||||
</state>
|
||||
<state name="tag">
|
||||
<rule pattern="\s+"><token type="Text"/></rule>
|
||||
<rule pattern="(\w+)(=)("?[^\s"\]]+"?)"><bygroups><token type="NameAttribute"/><token type="Operator"/><token type="LiteralString"/></bygroups></rule>
|
||||
<rule pattern="(=)("?[^\s"\]]+"?)"><bygroups><token type="Operator"/><token type="LiteralString"/></bygroups></rule>
|
||||
<rule pattern="\]"><token type="Keyword"/><pop depth="1"/></rule>
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
||||
|
56
lexers/markdown.xml
Normal file
56
lexers/markdown.xml
Normal file
@ -0,0 +1,56 @@
|
||||
|
||||
<lexer>
|
||||
<config>
|
||||
<name>Markdown</name>
|
||||
<alias>markdown</alias>
|
||||
<alias>md</alias>
|
||||
<filename>*.md</filename>
|
||||
<filename>*.markdown</filename>
|
||||
<mime_type>text/x-markdown</mime_type>
|
||||
</config>
|
||||
<rules>
|
||||
<state name="root">
|
||||
<rule pattern="(^#[^#].+)(\n)"><bygroups><token type="GenericHeading"/><token type="Text"/></bygroups></rule>
|
||||
<rule pattern="(^#{2,6}[^#].+)(\n)"><bygroups><token type="GenericSubheading"/><token type="Text"/></bygroups></rule>
|
||||
<rule pattern="^(.+)(\n)(=+)(\n)"><bygroups><token type="GenericHeading"/><token type="Text"/><token type="GenericHeading"/><token type="Text"/></bygroups></rule>
|
||||
<rule pattern="^(.+)(\n)(-+)(\n)"><bygroups><token type="GenericSubheading"/><token type="Text"/><token type="GenericSubheading"/><token type="Text"/></bygroups></rule>
|
||||
<rule pattern="^(\s*)([*-] )(\[[ xX]\])( .+\n)"><bygroups><token type="TextWhitespace"/><token type="Keyword"/><token type="Keyword"/><usingself state="inline"/></bygroups></rule>
|
||||
<rule pattern="^(\s*)([*-])(\s)(.+\n)"><bygroups><token type="TextWhitespace"/><token type="Keyword"/><token type="TextWhitespace"/><usingself state="inline"/></bygroups></rule>
|
||||
<rule pattern="^(\s*)([0-9]+\.)( .+\n)"><bygroups><token type="TextWhitespace"/><token type="Keyword"/><usingself state="inline"/></bygroups></rule>
|
||||
<rule pattern="^(\s*>\s)(.+\n)"><bygroups><token type="Keyword"/><token type="GenericEmph"/></bygroups></rule>
|
||||
<rule pattern="^(```\n)([\w\W]*?)(^```$)">
|
||||
<bygroups>
|
||||
<token type="LiteralStringBacktick"/>
|
||||
<token type="Text"/>
|
||||
<token type="LiteralStringBacktick"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="^(```)(\w+)(\n)([\w\W]*?)(^```$)">
|
||||
<bygroups>
|
||||
<token type="LiteralStringBacktick"/>
|
||||
<token type="NameLabel"/>
|
||||
<token type="TextWhitespace"/>
|
||||
<UsingByGroup lexer="2" content="4"/>
|
||||
<token type="LiteralStringBacktick"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule><include state="inline"/></rule>
|
||||
</state>
|
||||
<state name="inline">
|
||||
<rule pattern="\\."><token type="Text"/></rule>
|
||||
<rule pattern="([^`]?)(`[^`\n]+`)"><bygroups><token type="Text"/><token type="LiteralStringBacktick"/></bygroups></rule>
|
||||
<rule pattern="([^\*]?)(\*\*[^* \n][^*\n]*\*\*)"><bygroups><token type="Text"/><token type="GenericStrong"/></bygroups></rule>
|
||||
<rule pattern="([^_]?)(__[^_ \n][^_\n]*__)"><bygroups><token type="Text"/><token type="GenericStrong"/></bygroups></rule>
|
||||
<rule pattern="([^\*]?)(\*[^* \n][^*\n]*\*)"><bygroups><token type="Text"/><token type="GenericEmph"/></bygroups></rule>
|
||||
<rule pattern="([^_]?)(_[^_ \n][^_\n]*_)"><bygroups><token type="Text"/><token type="GenericEmph"/></bygroups></rule>
|
||||
<rule pattern="([^~]?)(~~[^~ \n][^~\n]*~~)"><bygroups><token type="Text"/><token type="GenericDeleted"/></bygroups></rule>
|
||||
<rule pattern="[@#][\w/:]+"><token type="NameEntity"/></rule>
|
||||
<rule pattern="(!?\[)([^]]+)(\])(\()([^)]+)(\))"><bygroups><token type="Text"/><token type="NameTag"/><token type="Text"/><token type="Text"/><token type="NameAttribute"/><token type="Text"/></bygroups></rule>
|
||||
<rule pattern="(\[)([^]]+)(\])(\[)([^]]*)(\])"><bygroups><token type="Text"/><token type="NameTag"/><token type="Text"/><token type="Text"/><token type="NameLabel"/><token type="Text"/></bygroups></rule>
|
||||
<rule pattern="^(\s*\[)([^]]*)(\]:\s*)(.+)"><bygroups><token type="Text"/><token type="NameLabel"/><token type="Text"/><token type="NameAttribute"/></bygroups></rule>
|
||||
<rule pattern="[^\\\s]+"><token type="Text"/></rule>
|
||||
<rule pattern="."><token type="Text"/></rule>
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
||||
|
34
lexers/moinwiki.xml
Normal file
34
lexers/moinwiki.xml
Normal file
@ -0,0 +1,34 @@
|
||||
|
||||
<lexer>
|
||||
<config>
|
||||
<name>MoinMoin/Trac Wiki markup</name>
|
||||
<alias>trac-wiki</alias>
|
||||
<alias>moin</alias>
|
||||
<mime_type>text/x-trac-wiki</mime_type>
|
||||
<case_insensitive>true</case_insensitive>
|
||||
</config>
|
||||
<rules>
|
||||
<state name="root">
|
||||
<rule pattern="^#.*$"><token type="Comment"/></rule>
|
||||
<rule pattern="(!)(\S+)"><bygroups><token type="Keyword"/><token type="Text"/></bygroups></rule>
|
||||
<rule pattern="^(=+)([^=]+)(=+)(\s*#.+)?$"><bygroups><token type="GenericHeading"/><usingself state="root"/><token type="GenericHeading"/><token type="LiteralString"/></bygroups></rule>
|
||||
<rule pattern="(\{\{\{)(\n#!.+)?"><bygroups><token type="NameBuiltin"/><token type="NameNamespace"/></bygroups><push state="codeblock"/></rule>
|
||||
<rule pattern="(\'\'\'?|\|\||`|__|~~|\^|,,|::)"><token type="Comment"/></rule>
|
||||
<rule pattern="^( +)([.*-])( )"><bygroups><token type="Text"/><token type="NameBuiltin"/><token type="Text"/></bygroups></rule>
|
||||
<rule pattern="^( +)([a-z]{1,5}\.)( )"><bygroups><token type="Text"/><token type="NameBuiltin"/><token type="Text"/></bygroups></rule>
|
||||
<rule pattern="\[\[\w+.*?\]\]"><token type="Keyword"/></rule>
|
||||
<rule pattern="(\[[^\s\]]+)(\s+[^\]]+?)?(\])"><bygroups><token type="Keyword"/><token type="LiteralString"/><token type="Keyword"/></bygroups></rule>
|
||||
<rule pattern="^----+$"><token type="Keyword"/></rule>
|
||||
<rule pattern="[^\n\'\[{!_~^,|]+"><token type="Text"/></rule>
|
||||
<rule pattern="\n"><token type="Text"/></rule>
|
||||
<rule pattern="."><token type="Text"/></rule>
|
||||
</state>
|
||||
<state name="codeblock">
|
||||
<rule pattern="\}\}\}"><token type="NameBuiltin"/><pop depth="1"/></rule>
|
||||
<rule pattern="\{\{\{"><token type="Text"/><push/></rule>
|
||||
<rule pattern="[^{}]+"><token type="CommentPreproc"/></rule>
|
||||
<rule pattern="."><token type="CommentPreproc"/></rule>
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
||||
|
@ -40,15 +40,18 @@ for fname in glob.glob("lexers/*.xml"):
|
||||
with open("src/constants/lexers.cr", "w") as f:
|
||||
f.write("module Tartrazine\n")
|
||||
f.write(" LEXERS_BY_NAME = {\n")
|
||||
for k, v in lexer_by_name.items():
|
||||
for k in sorted(lexer_by_name.keys()):
|
||||
v = lexer_by_name[k]
|
||||
f.write(f'"{k}" => "{v}", \n')
|
||||
f.write("}\n")
|
||||
f.write(" LEXERS_BY_MIMETYPE = {\n")
|
||||
for k, v in lexer_by_mimetype.items():
|
||||
for k in sorted(lexer_by_mimetype.keys()):
|
||||
v = lexer_by_mimetype[k]
|
||||
f.write(f'"{k}" => "{v}", \n')
|
||||
f.write("}\n")
|
||||
f.write(" LEXERS_BY_FILENAME = {\n")
|
||||
for k, v in lexer_by_filename.items():
|
||||
for k in sorted(lexer_by_filename.keys()):
|
||||
v = lexer_by_filename[k]
|
||||
f.write(f'"{k}" => {str(list(v)).replace("'", "\"")}, \n')
|
||||
f.write("}\n")
|
||||
f.write("end\n")
|
||||
|
@ -72,8 +72,7 @@ end
|
||||
|
||||
# Helper that creates lexer and tokenizes
|
||||
def tokenize(lexer_name, text)
|
||||
lexer = Tartrazine.lexer(lexer_name)
|
||||
tokenizer = Tartrazine::Tokenizer.new(lexer, text)
|
||||
tokenizer = Tartrazine.lexer(lexer_name).tokenizer(text)
|
||||
Tartrazine::Lexer.collapse_tokens(tokenizer.to_a)
|
||||
end
|
||||
|
||||
|
@ -16,13 +16,16 @@ module Tartrazine
|
||||
Push
|
||||
Token
|
||||
Using
|
||||
Usingbygroup
|
||||
Usingself
|
||||
end
|
||||
|
||||
struct Action
|
||||
property actions : Array(Action) = [] of Action
|
||||
|
||||
@content_index : Int32 = 0
|
||||
@depth : Int32 = 0
|
||||
@lexer_index : Int32 = 0
|
||||
@lexer_name : String = ""
|
||||
@states : Array(String) = [] of String
|
||||
@states_to_push : Array(String) = [] of String
|
||||
@ -62,6 +65,9 @@ module Tartrazine
|
||||
@states = xml.attributes.select { |attrib|
|
||||
attrib.name == "state"
|
||||
}.map &.content
|
||||
when ActionType::Usingbygroup
|
||||
@lexer_index = xml["lexer"].to_i
|
||||
@content_index = xml["content"].to_i
|
||||
end
|
||||
end
|
||||
|
||||
@ -115,15 +121,13 @@ module Tartrazine
|
||||
when ActionType::Using
|
||||
# Shunt to another lexer entirely
|
||||
return [] of Token if match.empty?
|
||||
Tokenizer.new(
|
||||
Tartrazine.lexer(@lexer_name),
|
||||
Tartrazine.lexer(@lexer_name).tokenizer(
|
||||
String.new(match[match_group].value),
|
||||
secondary: true).to_a
|
||||
when ActionType::Usingself
|
||||
# Shunt to another copy of this lexer
|
||||
return [] of Token if match.empty?
|
||||
Tokenizer.new(
|
||||
tokenizer.lexer,
|
||||
tokenizer.lexer.tokenizer(
|
||||
String.new(match[match_group].value),
|
||||
secondary: true).to_a
|
||||
when ActionType::Combined
|
||||
@ -136,6 +140,12 @@ module Tartrazine
|
||||
tokenizer.lexer.states[new_state.name] = new_state
|
||||
tokenizer.state_stack << new_state.name
|
||||
[] of Token
|
||||
when ActionType::Usingbygroup
|
||||
# Shunt to content-specified lexer
|
||||
return [] of Token if match.empty?
|
||||
Tartrazine.lexer(String.new(match[@lexer_index].value)).tokenizer(
|
||||
String.new(match[@content_index].value),
|
||||
secondary: true).to_a
|
||||
else
|
||||
raise Exception.new("Unknown action type: #{@type}")
|
||||
end
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -17,8 +17,8 @@ module Tartrazine
|
||||
outp.to_s
|
||||
end
|
||||
|
||||
def format(text : String, lexer : Lexer, outp : IO) : Nil
|
||||
tokenizer = Tokenizer.new(lexer, text)
|
||||
def format(text : String, lexer : BaseLexer, outp : IO) : Nil
|
||||
tokenizer = lexer.tokenizer(text)
|
||||
i = 0
|
||||
outp << line_label(i) if line_numbers?
|
||||
tokenizer.each do |token|
|
||||
|
@ -40,7 +40,7 @@ module Tartrazine
|
||||
outp.to_s
|
||||
end
|
||||
|
||||
def format(text : String, lexer : Lexer, io : IO) : Nil
|
||||
def format(text : String, lexer : BaseLexer, io : IO) : Nil
|
||||
pre, post = wrap_standalone
|
||||
io << pre if standalone?
|
||||
format_text(text, lexer, io)
|
||||
@ -64,8 +64,8 @@ module Tartrazine
|
||||
"<span #{line_id} #{line_class} style=\"user-select: none;\">#{line_label} </span>"
|
||||
end
|
||||
|
||||
def format_text(text : String, lexer : Lexer, outp : IO)
|
||||
tokenizer = Tokenizer.new(lexer, text)
|
||||
def format_text(text : String, lexer : BaseLexer, outp : IO)
|
||||
tokenizer = lexer.tokenizer(text)
|
||||
i = 0
|
||||
if surrounding_pre?
|
||||
pre_style = wrap_long_lines? ? "style=\"white-space: pre-wrap; word-break: break-word;\"" : ""
|
||||
|
@ -4,14 +4,14 @@ module Tartrazine
|
||||
class Json < Formatter
|
||||
property name = "json"
|
||||
|
||||
def format(text : String, lexer : Lexer) : String
|
||||
def format(text : String, lexer : BaseLexer) : String
|
||||
outp = String::Builder.new("")
|
||||
format(text, lexer, outp)
|
||||
outp.to_s
|
||||
end
|
||||
|
||||
def format(text : String, lexer : Lexer, io : IO) : Nil
|
||||
tokenizer = Tokenizer.new(lexer, text)
|
||||
def format(text : String, lexer : BaseLexer, io : IO) : Nil
|
||||
tokenizer = lexer.tokenizer(text)
|
||||
io << Tartrazine::Lexer.collapse_tokens(tokenizer.to_a).to_json
|
||||
end
|
||||
end
|
||||
|
140
src/lexer.cr
140
src/lexer.cr
@ -9,29 +9,46 @@ module Tartrazine
|
||||
|
||||
# Get the lexer object for a language name
|
||||
# FIXME: support mimetypes
|
||||
def self.lexer(name : String? = nil, filename : String? = nil) : Lexer
|
||||
if name.nil? && filename.nil?
|
||||
lexer_file_name = LEXERS_BY_NAME["plaintext"]
|
||||
elsif name && name != "autodetect"
|
||||
lexer_file_name = LEXERS_BY_NAME[name.downcase]
|
||||
else
|
||||
# Guess by filename
|
||||
candidates = Set(String).new
|
||||
LEXERS_BY_FILENAME.each do |k, v|
|
||||
candidates += v.to_set if File.match?(k, File.basename(filename.to_s))
|
||||
end
|
||||
case candidates.size
|
||||
when 0
|
||||
lexer_file_name = LEXERS_BY_NAME["plaintext"]
|
||||
when 1
|
||||
lexer_file_name = candidates.first
|
||||
else
|
||||
raise Exception.new("Multiple lexers match the filename: #{candidates.to_a.join(", ")}")
|
||||
end
|
||||
end
|
||||
def self.lexer(name : String? = nil, filename : String? = nil) : BaseLexer
|
||||
return lexer_by_name(name) if name && name != "autodetect"
|
||||
return lexer_by_filename(filename) if filename
|
||||
|
||||
Lexer.from_xml(LexerFiles.get("/#{LEXERS_BY_NAME["plaintext"]}.xml").gets_to_end)
|
||||
end
|
||||
|
||||
private def self.lexer_by_name(name : String) : BaseLexer
|
||||
lexer_file_name = LEXERS_BY_NAME.fetch(name.downcase, nil)
|
||||
return create_delegating_lexer(name) if lexer_file_name.nil? && name.includes? "+"
|
||||
raise Exception.new("Unknown lexer: #{name}") if lexer_file_name.nil?
|
||||
|
||||
Lexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
|
||||
end
|
||||
|
||||
private def self.lexer_by_filename(filename : String) : BaseLexer
|
||||
candidates = Set(String).new
|
||||
LEXERS_BY_FILENAME.each do |k, v|
|
||||
candidates += v.to_set if File.match?(k, File.basename(filename))
|
||||
end
|
||||
|
||||
case candidates.size
|
||||
when 0
|
||||
lexer_file_name = LEXERS_BY_NAME["plaintext"]
|
||||
when 1
|
||||
lexer_file_name = candidates.first
|
||||
else
|
||||
raise Exception.new("Multiple lexers match the filename: #{candidates.to_a.join(", ")}")
|
||||
end
|
||||
|
||||
Lexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
|
||||
end
|
||||
|
||||
private def self.create_delegating_lexer(name : String) : BaseLexer
|
||||
language, root = name.split("+", 2)
|
||||
language_lexer = lexer(language)
|
||||
root_lexer = lexer(root)
|
||||
DelegatingLexer.new(language_lexer, root_lexer)
|
||||
end
|
||||
|
||||
# Return a list of all lexers
|
||||
def self.lexers : Array(String)
|
||||
LEXERS_BY_NAME.keys.sort!
|
||||
@ -40,15 +57,18 @@ module Tartrazine
|
||||
# A token, the output of the tokenizer
|
||||
alias Token = NamedTuple(type: String, value: String)
|
||||
|
||||
struct Tokenizer
|
||||
abstract class BaseTokenizer
|
||||
end
|
||||
|
||||
class Tokenizer < BaseTokenizer
|
||||
include Iterator(Token)
|
||||
property lexer : Lexer
|
||||
property lexer : BaseLexer
|
||||
property text : Bytes
|
||||
property pos : Int32 = 0
|
||||
@dq = Deque(Token).new
|
||||
property state_stack = ["root"]
|
||||
|
||||
def initialize(@lexer : Lexer, text : String, secondary = false)
|
||||
def initialize(@lexer : BaseLexer, text : String, secondary = false)
|
||||
# Respect the `ensure_nl` config option
|
||||
if text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary
|
||||
text += "\n"
|
||||
@ -106,13 +126,7 @@ module Tartrazine
|
||||
end
|
||||
end
|
||||
|
||||
# This implements a lexer for Pygments RegexLexers as expressed
|
||||
# in Chroma's XML serialization.
|
||||
#
|
||||
# For explanations on what actions and states do
|
||||
# the Pygments documentation is a good place to start.
|
||||
# https://pygments.org/docs/lexerdevelopment/
|
||||
struct Lexer
|
||||
abstract class BaseLexer
|
||||
property config = {
|
||||
name: "",
|
||||
priority: 0.0,
|
||||
@ -123,6 +137,18 @@ module Tartrazine
|
||||
}
|
||||
property states = {} of String => State
|
||||
|
||||
def tokenizer(text : String, secondary = false) : BaseTokenizer
|
||||
Tokenizer.new(self, text, secondary)
|
||||
end
|
||||
end
|
||||
|
||||
# This implements a lexer for Pygments RegexLexers as expressed
|
||||
# in Chroma's XML serialization.
|
||||
#
|
||||
# For explanations on what actions and states do
|
||||
# the Pygments documentation is a good place to start.
|
||||
# https://pygments.org/docs/lexerdevelopment/
|
||||
class Lexer < BaseLexer
|
||||
# Collapse consecutive tokens of the same type for easier comparison
|
||||
# and smaller output
|
||||
def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token)
|
||||
@ -204,6 +230,60 @@ module Tartrazine
|
||||
end
|
||||
end
|
||||
|
||||
# A lexer that takes two lexers as arguments: a root lexer
|
||||
# and a language lexer. Everything is scanned using the
|
||||
# language lexer; afterwards all `Other` tokens are lexed
|
||||
# using the root lexer.
|
||||
#
|
||||
# This is useful for things like template languages, where
|
||||
# you have Jinja + HTML or Jinja + CSS and so on.
|
||||
class DelegatingLexer < BaseLexer
|
||||
property language_lexer : BaseLexer
|
||||
property root_lexer : BaseLexer
|
||||
|
||||
def initialize(@language_lexer : BaseLexer, @root_lexer : BaseLexer)
|
||||
end
|
||||
|
||||
def tokenizer(text : String, secondary = false) : DelegatingTokenizer
|
||||
DelegatingTokenizer.new(self, text, secondary)
|
||||
end
|
||||
end
|
||||
|
||||
# This Tokenizer works with a DelegatingLexer. It first tokenizes
|
||||
# using the language lexer, and "Other" tokens are tokenized using
|
||||
# the root lexer.
|
||||
class DelegatingTokenizer < BaseTokenizer
|
||||
include Iterator(Token)
|
||||
@dq = Deque(Token).new
|
||||
@language_tokenizer : BaseTokenizer
|
||||
|
||||
def initialize(@lexer : DelegatingLexer, text : String, secondary = false)
|
||||
# Respect the `ensure_nl` config option
|
||||
if text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary
|
||||
text += "\n"
|
||||
end
|
||||
@language_tokenizer = @lexer.language_lexer.tokenizer(text, true)
|
||||
end
|
||||
|
||||
def next : Iterator::Stop | Token
|
||||
if @dq.size > 0
|
||||
return @dq.shift
|
||||
end
|
||||
token = @language_tokenizer.next
|
||||
if token.is_a? Iterator::Stop
|
||||
return stop
|
||||
elsif token.as(Token).[:type] == "Other"
|
||||
root_tokenizer = @lexer.root_lexer.tokenizer(token.as(Token).[:value], true)
|
||||
root_tokenizer.each do |root_token|
|
||||
@dq << root_token
|
||||
end
|
||||
else
|
||||
@dq << token.as(Token)
|
||||
end
|
||||
self.next
|
||||
end
|
||||
end
|
||||
|
||||
# A Lexer state. A state has a name and a list of rules.
|
||||
# The state machine has a state stack containing references
|
||||
# to states to decide which rules to apply.
|
||||
|
Loading…
Reference in New Issue
Block a user