feat: use the native crystal highlighter

The chroma highlighter for Crystal is not great:
the pygments lexer it was translated from special-cases
things like heredocs, and that got lost in translation.

Since the Crystal compiler ships with its own highlighter,
why not use it?
This commit is contained in:
Roberto Alsina 2024-09-09 12:22:11 -03:00
parent c011bd8347
commit 84980459cf
24 changed files with 83 additions and 1438 deletions

View File

@ -1,762 +0,0 @@
<lexer>
<config>
<name>Crystal</name>
<alias>cr</alias>
<alias>crystal</alias>
<filename>*.cr</filename>
<mime_type>text/x-crystal</mime_type>
<dot_all>true</dot_all>
</config>
<rules>
<state name="pa-intp-string">
<rule pattern="\\[\(]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="\(">
<token type="LiteralStringOther"/>
<push/>
</rule>
<rule pattern="\)">
<token type="LiteralStringOther"/>
<pop depth="1"/>
</rule>
<rule>
<include state="string-intp-escaped"/>
</rule>
<rule pattern="[\\#()]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="[^\\#()]+">
<token type="LiteralStringOther"/>
</rule>
</state>
<state name="ab-regex">
<rule pattern="\\[\\&lt;&gt;]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="&lt;">
<token type="LiteralStringRegex"/>
<push/>
</rule>
<rule pattern="&gt;[imsx]*">
<token type="LiteralStringRegex"/>
<pop depth="1"/>
</rule>
<rule>
<include state="string-intp"/>
</rule>
<rule pattern="[\\#&lt;&gt;]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="[^\\#&lt;&gt;]+">
<token type="LiteralStringRegex"/>
</rule>
</state>
<state name="cb-regex">
<rule pattern="\\[\\{}]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="\{">
<token type="LiteralStringRegex"/>
<push/>
</rule>
<rule pattern="\}[imsx]*">
<token type="LiteralStringRegex"/>
<pop depth="1"/>
</rule>
<rule>
<include state="string-intp"/>
</rule>
<rule pattern="[\\#{}]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="[^\\#{}]+">
<token type="LiteralStringRegex"/>
</rule>
</state>
<state name="simple-backtick">
<rule>
<include state="string-intp-escaped"/>
</rule>
<rule pattern="[^\\`#]+">
<token type="LiteralStringBacktick"/>
</rule>
<rule pattern="[\\#]">
<token type="LiteralStringBacktick"/>
</rule>
<rule pattern="`">
<token type="LiteralStringBacktick"/>
<pop depth="1"/>
</rule>
</state>
<state name="string-intp">
<rule pattern="#\{">
<token type="LiteralStringInterpol"/>
<push state="in-intp"/>
</rule>
</state>
<state name="interpolated-regex">
<rule>
<include state="string-intp"/>
</rule>
<rule pattern="[\\#]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="[^\\#]+">
<token type="LiteralStringRegex"/>
</rule>
</state>
<state name="cb-string">
<rule pattern="\\[\\{}]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="\{">
<token type="LiteralStringOther"/>
<push/>
</rule>
<rule pattern="\}">
<token type="LiteralStringOther"/>
<pop depth="1"/>
</rule>
<rule pattern="[\\#{}]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="[^\\#{}]+">
<token type="LiteralStringOther"/>
</rule>
</state>
<state name="in-macro-control">
<rule pattern="\{%">
<token type="LiteralStringInterpol"/>
<push/>
</rule>
<rule pattern="%\}">
<token type="LiteralStringInterpol"/>
<pop depth="1"/>
</rule>
<rule pattern="for\b|in\b">
<token type="Keyword"/>
</rule>
<rule>
<include state="root"/>
</rule>
</state>
<state name="interpolated-string">
<rule>
<include state="string-intp"/>
</rule>
<rule pattern="[\\#]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="[^\\#]+">
<token type="LiteralStringOther"/>
</rule>
</state>
<state name="in-macro-expr">
<rule pattern="\{\{">
<token type="LiteralStringInterpol"/>
<push/>
</rule>
<rule pattern="\}\}">
<token type="LiteralStringInterpol"/>
<pop depth="1"/>
</rule>
<rule>
<include state="root"/>
</rule>
</state>
<state name="simple-string">
<rule>
<include state="string-intp-escaped"/>
</rule>
<rule pattern="[^\\&#34;#]+">
<token type="LiteralStringDouble"/>
</rule>
<rule pattern="[\\#]">
<token type="LiteralStringDouble"/>
</rule>
<rule pattern="&#34;">
<token type="LiteralStringDouble"/>
<pop depth="1"/>
</rule>
</state>
<state name="cb-intp-string">
<rule pattern="\\[\{]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="\{">
<token type="LiteralStringOther"/>
<push/>
</rule>
<rule pattern="\}">
<token type="LiteralStringOther"/>
<pop depth="1"/>
</rule>
<rule>
<include state="string-intp-escaped"/>
</rule>
<rule pattern="[\\#{}]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="[^\\#{}]+">
<token type="LiteralStringOther"/>
</rule>
</state>
<state name="string-intp-escaped">
<rule>
<include state="string-intp"/>
</rule>
<rule>
<include state="string-escaped"/>
</rule>
</state>
<state name="sb-regex">
<rule pattern="\\[\\\[\]]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="\[">
<token type="LiteralStringRegex"/>
<push/>
</rule>
<rule pattern="\][imsx]*">
<token type="LiteralStringRegex"/>
<pop depth="1"/>
</rule>
<rule>
<include state="string-intp"/>
</rule>
<rule pattern="[\\#\[\]]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="[^\\#\[\]]+">
<token type="LiteralStringRegex"/>
</rule>
</state>
<state name="classname">
<rule pattern="[A-Z_]\w*">
<token type="NameClass"/>
</rule>
<rule pattern="(\()(\s*)([A-Z_]\w*)(\s*)(\))">
<bygroups>
<token type="Punctuation"/>
<token type="Text"/>
<token type="NameClass"/>
<token type="Text"/>
<token type="Punctuation"/>
</bygroups>
</rule>
<rule>
<pop depth="1"/>
</rule>
</state>
<state name="string-escaped">
<rule pattern="\\([\\befnstv#&#34;\&#39;]|x[a-fA-F0-9]{1,2}|[0-7]{1,3})">
<token type="LiteralStringEscape"/>
</rule>
</state>
<state name="sb-intp-string">
<rule pattern="\\[\[]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="\[">
<token type="LiteralStringOther"/>
<push/>
</rule>
<rule pattern="\]">
<token type="LiteralStringOther"/>
<pop depth="1"/>
</rule>
<rule>
<include state="string-intp-escaped"/>
</rule>
<rule pattern="[\\#\[\]]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="[^\\#\[\]]+">
<token type="LiteralStringOther"/>
</rule>
</state>
<state name="pa-regex">
<rule pattern="\\[\\()]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="\(">
<token type="LiteralStringRegex"/>
<push/>
</rule>
<rule pattern="\)[imsx]*">
<token type="LiteralStringRegex"/>
<pop depth="1"/>
</rule>
<rule>
<include state="string-intp"/>
</rule>
<rule pattern="[\\#()]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="[^\\#()]+">
<token type="LiteralStringRegex"/>
</rule>
</state>
<state name="in-attr">
<rule pattern="\[">
<token type="Operator"/>
<push/>
</rule>
<rule pattern="\]">
<token type="Operator"/>
<pop depth="1"/>
</rule>
<rule>
<include state="root"/>
</rule>
</state>
<state name="ab-intp-string">
<rule pattern="\\[&lt;]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="&lt;">
<token type="LiteralStringOther"/>
<push/>
</rule>
<rule pattern="&gt;">
<token type="LiteralStringOther"/>
<pop depth="1"/>
</rule>
<rule>
<include state="string-intp-escaped"/>
</rule>
<rule pattern="[\\#&lt;&gt;]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="[^\\#&lt;&gt;]+">
<token type="LiteralStringOther"/>
</rule>
</state>
<state name="in-intp">
<rule pattern="\{">
<token type="LiteralStringInterpol"/>
<push/>
</rule>
<rule pattern="\}">
<token type="LiteralStringInterpol"/>
<pop depth="1"/>
</rule>
<rule>
<include state="root"/>
</rule>
</state>
<state name="end-part">
<rule pattern=".+">
<token type="CommentPreproc"/>
<pop depth="1"/>
</rule>
</state>
<state name="root">
<rule pattern="#.*?$">
<token type="CommentSingle"/>
</rule>
<rule pattern="(instance_sizeof|pointerof|protected|abstract|require|private|include|unless|typeof|sizeof|return|extend|ensure|rescue|ifdef|super|break|begin|until|while|elsif|yield|next|when|else|then|case|with|end|asm|if|do|as|of)\b">
<token type="Keyword"/>
</rule>
<rule pattern="(false|true|nil)\b">
<token type="KeywordConstant"/>
</rule>
<rule pattern="(module|lib)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)">
<bygroups>
<token type="Keyword"/>
<token type="Text"/>
<token type="NameNamespace"/>
</bygroups>
</rule>
<rule pattern="(def|fun|macro)(\s+)((?:[a-zA-Z_]\w*::)*)">
<bygroups>
<token type="Keyword"/>
<token type="Text"/>
<token type="NameNamespace"/>
</bygroups>
<push state="funcname"/>
</rule>
<rule pattern="def(?=[*%&amp;^`~+-/\[&lt;&gt;=])">
<token type="Keyword"/>
<push state="funcname"/>
</rule>
<rule pattern="(class|struct|union|type|alias|enum)(\s+)((?:[a-zA-Z_]\w*::)*)">
<bygroups>
<token type="Keyword"/>
<token type="Text"/>
<token type="NameNamespace"/>
</bygroups>
<push state="classname"/>
</rule>
<rule pattern="(self|out|uninitialized)\b|(is_a|responds_to)\?">
<token type="KeywordPseudo"/>
</rule>
<rule pattern="(def_equals_and_hash|assert_responds_to|forward_missing_to|def_equals|property|def_hash|parallel|delegate|debugger|getter|record|setter|spawn|pp)\b">
<token type="NameBuiltinPseudo"/>
</rule>
<rule pattern="getter[!?]|property[!?]|__(DIR|FILE|LINE)__\b">
<token type="NameBuiltinPseudo"/>
</rule>
<rule pattern="(?&lt;!\.)(get_stack_top|StaticArray|Concurrent|with_color|Reference|Scheduler|read_line|Exception|at_exit|Pointer|Channel|Float64|sprintf|Float32|Process|Object|Struct|caller|UInt16|UInt32|UInt64|system|future|Number|printf|String|Symbol|Int32|Range|Slice|Regex|Mutex|sleep|Array|Class|raise|Tuple|Deque|delay|Float|Int16|print|abort|Value|UInt8|Int64|puts|Proc|File|Void|exit|fork|Bool|Char|gets|lazy|loop|main|rand|Enum|Int8|Time|Hash|Set|Box|Nil|Dir|Int|p)\b">
<token type="NameBuiltin"/>
</rule>
<rule pattern="(?&lt;!\w)(&lt;&lt;-?)([&#34;`\&#39;]?)([a-zA-Z_]\w*)(\2)(.*?\n)">
<token type="LiteralStringHeredoc"/>
</rule>
<rule pattern="(&lt;&lt;-?)(&#34;|\&#39;)()(\2)(.*?\n)">
<token type="LiteralStringHeredoc"/>
</rule>
<rule pattern="__END__">
<token type="CommentPreproc"/>
<push state="end-part"/>
</rule>
<rule pattern="(?:^|(?&lt;=[=&lt;&gt;~!:])|(?&lt;=(?:\s|;)when\s)|(?&lt;=(?:\s|;)or\s)|(?&lt;=(?:\s|;)and\s)|(?&lt;=\.index\s)|(?&lt;=\.scan\s)|(?&lt;=\.sub\s)|(?&lt;=\.sub!\s)|(?&lt;=\.gsub\s)|(?&lt;=\.gsub!\s)|(?&lt;=\.match\s)|(?&lt;=(?:\s|;)if\s)|(?&lt;=(?:\s|;)elsif\s)|(?&lt;=^when\s)|(?&lt;=^index\s)|(?&lt;=^scan\s)|(?&lt;=^sub\s)|(?&lt;=^gsub\s)|(?&lt;=^sub!\s)|(?&lt;=^gsub!\s)|(?&lt;=^match\s)|(?&lt;=^if\s)|(?&lt;=^elsif\s))(\s*)(/)">
<bygroups>
<token type="Text"/>
<token type="LiteralStringRegex"/>
</bygroups>
<push state="multiline-regex"/>
</rule>
<rule pattern="(?&lt;=\(|,|\[)/">
<token type="LiteralStringRegex"/>
<push state="multiline-regex"/>
</rule>
<rule pattern="(\s+)(/)(?![\s=])">
<bygroups>
<token type="Text"/>
<token type="LiteralStringRegex"/>
</bygroups>
<push state="multiline-regex"/>
</rule>
<rule pattern="(0o[0-7]+(?:_[0-7]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?">
<bygroups>
<token type="LiteralNumberOct"/>
<token type="Text"/>
<token type="Operator"/>
</bygroups>
</rule>
<rule pattern="(0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?">
<bygroups>
<token type="LiteralNumberHex"/>
<token type="Text"/>
<token type="Operator"/>
</bygroups>
</rule>
<rule pattern="(0b[01]+(?:_[01]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?">
<bygroups>
<token type="LiteralNumberBin"/>
<token type="Text"/>
<token type="Operator"/>
</bygroups>
</rule>
<rule pattern="((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)(?:e[+-]?[0-9]+)?(?:_?f[0-9]+)?)(\s*)([/?])?">
<bygroups>
<token type="LiteralNumberFloat"/>
<token type="Text"/>
<token type="Operator"/>
</bygroups>
</rule>
<rule pattern="((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)?(?:e[+-]?[0-9]+)(?:_?f[0-9]+)?)(\s*)([/?])?">
<bygroups>
<token type="LiteralNumberFloat"/>
<token type="Text"/>
<token type="Operator"/>
</bygroups>
</rule>
<rule pattern="((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)?(?:e[+-]?[0-9]+)?(?:_?f[0-9]+))(\s*)([/?])?">
<bygroups>
<token type="LiteralNumberFloat"/>
<token type="Text"/>
<token type="Operator"/>
</bygroups>
</rule>
<rule pattern="(0\b|[1-9][\d]*(?:_\d+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?">
<bygroups>
<token type="LiteralNumberInteger"/>
<token type="Text"/>
<token type="Operator"/>
</bygroups>
</rule>
<rule pattern="@@[a-zA-Z_]\w*">
<token type="NameVariableClass"/>
</rule>
<rule pattern="@[a-zA-Z_]\w*">
<token type="NameVariableInstance"/>
</rule>
<rule pattern="\$\w+">
<token type="NameVariableGlobal"/>
</rule>
<rule pattern="\$[!@&amp;`\&#39;+~=/\\,;.&lt;&gt;_*$?:&#34;^-]">
<token type="NameVariableGlobal"/>
</rule>
<rule pattern="\$-[0adFiIlpvw]">
<token type="NameVariableGlobal"/>
</rule>
<rule pattern="::">
<token type="Operator"/>
</rule>
<rule>
<include state="strings"/>
</rule>
<rule pattern="\?(\\[MC]-)*(\\([\\befnrtv#&#34;\&#39;]|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)(?!\w)">
<token type="LiteralStringChar"/>
</rule>
<rule pattern="[A-Z][A-Z_]+\b">
<token type="NameConstant"/>
</rule>
<rule pattern="\{%">
<token type="LiteralStringInterpol"/>
<push state="in-macro-control"/>
</rule>
<rule pattern="\{\{">
<token type="LiteralStringInterpol"/>
<push state="in-macro-expr"/>
</rule>
<rule pattern="(@\[)(\s*)([A-Z]\w*)">
<bygroups>
<token type="Operator"/>
<token type="Text"/>
<token type="NameDecorator"/>
</bygroups>
<push state="in-attr"/>
</rule>
<rule pattern="(\.|::)(\[\]\?|&lt;=&gt;|===|\[\]=|&gt;&gt;|&amp;&amp;|\*\*|\[\]|\|\||&gt;=|=~|!~|&lt;&lt;|&lt;=|!=|==|&lt;|/|=|-|\+|&gt;|\*|&amp;|%|\^|!|\||~)">
<bygroups>
<token type="Operator"/>
<token type="NameOperator"/>
</bygroups>
</rule>
<rule pattern="(\.|::)([a-zA-Z_]\w*[!?]?|[*%&amp;^`~+\-/\[&lt;&gt;=])">
<bygroups>
<token type="Operator"/>
<token type="Name"/>
</bygroups>
</rule>
<rule pattern="[a-zA-Z_]\w*(?:[!?](?!=))?">
<token type="Name"/>
</rule>
<rule pattern="(\[|\]\??|\*\*|&lt;=&gt;?|&gt;=|&lt;&lt;?|&gt;&gt;?|=~|===|!~|&amp;&amp;?|\|\||\.{1,3})">
<token type="Operator"/>
</rule>
<rule pattern="[-+/*%=&lt;&gt;&amp;!^|~]=?">
<token type="Operator"/>
</rule>
<rule pattern="[(){};,/?:\\]">
<token type="Punctuation"/>
</rule>
<rule pattern="\s+">
<token type="Text"/>
</rule>
</state>
<state name="multiline-regex">
<rule>
<include state="string-intp"/>
</rule>
<rule pattern="\\\\">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="\\/">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="[\\#]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="[^\\/#]+">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="/[imsx]*">
<token type="LiteralStringRegex"/>
<pop depth="1"/>
</rule>
</state>
<state name="ab-string">
<rule pattern="\\[\\&lt;&gt;]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="&lt;">
<token type="LiteralStringOther"/>
<push/>
</rule>
<rule pattern="&gt;">
<token type="LiteralStringOther"/>
<pop depth="1"/>
</rule>
<rule pattern="[\\#&lt;&gt;]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="[^\\#&lt;&gt;]+">
<token type="LiteralStringOther"/>
</rule>
</state>
<state name="pa-string">
<rule pattern="\\[\\()]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="\(">
<token type="LiteralStringOther"/>
<push/>
</rule>
<rule pattern="\)">
<token type="LiteralStringOther"/>
<pop depth="1"/>
</rule>
<rule pattern="[\\#()]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="[^\\#()]+">
<token type="LiteralStringOther"/>
</rule>
</state>
<state name="strings">
<rule pattern="\:@{0,2}[a-zA-Z_]\w*[!?]?">
<token type="LiteralStringSymbol"/>
</rule>
<rule pattern="\:@{0,2}(\[\]\?|&lt;=&gt;|===|\[\]=|&gt;&gt;|&amp;&amp;|\*\*|\[\]|\|\||&gt;=|=~|!~|&lt;&lt;|&lt;=|!=|==|&lt;|/|=|-|\+|&gt;|\*|&amp;|%|\^|!|\||~)">
<token type="LiteralStringSymbol"/>
</rule>
<rule pattern=":&#39;(\\\\|\\&#39;|[^&#39;])*&#39;">
<token type="LiteralStringSymbol"/>
</rule>
<rule pattern="&#39;(\\\\|\\&#39;|[^&#39;]|\\[^&#39;\\]+)&#39;">
<token type="LiteralStringChar"/>
</rule>
<rule pattern=":&#34;">
<token type="LiteralStringSymbol"/>
<push state="simple-sym"/>
</rule>
<rule pattern="([a-zA-Z_]\w*)(:)(?!:)">
<bygroups>
<token type="LiteralStringSymbol"/>
<token type="Punctuation"/>
</bygroups>
</rule>
<rule pattern="&#34;">
<token type="LiteralStringDouble"/>
<push state="simple-string"/>
</rule>
<rule pattern="(?&lt;!\.)`">
<token type="LiteralStringBacktick"/>
<push state="simple-backtick"/>
</rule>
<rule pattern="%\{">
<token type="LiteralStringOther"/>
<push state="cb-intp-string"/>
</rule>
<rule pattern="%[wi]\{">
<token type="LiteralStringOther"/>
<push state="cb-string"/>
</rule>
<rule pattern="%r\{">
<token type="LiteralStringRegex"/>
<push state="cb-regex"/>
</rule>
<rule pattern="%\[">
<token type="LiteralStringOther"/>
<push state="sb-intp-string"/>
</rule>
<rule pattern="%[wi]\[">
<token type="LiteralStringOther"/>
<push state="sb-string"/>
</rule>
<rule pattern="%r\[">
<token type="LiteralStringRegex"/>
<push state="sb-regex"/>
</rule>
<rule pattern="%\(">
<token type="LiteralStringOther"/>
<push state="pa-intp-string"/>
</rule>
<rule pattern="%[wi]\(">
<token type="LiteralStringOther"/>
<push state="pa-string"/>
</rule>
<rule pattern="%r\(">
<token type="LiteralStringRegex"/>
<push state="pa-regex"/>
</rule>
<rule pattern="%&lt;">
<token type="LiteralStringOther"/>
<push state="ab-intp-string"/>
</rule>
<rule pattern="%[wi]&lt;">
<token type="LiteralStringOther"/>
<push state="ab-string"/>
</rule>
<rule pattern="%r&lt;">
<token type="LiteralStringRegex"/>
<push state="ab-regex"/>
</rule>
<rule pattern="(%r([\W_]))((?:\\\2|(?!\2).)*)(\2[imsx]*)">
<token type="LiteralString"/>
</rule>
<rule pattern="(%[wi]([\W_]))((?:\\\2|(?!\2).)*)(\2)">
<token type="LiteralString"/>
</rule>
<rule pattern="(?&lt;=[-+/*%=&lt;&gt;&amp;!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)">
<bygroups>
<token type="Text"/>
<token type="LiteralStringOther"/>
<token type="None"/>
</bygroups>
</rule>
<rule pattern="^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)">
<bygroups>
<token type="Text"/>
<token type="LiteralStringOther"/>
<token type="None"/>
</bygroups>
</rule>
<rule pattern="(%([\[{(&lt;]))((?:\\\2|(?!\2).)*)(\2)">
<token type="LiteralString"/>
</rule>
</state>
<state name="sb-string">
<rule pattern="\\[\\\[\]]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="\[">
<token type="LiteralStringOther"/>
<push/>
</rule>
<rule pattern="\]">
<token type="LiteralStringOther"/>
<pop depth="1"/>
</rule>
<rule pattern="[\\#\[\]]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="[^\\#\[\]]+">
<token type="LiteralStringOther"/>
</rule>
</state>
<state name="funcname">
<rule pattern="(?:([a-zA-Z_]\w*)(\.))?([a-zA-Z_]\w*[!?]?|\*\*?|[-+]@?|[/%&amp;|^`~]|\[\]=?|&lt;&lt;|&gt;&gt;|&lt;=?&gt;|&gt;=?|===?)">
<bygroups>
<token type="NameClass"/>
<token type="Operator"/>
<token type="NameFunction"/>
</bygroups>
<pop depth="1"/>
</rule>
<rule>
<pop depth="1"/>
</rule>
</state>
<state name="simple-sym">
<rule>
<include state="string-escaped"/>
</rule>
<rule pattern="[^\\&#34;#]+">
<token type="LiteralStringSymbol"/>
</rule>
<rule pattern="[\\#]">
<token type="LiteralStringSymbol"/>
</rule>
<rule pattern="&#34;">
<token type="LiteralStringSymbol"/>
<pop depth="1"/>
</rule>
</state>
</rules>
</lexer>

View File

@ -1,16 +0,0 @@
---input---
@[FOO::Bar::Baz(opt: "xx")]
---tokens---
'@[' Operator
'FOO::Bar::Baz' Name.Decorator
'(' Punctuation
'opt' Literal.String.Symbol
':' Punctuation
' ' Text.Whitespace
'"' Literal.String.Double
'xx' Literal.String.Double
'"' Literal.String.Double
')' Punctuation
']' Operator
'\n' Text.Whitespace

View File

@ -1,11 +0,0 @@
---input---
[5][5]?
---tokens---
'[' Operator
'5' Literal.Number.Integer
']' Operator
'[' Operator
'5' Literal.Number.Integer
']?' Operator
'\n' Text.Whitespace

View File

@ -1,25 +0,0 @@
---input---
'a'
'я'
'\u{1234}'
'
'
'abc'
---tokens---
"'a'" Literal.String.Char
'\n' Text.Whitespace
"'я'" Literal.String.Char
'\n' Text.Whitespace
"'\\u{1234}'" Literal.String.Char
'\n' Text.Whitespace
"'\n'" Literal.String.Char
'\n' Text.Whitespace
"'" Error
'abc' Name
"'" Error
'\n' Text.Whitespace

View File

@ -1,14 +0,0 @@
---input---
HTTP
HTTP::Server.new
---tokens---
'HTTP' Name.Constant
'\n' Text.Whitespace
'HTTP' Name
'::' Operator
'Server' Name
'.' Operator
'new' Name
'\n' Text.Whitespace

View File

@ -1,27 +0,0 @@
---input---
%()
%[]
%{}
%<>
%||
---tokens---
'%(' Literal.String.Other
')' Literal.String.Other
'\n' Text.Whitespace
'%[' Literal.String.Other
']' Literal.String.Other
'\n' Text.Whitespace
'%{' Literal.String.Other
'}' Literal.String.Other
'\n' Text.Whitespace
'%<' Literal.String.Other
'>' Literal.String.Other
'\n' Text.Whitespace
'%|' Literal.String.Other
'|' Literal.String.Other
'\n' Text.Whitespace

View File

@ -1,19 +0,0 @@
---input---
str.gsub(%r{\\\\}, "/")
---tokens---
'str' Name
'.' Operator
'gsub' Name
'(' Punctuation
'%r{' Literal.String.Regex
'\\\\' Literal.String.Regex
'\\\\' Literal.String.Regex
'}' Literal.String.Regex
',' Punctuation
' ' Text.Whitespace
'"' Literal.String.Double
'/' Literal.String.Double
'"' Literal.String.Double
')' Punctuation
'\n' Text.Whitespace

View File

@ -1,9 +0,0 @@
---input---
"\#{a + b}"
---tokens---
'"' Literal.String.Double
'\\#' Literal.String.Escape
'{a + b}' Literal.String.Double
'"' Literal.String.Double
'\n' Text.Whitespace

View File

@ -1,56 +0,0 @@
---input---
"A#{ (3..5).group_by { |x| x/2}.map do |k,v| "#{k}" end.join }" + "Z"
---tokens---
'"' Literal.String.Double
'A' Literal.String.Double
'#{' Literal.String.Interpol
' ' Text.Whitespace
'(' Punctuation
'3' Literal.Number.Integer
'..' Operator
'5' Literal.Number.Integer
')' Punctuation
'.' Operator
'group_by' Name
' ' Text.Whitespace
'{' Literal.String.Interpol
' ' Text.Whitespace
'|' Operator
'x' Name
'|' Operator
' ' Text.Whitespace
'x' Name
'/' Operator
'2' Literal.Number.Integer
'}' Literal.String.Interpol
'.' Operator
'map' Name
' ' Text.Whitespace
'do' Keyword
' ' Text.Whitespace
'|' Operator
'k' Name
',' Punctuation
'v' Name
'|' Operator
' ' Text.Whitespace
'"' Literal.String.Double
'#{' Literal.String.Interpol
'k' Name
'}' Literal.String.Interpol
'"' Literal.String.Double
' ' Text.Whitespace
'end' Keyword
'.' Operator
'join' Name
' ' Text.Whitespace
'}' Literal.String.Interpol
'"' Literal.String.Double
' ' Text.Whitespace
'+' Operator
' ' Text.Whitespace
'"' Literal.String.Double
'Z' Literal.String.Double
'"' Literal.String.Double
'\n' Text.Whitespace

View File

@ -1,58 +0,0 @@
---input---
@[Link("some")]
lib LibSome
@[CallConvention("X86_StdCall")]
fun foo="some.foo"(thing : Void*) : LibC::Int
end
---tokens---
'@[' Operator
'Link' Name.Decorator
'(' Punctuation
'"' Literal.String.Double
'some' Literal.String.Double
'"' Literal.String.Double
')' Punctuation
']' Operator
'\n' Text.Whitespace
'lib' Keyword
' ' Text.Whitespace
'LibSome' Name.Namespace
'\n' Text.Whitespace
'@[' Operator
'CallConvention' Name.Decorator
'(' Punctuation
'"' Literal.String.Double
'X86_StdCall' Literal.String.Double
'"' Literal.String.Double
')' Punctuation
']' Operator
'\n' Text.Whitespace
'fun' Keyword
' ' Text.Whitespace
'foo' Name.Function
'=' Operator
'"' Literal.String.Double
'some.foo' Literal.String.Double
'"' Literal.String.Double
'(' Punctuation
'thing' Name
' ' Text.Whitespace
':' Punctuation
' ' Text.Whitespace
'Void' Name
'*' Operator
')' Punctuation
' ' Text.Whitespace
':' Punctuation
' ' Text.Whitespace
'LibC' Name
'::' Operator
'Int' Name
'\n' Text.Whitespace
'end' Keyword
'\n' Text.Whitespace

View File

@ -1,76 +0,0 @@
---input---
def<=>(other : self) : Int
{%for field in %w(first_name middle_name last_name)%}
cmp={{field.id}}<=>other.{{field.id}}
return cmp if cmp!=0
{%end%}
0
end
---tokens---
'def' Keyword
'<=>' Name.Function
'(' Punctuation
'other' Name
' ' Text.Whitespace
':' Punctuation
' ' Text.Whitespace
'self' Keyword
')' Punctuation
' ' Text.Whitespace
':' Punctuation
' ' Text.Whitespace
'Int' Name
'\n' Text.Whitespace
'{%' Literal.String.Interpol
'for' Keyword
' ' Text.Whitespace
'field' Name
' ' Text.Whitespace
'in' Keyword
' ' Text.Whitespace
'%w(' Literal.String.Other
'first_name middle_name last_name' Literal.String.Other
')' Literal.String.Other
'%}' Literal.String.Interpol
'\n' Text.Whitespace
'cmp' Name
'=' Operator
'{{' Literal.String.Interpol
'field' Name
'.' Operator
'id' Name
'}}' Literal.String.Interpol
'<=>' Operator
'other' Name
'.' Operator
'{{' Literal.String.Interpol
'field' Name
'.' Operator
'id' Name
'}}' Literal.String.Interpol
'\n' Text.Whitespace
'return' Keyword
' ' Text.Whitespace
'cmp' Name
' ' Text.Whitespace
'if' Keyword
' ' Text.Whitespace
'cmp' Name
'!=' Operator
'0' Literal.Number.Integer
'\n' Text.Whitespace
'{%' Literal.String.Interpol
'end' Keyword
'%}' Literal.String.Interpol
'\n' Text.Whitespace
'0' Literal.Number.Integer
'\n' Text.Whitespace
'end' Keyword
'\n' Text.Whitespace

View File

@ -1,84 +0,0 @@
---input---
# Integers
0
1
1_000_000
1u8
11231231231121312i64
# Floats
0.0
1.0_f32
1_f32
0f64
1e+4
1e111
1_234.567_890
# Error
01
0b2
0x129g2
0o12358
---tokens---
'# Integers' Comment.Single
'\n' Text.Whitespace
'0' Literal.Number.Integer
'\n' Text.Whitespace
'1' Literal.Number.Integer
'\n' Text.Whitespace
'1_000_000' Literal.Number.Integer
'\n' Text.Whitespace
'1u8' Literal.Number.Integer
'\n' Text.Whitespace
'11231231231121312i64' Literal.Number.Integer
'\n\n' Text.Whitespace
'# Floats' Comment.Single
'\n' Text.Whitespace
'0.0' Literal.Number.Float
'\n' Text.Whitespace
'1.0_f32' Literal.Number.Float
'\n' Text.Whitespace
'1_f32' Literal.Number.Float
'\n' Text.Whitespace
'0f64' Literal.Number.Float
'\n' Text.Whitespace
'1e+4' Literal.Number.Float
'\n' Text.Whitespace
'1e111' Literal.Number.Float
'\n' Text.Whitespace
'1_234.567_890' Literal.Number.Float
'\n\n' Text.Whitespace
'# Error' Comment.Single
'\n' Text.Whitespace
'0' Error
'1' Literal.Number.Integer
'\n' Text.Whitespace
'0' Error
'b2' Name
'\n' Text.Whitespace
'0' Error
'x129g2' Name
'\n' Text.Whitespace
'0' Error
'o12358' Name
'\n' Text.Whitespace

View File

@ -1,18 +0,0 @@
---input---
([] of Int32).[]?(5)
---tokens---
'(' Punctuation
'[' Operator
']' Operator
' ' Text.Whitespace
'of' Keyword
' ' Text.Whitespace
'Int32' Name
')' Punctuation
'.' Operator
'[]?' Name.Operator
'(' Punctuation
'5' Literal.Number.Integer
')' Punctuation
'\n' Text.Whitespace

View File

@ -1,41 +0,0 @@
---input---
%(hello ("world"))
%[hello ["world"]]
%{hello "world"}
%<hello <"world">>
%|hello "world"|
---tokens---
'%(' Literal.String.Other
'hello ' Literal.String.Other
'(' Literal.String.Other
'"world"' Literal.String.Other
')' Literal.String.Other
')' Literal.String.Other
'\n' Text.Whitespace
'%[' Literal.String.Other
'hello ' Literal.String.Other
'[' Literal.String.Other
'"world"' Literal.String.Other
']' Literal.String.Other
']' Literal.String.Other
'\n' Text.Whitespace
'%{' Literal.String.Other
'hello "world"' Literal.String.Other
'}' Literal.String.Other
'\n' Text.Whitespace
'%<' Literal.String.Other
'hello ' Literal.String.Other
'<' Literal.String.Other
'"world"' Literal.String.Other
'>' Literal.String.Other
'>' Literal.String.Other
'\n' Text.Whitespace
'%|' Literal.String.Other
'hello "world"' Literal.String.Other
'|' Literal.String.Other
'\n' Text.Whitespace

View File

@ -1,31 +0,0 @@
---input---
%Q(hello \n #{name})
%q(hello \n #{name})
%w(foo\nbar baz)
---tokens---
'%Q(' Literal.String.Other
'hello ' Literal.String.Other
'\\n' Literal.String.Escape
' ' Literal.String.Other
'#{' Literal.String.Interpol
'name' Name
'}' Literal.String.Interpol
')' Literal.String.Other
'\n' Text.Whitespace
'%q(' Literal.String.Other
'hello ' Literal.String.Other
'\\' Literal.String.Other
'n ' Literal.String.Other
'#' Literal.String.Other
'{name}' Literal.String.Other
')' Literal.String.Other
'\n' Text.Whitespace
'%w(' Literal.String.Other
'foo' Literal.String.Other
'\\' Literal.String.Other
'nbar baz' Literal.String.Other
')' Literal.String.Other
'\n' Text.Whitespace

View File

@ -1,20 +0,0 @@
---input---
record Cls do
def_equals s
end
---tokens---
'record' Name.Builtin.Pseudo
' ' Text.Whitespace
'Cls' Name
' ' Text.Whitespace
'do' Keyword
'\n' Text.Whitespace
'def_equals' Name.Builtin.Pseudo
' ' Text.Whitespace
's' Name
'\n' Text.Whitespace
'end' Keyword
'\n' Text.Whitespace

View File

@ -1,50 +0,0 @@
---input---
def f(x : T, line = __LINE__) forall T
if x.is_a?(String)
pp! x
end
end
---tokens---
'def' Keyword
' ' Text.Whitespace
'f' Name.Function
'(' Punctuation
'x' Name
' ' Text.Whitespace
':' Punctuation
' ' Text.Whitespace
'T' Name
',' Punctuation
' ' Text.Whitespace
'line' Name
' ' Text.Whitespace
'=' Operator
' ' Text.Whitespace
'__LINE__' Keyword.Pseudo
')' Punctuation
' ' Text.Whitespace
'forall' Keyword.Pseudo
' ' Text.Whitespace
'T' Name
'\n' Text.Whitespace
'if' Keyword
' ' Text.Whitespace
'x' Name
'.is_a?' Keyword.Pseudo
'(' Punctuation
'String' Name
')' Punctuation
'\n' Text.Whitespace
'pp!' Name.Builtin.Pseudo
' ' Text.Whitespace
'x' Name
'\n' Text.Whitespace
'end' Keyword
'\n' Text.Whitespace
'end' Keyword
'\n' Text.Whitespace

View File

@ -1,8 +0,0 @@
---input---
1...3
---tokens---
'1' Literal.Number.Integer
'...' Operator
'3' Literal.Number.Integer
'\n' Text.Whitespace

View File

@ -1,10 +0,0 @@
---input---
1 .. 3
---tokens---
'1' Literal.Number.Integer
' ' Text.Whitespace
'..' Operator
' ' Text.Whitespace
'3' Literal.Number.Integer
'\n' Text.Whitespace

View File

@ -1,58 +0,0 @@
---input---
"a\nz"
"a\az"
"a\xffz"
"a\u1234z"
"a\000z"
"a\u{0}z"
"a\u{10AfF9}z"
---tokens---
'"' Literal.String.Double
'a' Literal.String.Double
'\\n' Literal.String.Escape
'z' Literal.String.Double
'"' Literal.String.Double
'\n' Text.Whitespace
'"' Literal.String.Double
'a' Literal.String.Double
'\\a' Literal.String.Escape
'z' Literal.String.Double
'"' Literal.String.Double
'\n' Text.Whitespace
'"' Literal.String.Double
'a' Literal.String.Double
'\\xff' Literal.String.Escape
'z' Literal.String.Double
'"' Literal.String.Double
'\n' Text.Whitespace
'"' Literal.String.Double
'a' Literal.String.Double
'\\u1234' Literal.String.Escape
'z' Literal.String.Double
'"' Literal.String.Double
'\n' Text.Whitespace
'"' Literal.String.Double
'a' Literal.String.Double
'\\000' Literal.String.Escape
'z' Literal.String.Double
'"' Literal.String.Double
'\n' Text.Whitespace
'"' Literal.String.Double
'a' Literal.String.Double
'\\u{0}' Literal.String.Escape
'z' Literal.String.Double
'"' Literal.String.Double
'\n' Text.Whitespace
'"' Literal.String.Double
'a' Literal.String.Double
'\\u{10AfF9}' Literal.String.Escape
'z' Literal.String.Double
'"' Literal.String.Double
'\n' Text.Whitespace

View File

@ -1,20 +0,0 @@
---input---
:sym_bol
:あ
:question?
:"symbol"
---tokens---
':sym_bol' Literal.String.Symbol
'\n' Text.Whitespace
':あ' Literal.String.Symbol
'\n' Text.Whitespace
':question?' Literal.String.Symbol
'\n' Text.Whitespace
':"' Literal.String.Symbol
'symbol' Literal.String.Symbol
'"' Literal.String.Symbol
'\n' Text.Whitespace

View File

@ -1,6 +1,7 @@
require "./constants/lexers"
require "./heuristics"
require "baked_file_system"
require "crystal/syntax_highlighter"
module Tartrazine
class LexerFiles
@ -26,6 +27,7 @@ module Tartrazine
end
private def self.lexer_by_name(name : String) : BaseLexer
return CrystalLexer.new if name == "crystal"
lexer_file_name = LEXERS_BY_NAME.fetch(name.downcase, nil)
return create_delegating_lexer(name) if lexer_file_name.nil? && name.includes? "+"
raise Exception.new("Unknown lexer: #{name}") if lexer_file_name.nil?
@ -34,6 +36,10 @@ module Tartrazine
end
private def self.lexer_by_filename(filename : String) : BaseLexer
if filename.ends_with?(".cr")
return CrystalLexer.new
end
candidates = Set(String).new
LEXERS_BY_FILENAME.each do |k, v|
candidates += v.to_set if File.match?(k, File.basename(filename))
@ -327,4 +333,81 @@ module Tartrazine
new_state
end
end
# Adapter that turns the output of the Crystal compiler's own syntax
# highlighter into Tartrazine tokens.
#
# Everything the base class renders is appended to `@tokens` as a
# `{type: ..., value: ...}` tuple, where `type` is a Chroma-style token
# name and `value` is the piece of source text.
class CustomCrystalHighlighter < Crystal::SyntaxHighlighter
  @tokens = [] of Token

  # Hook wrapped around a delimited literal (string, regex, ...).
  #
  # The pieces of the literal are appended by `render` while the block
  # runs, so nothing extra is appended here. Previously the block's return
  # value was stringified and pushed as an additional token, which only
  # added an empty entry to the stream.
  def render_delimiter(&)
    yield
  end

  # Hook wrapped around a `#{...}` interpolation: emits the opening and
  # closing markers around whatever the block renders.
  def render_interpolation(&)
    @tokens << {type: "LiteralStringInterpol", value: "\#{"}
    yield
    @tokens << {type: "LiteralStringInterpol", value: "}"}
  end

  # Hook wrapped around a string array literal; see `render_delimiter`
  # for why nothing is appended here.
  def render_string_array(&)
    yield
  end

  # Appends one token for *value*, translating the compiler's token
  # *type* into a Chroma-style token name. Types without an explicit
  # mapping are emitted as plain "Text".
  #
  # NOTE(review): keywords are emitted as "NameKeyword" rather than
  # "Keyword" -- confirm the themes in use style that type as intended.
  # ameba:disable Metrics/CyclomaticComplexity
  def render(type : TokenType, value : String)
    kind = case type
           when .comment?           then "Comment"
           when .number?            then "LiteralNumber"
           when .char?              then "LiteralStringChar"
           when .symbol?            then "LiteralStringSymbol"
           when .const?             then "NameConstant"
           when .string?            then "LiteralString"
           when .ident?             then "NameVariable"
           when .keyword?, .self?   then "NameKeyword"
           when .primitive_literal? then "Literal"
           when .operator?          then "Operator"
           when .delimited_token?, .delimiter_start?, .delimiter_end?
             # Pieces of a delimited literal are all plain string content.
             "LiteralString"
           else
             "Text"
           end
    @tokens << {type: kind, value: value}
  end
end
# Tokenizer that runs the whole input through `CustomCrystalHighlighter`
# when it is constructed and then iterates over the tokens it collected.
class CrystalTokenizer < Tartrazine::BaseTokenizer
  include Iterator(Token)

  @hl = CustomCrystalHighlighter.new
  @lexer : BaseLexer
  @iter : Iterator(Token)

  # delegate next, to: @iter

  # Highlights *text* immediately and prepares the token iterator.
  #
  # A trailing newline is appended first when the lexer's `ensure_nl`
  # option is set, the text is non-empty and does not already end in one,
  # and this is not a *secondary* tokenizer.
  def initialize(@lexer : BaseLexer, text : String, secondary = false)
    missing_newline = !text.empty? && !text.ends_with?('\n')
    source = if missing_newline && @lexer.config[:ensure_nl] && !secondary
               text + "\n"
             else
               text
             end

    # All the tokenizing happens here, up front.
    @hl.highlight(source)
    @iter = @hl.@tokens.each
  end

  # Returns the next collected token, or `Iterator::Stop` once exhausted.
  def next : Iterator::Stop | Token
    @iter.next
  end
end
# Lexer whose tokenizers are `CrystalTokenizer` instances.
class CrystalLexer < BaseLexer
  # Builds a tokenizer for *text*; *secondary* is passed through unchanged.
  def tokenizer(text : String, secondary = false) : BaseTokenizer
    CrystalTokenizer.new(self, text, secondary: secondary)
  end
end
end

View File

@ -17,7 +17,6 @@ module Tartrazine
abstract struct BaseRule
abstract def match(text : Bytes, pos : Int32, tokenizer : Tokenizer) : Tuple(Bool, Int32, Array(Token))
abstract def initialize(node : XML::Node)
@actions : Array(Action) = [] of Action
@ -40,9 +39,6 @@ module Tartrazine
return true, pos + match[0].size, @actions.flat_map(&.emit(match, tokenizer))
end
def initialize(node : XML::Node)
end
def initialize(node : XML::Node, multiline, dotall, ignorecase)
pattern = node["pattern"]
pattern = "(?m)" + pattern if multiline