mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-06-08 04:30:26 -03:00
Compare commits
56 Commits
c7a37baf17
...
72afec773e
Author | SHA1 | Date | |
---|---|---|---|
72afec773e | |||
a5926af518 | |||
fc9f834bc8 | |||
58fd42d936 | |||
5a88a51f3e | |||
fd7c6fa4b3 | |||
6264bfc754 | |||
38196d6e96 | |||
c6cd74e339 | |||
17c66a6572 | |||
cd7e150aae | |||
|
176b8e9bc9 | ||
d8ddf5d8b6 | |||
06556877ef | |||
3d5d073471 | |||
a2884c4c78 | |||
bd3df10d2c | |||
0f3b7fc3c5 | |||
7f4296e9d7 | |||
f883065092 | |||
746abe53ea | |||
90971e8f1b | |||
057879c6ee | |||
215d53e173 | |||
f435d7df21 | |||
5b0a1789dc | |||
76ef1fea41 | |||
3ebedec6c1 | |||
57e63f2308 | |||
4a598a575b | |||
9042138053 | |||
fa647e898a | |||
ad92929a10 | |||
bb952a44b8 | |||
ae03e4612e | |||
471b2f5050 | |||
5a3b08e716 | |||
9ebb9f2765 | |||
7538fc76aa | |||
788577b226 | |||
1f01146b1f | |||
9041b763ea | |||
ada30915c3 | |||
78eff45ea0 | |||
e817aedd60 | |||
20d6b65346 | |||
cb09dff9f1 | |||
b589726352 | |||
a3a7b5bd9a | |||
58e8dac038 | |||
f72a40f095 | |||
bf257a5b82 | |||
029495590c | |||
115debdec6 | |||
4612db58fe | |||
f45a86c83a |
1
.gitignore
vendored
1
.gitignore
vendored
@ -8,3 +8,4 @@ pygments/
|
|||||||
shard.lock
|
shard.lock
|
||||||
.vscode/
|
.vscode/
|
||||||
.crystal/
|
.crystal/
|
||||||
|
venv/
|
||||||
|
25
README.md
25
README.md
@ -29,7 +29,7 @@ This only covers the RegexLexers, which are the most common ones,
|
|||||||
but it means the supported languages are a subset of Chroma's, which
|
but it means the supported languages are a subset of Chroma's, which
|
||||||
is a subset of Pygments'.
|
is a subset of Pygments'.
|
||||||
|
|
||||||
Currently Tartrazine supports ... 241 languages.
|
Currently Tartrazine supports ... 248 languages.
|
||||||
|
|
||||||
It has 331 themes (63 from Chroma, the rest are base16 themes via
|
It has 331 themes (63 from Chroma, the rest are base16 themes via
|
||||||
[Sixteen](https://github.com/ralsina/sixteen)
|
[Sixteen](https://github.com/ralsina/sixteen)
|
||||||
@ -47,7 +47,22 @@ To build from source:
|
|||||||
2. Run `make` to build the `tartrazine` binary
|
2. Run `make` to build the `tartrazine` binary
|
||||||
3. Copy the binary somewhere in your PATH.
|
3. Copy the binary somewhere in your PATH.
|
||||||
|
|
||||||
## Usage
|
## Usage as a CLI tool
|
||||||
|
|
||||||
|
Show a syntax highlighted version of a C source file in your terminal:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
$ tartrazine whatever.c -l c -t catppuccin-macchiato --line-numbers -f terminal
|
||||||
|
```
|
||||||
|
|
||||||
|
Generate a standalone HTML file from a C source file with the syntax highlighted:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
$ tartrazine whatever.c -l c -t catppuccin-macchiato --line-numbers \
|
||||||
|
--standalone -f html -o whatever.html
|
||||||
|
```
|
||||||
|
|
||||||
|
## Usage as a Library
|
||||||
|
|
||||||
This works:
|
This works:
|
||||||
|
|
||||||
@ -56,7 +71,9 @@ require "tartrazine"
|
|||||||
|
|
||||||
lexer = Tartrazine.lexer("crystal")
|
lexer = Tartrazine.lexer("crystal")
|
||||||
theme = Tartrazine.theme("catppuccin-macchiato")
|
theme = Tartrazine.theme("catppuccin-macchiato")
|
||||||
puts Tartrazine::Html.new.format(File.read(ARGV[0]), lexer, theme)
|
formatter = Tartrazine::Html.new
|
||||||
|
formatter.theme = theme
|
||||||
|
puts formatter.format(File.read(ARGV[0]), lexer)
|
||||||
```
|
```
|
||||||
|
|
||||||
## Contributing
|
## Contributing
|
||||||
@ -69,4 +86,4 @@ puts Tartrazine::Html.new.format(File.read(ARGV[0]), lexer, theme)
|
|||||||
|
|
||||||
## Contributors
|
## Contributors
|
||||||
|
|
||||||
- [Roberto Alsina](https://github.com/ralsina) - creator and maintainer
|
- [Roberto Alsina](https://github.com/ralsina) - creator and maintainer
|
||||||
|
5
TODO.md
5
TODO.md
@ -9,4 +9,7 @@
|
|||||||
* ✅ Implement lexer loader by file extension
|
* ✅ Implement lexer loader by file extension
|
||||||
* ✅ Add --line-numbers to terminal formatter
|
* ✅ Add --line-numbers to terminal formatter
|
||||||
* Implement lexer loader by mime type
|
* Implement lexer loader by mime type
|
||||||
* Implement Pygment's "DelegateLexer"
|
* ✅ Implement Delegating lexers
|
||||||
|
* ✅ Add RstLexer
|
||||||
|
* Add Mako template lexer
|
||||||
|
* Implement heuristic lexer detection
|
||||||
|
130
lexers/LiquidLexer.xml
Normal file
130
lexers/LiquidLexer.xml
Normal file
@ -0,0 +1,130 @@
|
|||||||
|
|
||||||
|
<lexer>
|
||||||
|
<config>
|
||||||
|
<name>liquid</name>
|
||||||
|
<alias>liquid</alias>
|
||||||
|
<filename>*.liquid</filename>
|
||||||
|
</config>
|
||||||
|
<rules>
|
||||||
|
<state name="root">
|
||||||
|
<rule pattern="[^{]+"><token type="Text"/></rule>
|
||||||
|
<rule pattern="(\{%)(\s*)"><bygroups><token type="Punctuation"/><token type="TextWhitespace"/></bygroups><push state="tag-or-block"/></rule>
|
||||||
|
<rule pattern="(\{\{)(\s*)([^\s}]+)"><bygroups><token type="Punctuation"/><token type="TextWhitespace"/><usingself state="generic"/></bygroups><push state="output"/></rule>
|
||||||
|
<rule pattern="\{"><token type="Text"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="tag-or-block">
|
||||||
|
<rule pattern="(if|unless|elsif|case)(?=\s+)"><token type="KeywordReserved"/><push state="condition"/></rule>
|
||||||
|
<rule pattern="(when)(\s+)"><bygroups><token type="KeywordReserved"/><token type="TextWhitespace"/></bygroups><combined state="end-of-block" state="whitespace" state="generic"/></rule>
|
||||||
|
<rule pattern="(else)(\s*)(%\})"><bygroups><token type="KeywordReserved"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups><pop depth="1"/></rule>
|
||||||
|
<rule pattern="(capture)(\s+)([^\s%]+)(\s*)(%\})"><bygroups><token type="NameTag"/><token type="TextWhitespace"/><usingself state="variable"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups><pop depth="1"/></rule>
|
||||||
|
<rule pattern="(comment)(\s*)(%\})"><bygroups><token type="NameTag"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups><push state="comment"/></rule>
|
||||||
|
<rule pattern="(raw)(\s*)(%\})"><bygroups><token type="NameTag"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups><push state="raw"/></rule>
|
||||||
|
<rule pattern="(end(case|unless|if))(\s*)(%\})"><bygroups><token type="KeywordReserved"/>None<token type="TextWhitespace"/><token type="Punctuation"/></bygroups><pop depth="1"/></rule>
|
||||||
|
<rule pattern="(end([^\s%]+))(\s*)(%\})"><bygroups><token type="NameTag"/>None<token type="TextWhitespace"/><token type="Punctuation"/></bygroups><pop depth="1"/></rule>
|
||||||
|
<rule pattern="(cycle)(\s+)(?:([^\s:]*)(:))?(\s*)"><bygroups><token type="NameTag"/><token type="TextWhitespace"/><usingself state="generic"/><token type="Punctuation"/><token type="TextWhitespace"/></bygroups><push state="variable-tag-markup"/></rule>
|
||||||
|
<rule pattern="([^\s%]+)(\s*)"><bygroups><token type="NameTag"/><token type="TextWhitespace"/></bygroups><push state="tag-markup"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="output">
|
||||||
|
<rule><include state="whitespace"/></rule>
|
||||||
|
<rule pattern="\}\}"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||||
|
<rule pattern="\|"><token type="Punctuation"/><push state="filters"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="filters">
|
||||||
|
<rule><include state="whitespace"/></rule>
|
||||||
|
<rule pattern="\}\}"><token type="Punctuation"/><push state="#pop" state="#pop"/></rule>
|
||||||
|
<rule pattern="([^\s|:]+)(:?)(\s*)"><bygroups><token type="NameFunction"/><token type="Punctuation"/><token type="TextWhitespace"/></bygroups><push state="filter-markup"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="filter-markup">
|
||||||
|
<rule pattern="\|"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||||
|
<rule><include state="end-of-tag"/></rule>
|
||||||
|
<rule><include state="default-param-markup"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="condition">
|
||||||
|
<rule><include state="end-of-block"/></rule>
|
||||||
|
<rule><include state="whitespace"/></rule>
|
||||||
|
<rule pattern="([^\s=!><]+)(\s*)([=!><]=?)(\s*)(\S+)(\s*)(%\})"><bygroups><usingself state="generic"/><token type="TextWhitespace"/><token type="Operator"/><token type="TextWhitespace"/><usingself state="generic"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups></rule>
|
||||||
|
<rule pattern="\b!"><token type="Operator"/></rule>
|
||||||
|
<rule pattern="\bnot\b"><token type="OperatorWord"/></rule>
|
||||||
|
<rule pattern="([\w.\'"]+)(\s+)(contains)(\s+)([\w.\'"]+)"><bygroups><usingself state="generic"/><token type="TextWhitespace"/><token type="OperatorWord"/><token type="TextWhitespace"/><usingself state="generic"/></bygroups></rule>
|
||||||
|
<rule><include state="generic"/></rule>
|
||||||
|
<rule><include state="whitespace"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="generic-value">
|
||||||
|
<rule><include state="generic"/></rule>
|
||||||
|
<rule><include state="end-at-whitespace"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="operator">
|
||||||
|
<rule pattern="(\s*)((=|!|>|<)=?)(\s*)"><bygroups><token type="TextWhitespace"/><token type="Operator"/>None<token type="TextWhitespace"/></bygroups><pop depth="1"/></rule>
|
||||||
|
<rule pattern="(\s*)(\bcontains\b)(\s*)"><bygroups><token type="TextWhitespace"/><token type="OperatorWord"/><token type="TextWhitespace"/></bygroups><pop depth="1"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="end-of-tag">
|
||||||
|
<rule pattern="\}\}"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="end-of-block">
|
||||||
|
<rule pattern="%\}"><token type="Punctuation"/><push state="#pop" state="#pop"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="end-at-whitespace">
|
||||||
|
<rule pattern="\s+"><token type="TextWhitespace"/><pop depth="1"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="param-markup">
|
||||||
|
<rule><include state="whitespace"/></rule>
|
||||||
|
<rule pattern="([^\s=:]+)(\s*)(=|:)"><bygroups><token type="NameAttribute"/><token type="TextWhitespace"/><token type="Operator"/></bygroups></rule>
|
||||||
|
<rule pattern="(\{\{)(\s*)([^\s}])(\s*)(\}\})"><bygroups><token type="Punctuation"/><token type="TextWhitespace"/><usingself state="variable"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups></rule>
|
||||||
|
<rule><include state="string"/></rule>
|
||||||
|
<rule><include state="number"/></rule>
|
||||||
|
<rule><include state="keyword"/></rule>
|
||||||
|
<rule pattern=","><token type="Punctuation"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="default-param-markup">
|
||||||
|
<rule><include state="param-markup"/></rule>
|
||||||
|
<rule pattern="."><token type="Text"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="variable-param-markup">
|
||||||
|
<rule><include state="param-markup"/></rule>
|
||||||
|
<rule><include state="variable"/></rule>
|
||||||
|
<rule pattern="."><token type="Text"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="tag-markup">
|
||||||
|
<rule pattern="%\}"><token type="Punctuation"/><push state="#pop" state="#pop"/></rule>
|
||||||
|
<rule><include state="default-param-markup"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="variable-tag-markup">
|
||||||
|
<rule pattern="%\}"><token type="Punctuation"/><push state="#pop" state="#pop"/></rule>
|
||||||
|
<rule><include state="variable-param-markup"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="keyword">
|
||||||
|
<rule pattern="\b(false|true)\b"><token type="KeywordConstant"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="variable">
|
||||||
|
<rule pattern="[a-zA-Z_]\w*"><token type="NameVariable"/></rule>
|
||||||
|
<rule pattern="(?<=\w)\.(?=\w)"><token type="Punctuation"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="string">
|
||||||
|
<rule pattern="'[^']*'"><token type="LiteralStringSingle"/></rule>
|
||||||
|
<rule pattern=""[^"]*""><token type="LiteralStringDouble"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="number">
|
||||||
|
<rule pattern="\d+\.\d+"><token type="LiteralNumberFloat"/></rule>
|
||||||
|
<rule pattern="\d+"><token type="LiteralNumberInteger"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="generic">
|
||||||
|
<rule><include state="keyword"/></rule>
|
||||||
|
<rule><include state="string"/></rule>
|
||||||
|
<rule><include state="number"/></rule>
|
||||||
|
<rule><include state="variable"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="whitespace">
|
||||||
|
<rule pattern="[ \t]+"><token type="TextWhitespace"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="comment">
|
||||||
|
<rule pattern="(\{%)(\s*)(endcomment)(\s*)(%\})"><bygroups><token type="Punctuation"/><token type="TextWhitespace"/><token type="NameTag"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups><push state="#pop" state="#pop"/></rule>
|
||||||
|
<rule pattern="."><token type="Comment"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="raw">
|
||||||
|
<rule pattern="[^{]+"><token type="Text"/></rule>
|
||||||
|
<rule pattern="(\{%)(\s*)(endraw)(\s*)(%\})"><bygroups><token type="Punctuation"/><token type="TextWhitespace"/><token type="NameTag"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups><pop depth="1"/></rule>
|
||||||
|
<rule pattern="\{"><token type="Text"/></rule>
|
||||||
|
</state>
|
||||||
|
</rules>
|
||||||
|
</lexer>
|
||||||
|
|
55
lexers/VelocityLexer.xml
Normal file
55
lexers/VelocityLexer.xml
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
|
||||||
|
<lexer>
|
||||||
|
<config>
|
||||||
|
<name>Velocity</name>
|
||||||
|
<alias>velocity</alias>
|
||||||
|
<filename>*.vm</filename>
|
||||||
|
<filename>*.fhtml</filename>
|
||||||
|
<dot_all>true</dot_all>
|
||||||
|
</config>
|
||||||
|
<rules>
|
||||||
|
<state name="root">
|
||||||
|
<rule pattern="[^{#$]+"><token type="Other"/></rule>
|
||||||
|
<rule pattern="(#)(\*.*?\*)(#)"><bygroups><token type="CommentPreproc"/><token type="Comment"/><token type="CommentPreproc"/></bygroups></rule>
|
||||||
|
<rule pattern="(##)(.*?$)"><bygroups><token type="CommentPreproc"/><token type="Comment"/></bygroups></rule>
|
||||||
|
<rule pattern="(#\{?)([a-zA-Z_]\w*)(\}?)(\s?\()"><bygroups><token type="CommentPreproc"/><token type="NameFunction"/><token type="CommentPreproc"/><token type="Punctuation"/></bygroups><push state="directiveparams"/></rule>
|
||||||
|
<rule pattern="(#\{?)([a-zA-Z_]\w*)(\}|\b)"><bygroups><token type="CommentPreproc"/><token type="NameFunction"/><token type="CommentPreproc"/></bygroups></rule>
|
||||||
|
<rule pattern="\$!?\{?"><token type="Punctuation"/><push state="variable"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="variable">
|
||||||
|
<rule pattern="[a-zA-Z_]\w*"><token type="NameVariable"/></rule>
|
||||||
|
<rule pattern="\("><token type="Punctuation"/><push state="funcparams"/></rule>
|
||||||
|
<rule pattern="(\.)([a-zA-Z_]\w*)"><bygroups><token type="Punctuation"/><token type="NameVariable"/></bygroups><push/></rule>
|
||||||
|
<rule pattern="\}"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||||
|
<rule><pop depth="1"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="directiveparams">
|
||||||
|
<rule pattern="(&&|\|\||==?|!=?|[-<>+*%&|^/])|\b(eq|ne|gt|lt|ge|le|not|in)\b"><token type="Operator"/></rule>
|
||||||
|
<rule pattern="\["><token type="Operator"/><push state="rangeoperator"/></rule>
|
||||||
|
<rule pattern="\b[a-zA-Z_]\w*\b"><token type="NameFunction"/></rule>
|
||||||
|
<rule><include state="funcparams"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="rangeoperator">
|
||||||
|
<rule pattern="\.\."><token type="Operator"/></rule>
|
||||||
|
<rule><include state="funcparams"/></rule>
|
||||||
|
<rule pattern="\]"><token type="Operator"/><pop depth="1"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="funcparams">
|
||||||
|
<rule pattern="\$!?\{?"><token type="Punctuation"/><push state="variable"/></rule>
|
||||||
|
<rule pattern="\s+"><token type="Text"/></rule>
|
||||||
|
<rule pattern="[,:]"><token type="Punctuation"/></rule>
|
||||||
|
<rule pattern=""(\\\\|\\[^\\]|[^"\\])*""><token type="LiteralStringDouble"/></rule>
|
||||||
|
<rule pattern="'(\\\\|\\[^\\]|[^'\\])*'"><token type="LiteralStringSingle"/></rule>
|
||||||
|
<rule pattern="0[xX][0-9a-fA-F]+[Ll]?"><token type="LiteralNumber"/></rule>
|
||||||
|
<rule pattern="\b[0-9]+\b"><token type="LiteralNumber"/></rule>
|
||||||
|
<rule pattern="(true|false|null)\b"><token type="KeywordConstant"/></rule>
|
||||||
|
<rule pattern="\("><token type="Punctuation"/><push/></rule>
|
||||||
|
<rule pattern="\)"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||||
|
<rule pattern="\{"><token type="Punctuation"/><push/></rule>
|
||||||
|
<rule pattern="\}"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||||
|
<rule pattern="\["><token type="Punctuation"/><push/></rule>
|
||||||
|
<rule pattern="\]"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||||
|
</state>
|
||||||
|
</rules>
|
||||||
|
</lexer>
|
||||||
|
|
22
lexers/bbcode.xml
Normal file
22
lexers/bbcode.xml
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
|
||||||
|
<lexer>
|
||||||
|
<config>
|
||||||
|
<name>BBCode</name>
|
||||||
|
<alias>bbcode</alias>
|
||||||
|
<mime_type>text/x-bbcode</mime_type>
|
||||||
|
</config>
|
||||||
|
<rules>
|
||||||
|
<state name="root">
|
||||||
|
<rule pattern="[^[]+"><token type="Text"/></rule>
|
||||||
|
<rule pattern="\[/?\w+"><token type="Keyword"/><push state="tag"/></rule>
|
||||||
|
<rule pattern="\["><token type="Text"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="tag">
|
||||||
|
<rule pattern="\s+"><token type="Text"/></rule>
|
||||||
|
<rule pattern="(\w+)(=)("?[^\s"\]]+"?)"><bygroups><token type="NameAttribute"/><token type="Operator"/><token type="LiteralString"/></bygroups></rule>
|
||||||
|
<rule pattern="(=)("?[^\s"\]]+"?)"><bygroups><token type="Operator"/><token type="LiteralString"/></bygroups></rule>
|
||||||
|
<rule pattern="\]"><token type="Keyword"/><pop depth="1"/></rule>
|
||||||
|
</state>
|
||||||
|
</rules>
|
||||||
|
</lexer>
|
||||||
|
|
@ -3,6 +3,7 @@
|
|||||||
<name>Groff</name>
|
<name>Groff</name>
|
||||||
<alias>groff</alias>
|
<alias>groff</alias>
|
||||||
<alias>nroff</alias>
|
<alias>nroff</alias>
|
||||||
|
<alias>roff</alias>
|
||||||
<alias>man</alias>
|
<alias>man</alias>
|
||||||
<filename>*.[1-9]</filename>
|
<filename>*.[1-9]</filename>
|
||||||
<filename>*.1p</filename>
|
<filename>*.1p</filename>
|
||||||
@ -87,4 +88,4 @@
|
|||||||
</rule>
|
</rule>
|
||||||
</state>
|
</state>
|
||||||
</rules>
|
</rules>
|
||||||
</lexer>
|
</lexer>
|
||||||
|
@ -30,12 +30,12 @@
|
|||||||
disambiguations:
|
disambiguations:
|
||||||
- extensions: ['.1', '.2', '.3', '.4', '.5', '.6', '.7', '.8', '.9']
|
- extensions: ['.1', '.2', '.3', '.4', '.5', '.6', '.7', '.8', '.9']
|
||||||
rules:
|
rules:
|
||||||
- language: Roff Manpage
|
- language: man
|
||||||
and:
|
and:
|
||||||
- named_pattern: mdoc-date
|
- named_pattern: mdoc-date
|
||||||
- named_pattern: mdoc-title
|
- named_pattern: mdoc-title
|
||||||
- named_pattern: mdoc-heading
|
- named_pattern: mdoc-heading
|
||||||
- language: Roff Manpage
|
- language: man
|
||||||
and:
|
and:
|
||||||
- named_pattern: man-title
|
- named_pattern: man-title
|
||||||
- named_pattern: man-heading
|
- named_pattern: man-heading
|
||||||
@ -43,12 +43,12 @@ disambiguations:
|
|||||||
pattern: '^\.(?:[A-Za-z]{2}(?:\s|$)|\\")'
|
pattern: '^\.(?:[A-Za-z]{2}(?:\s|$)|\\")'
|
||||||
- extensions: ['.1in', '.1m', '.1x', '.3in', '.3m', '.3p', '.3pm', '.3qt', '.3x', '.man', '.mdoc']
|
- extensions: ['.1in', '.1m', '.1x', '.3in', '.3m', '.3p', '.3pm', '.3qt', '.3x', '.man', '.mdoc']
|
||||||
rules:
|
rules:
|
||||||
- language: Roff Manpage
|
- language: man
|
||||||
and:
|
and:
|
||||||
- named_pattern: mdoc-date
|
- named_pattern: mdoc-date
|
||||||
- named_pattern: mdoc-title
|
- named_pattern: mdoc-title
|
||||||
- named_pattern: mdoc-heading
|
- named_pattern: mdoc-heading
|
||||||
- language: Roff Manpage
|
- language: man
|
||||||
and:
|
and:
|
||||||
- named_pattern: man-title
|
- named_pattern: man-title
|
||||||
- named_pattern: man-heading
|
- named_pattern: man-heading
|
56
lexers/markdown.xml
Normal file
56
lexers/markdown.xml
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
|
||||||
|
<lexer>
|
||||||
|
<config>
|
||||||
|
<name>Markdown</name>
|
||||||
|
<alias>markdown</alias>
|
||||||
|
<alias>md</alias>
|
||||||
|
<filename>*.md</filename>
|
||||||
|
<filename>*.markdown</filename>
|
||||||
|
<mime_type>text/x-markdown</mime_type>
|
||||||
|
</config>
|
||||||
|
<rules>
|
||||||
|
<state name="root">
|
||||||
|
<rule pattern="(^#[^#].+)(\n)"><bygroups><token type="GenericHeading"/><token type="Text"/></bygroups></rule>
|
||||||
|
<rule pattern="(^#{2,6}[^#].+)(\n)"><bygroups><token type="GenericSubheading"/><token type="Text"/></bygroups></rule>
|
||||||
|
<rule pattern="^(.+)(\n)(=+)(\n)"><bygroups><token type="GenericHeading"/><token type="Text"/><token type="GenericHeading"/><token type="Text"/></bygroups></rule>
|
||||||
|
<rule pattern="^(.+)(\n)(-+)(\n)"><bygroups><token type="GenericSubheading"/><token type="Text"/><token type="GenericSubheading"/><token type="Text"/></bygroups></rule>
|
||||||
|
<rule pattern="^(\s*)([*-] )(\[[ xX]\])( .+\n)"><bygroups><token type="TextWhitespace"/><token type="Keyword"/><token type="Keyword"/><usingself state="inline"/></bygroups></rule>
|
||||||
|
<rule pattern="^(\s*)([*-])(\s)(.+\n)"><bygroups><token type="TextWhitespace"/><token type="Keyword"/><token type="TextWhitespace"/><usingself state="inline"/></bygroups></rule>
|
||||||
|
<rule pattern="^(\s*)([0-9]+\.)( .+\n)"><bygroups><token type="TextWhitespace"/><token type="Keyword"/><usingself state="inline"/></bygroups></rule>
|
||||||
|
<rule pattern="^(\s*>\s)(.+\n)"><bygroups><token type="Keyword"/><token type="GenericEmph"/></bygroups></rule>
|
||||||
|
<rule pattern="^(```\n)([\w\W]*?)(^```$)">
|
||||||
|
<bygroups>
|
||||||
|
<token type="LiteralStringBacktick"/>
|
||||||
|
<token type="Text"/>
|
||||||
|
<token type="LiteralStringBacktick"/>
|
||||||
|
</bygroups>
|
||||||
|
</rule>
|
||||||
|
<rule pattern="^(```)(\w+)(\n)([\w\W]*?)(^```$)">
|
||||||
|
<bygroups>
|
||||||
|
<token type="LiteralStringBacktick"/>
|
||||||
|
<token type="NameLabel"/>
|
||||||
|
<token type="TextWhitespace"/>
|
||||||
|
<UsingByGroup lexer="2" content="4"/>
|
||||||
|
<token type="LiteralStringBacktick"/>
|
||||||
|
</bygroups>
|
||||||
|
</rule>
|
||||||
|
<rule><include state="inline"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="inline">
|
||||||
|
<rule pattern="\\."><token type="Text"/></rule>
|
||||||
|
<rule pattern="([^`]?)(`[^`\n]+`)"><bygroups><token type="Text"/><token type="LiteralStringBacktick"/></bygroups></rule>
|
||||||
|
<rule pattern="([^\*]?)(\*\*[^* \n][^*\n]*\*\*)"><bygroups><token type="Text"/><token type="GenericStrong"/></bygroups></rule>
|
||||||
|
<rule pattern="([^_]?)(__[^_ \n][^_\n]*__)"><bygroups><token type="Text"/><token type="GenericStrong"/></bygroups></rule>
|
||||||
|
<rule pattern="([^\*]?)(\*[^* \n][^*\n]*\*)"><bygroups><token type="Text"/><token type="GenericEmph"/></bygroups></rule>
|
||||||
|
<rule pattern="([^_]?)(_[^_ \n][^_\n]*_)"><bygroups><token type="Text"/><token type="GenericEmph"/></bygroups></rule>
|
||||||
|
<rule pattern="([^~]?)(~~[^~ \n][^~\n]*~~)"><bygroups><token type="Text"/><token type="GenericDeleted"/></bygroups></rule>
|
||||||
|
<rule pattern="[@#][\w/:]+"><token type="NameEntity"/></rule>
|
||||||
|
<rule pattern="(!?\[)([^]]+)(\])(\()([^)]+)(\))"><bygroups><token type="Text"/><token type="NameTag"/><token type="Text"/><token type="Text"/><token type="NameAttribute"/><token type="Text"/></bygroups></rule>
|
||||||
|
<rule pattern="(\[)([^]]+)(\])(\[)([^]]*)(\])"><bygroups><token type="Text"/><token type="NameTag"/><token type="Text"/><token type="Text"/><token type="NameLabel"/><token type="Text"/></bygroups></rule>
|
||||||
|
<rule pattern="^(\s*\[)([^]]*)(\]:\s*)(.+)"><bygroups><token type="Text"/><token type="NameLabel"/><token type="Text"/><token type="NameAttribute"/></bygroups></rule>
|
||||||
|
<rule pattern="[^\\\s]+"><token type="Text"/></rule>
|
||||||
|
<rule pattern="."><token type="Text"/></rule>
|
||||||
|
</state>
|
||||||
|
</rules>
|
||||||
|
</lexer>
|
||||||
|
|
34
lexers/moinwiki.xml
Normal file
34
lexers/moinwiki.xml
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
|
||||||
|
<lexer>
|
||||||
|
<config>
|
||||||
|
<name>MoinMoin/Trac Wiki markup</name>
|
||||||
|
<alias>trac-wiki</alias>
|
||||||
|
<alias>moin</alias>
|
||||||
|
<mime_type>text/x-trac-wiki</mime_type>
|
||||||
|
<case_insensitive>true</case_insensitive>
|
||||||
|
</config>
|
||||||
|
<rules>
|
||||||
|
<state name="root">
|
||||||
|
<rule pattern="^#.*$"><token type="Comment"/></rule>
|
||||||
|
<rule pattern="(!)(\S+)"><bygroups><token type="Keyword"/><token type="Text"/></bygroups></rule>
|
||||||
|
<rule pattern="^(=+)([^=]+)(=+)(\s*#.+)?$"><bygroups><token type="GenericHeading"/><usingself state="root"/><token type="GenericHeading"/><token type="LiteralString"/></bygroups></rule>
|
||||||
|
<rule pattern="(\{\{\{)(\n#!.+)?"><bygroups><token type="NameBuiltin"/><token type="NameNamespace"/></bygroups><push state="codeblock"/></rule>
|
||||||
|
<rule pattern="(\'\'\'?|\|\||`|__|~~|\^|,,|::)"><token type="Comment"/></rule>
|
||||||
|
<rule pattern="^( +)([.*-])( )"><bygroups><token type="Text"/><token type="NameBuiltin"/><token type="Text"/></bygroups></rule>
|
||||||
|
<rule pattern="^( +)([a-z]{1,5}\.)( )"><bygroups><token type="Text"/><token type="NameBuiltin"/><token type="Text"/></bygroups></rule>
|
||||||
|
<rule pattern="\[\[\w+.*?\]\]"><token type="Keyword"/></rule>
|
||||||
|
<rule pattern="(\[[^\s\]]+)(\s+[^\]]+?)?(\])"><bygroups><token type="Keyword"/><token type="LiteralString"/><token type="Keyword"/></bygroups></rule>
|
||||||
|
<rule pattern="^----+$"><token type="Keyword"/></rule>
|
||||||
|
<rule pattern="[^\n\'\[{!_~^,|]+"><token type="Text"/></rule>
|
||||||
|
<rule pattern="\n"><token type="Text"/></rule>
|
||||||
|
<rule pattern="."><token type="Text"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="codeblock">
|
||||||
|
<rule pattern="\}\}\}"><token type="NameBuiltin"/><pop depth="1"/></rule>
|
||||||
|
<rule pattern="\{\{\{"><token type="Text"/><push/></rule>
|
||||||
|
<rule pattern="[^{}]+"><token type="CommentPreproc"/></rule>
|
||||||
|
<rule pattern="."><token type="CommentPreproc"/></rule>
|
||||||
|
</state>
|
||||||
|
</rules>
|
||||||
|
</lexer>
|
||||||
|
|
76
lexers/rst.xml
Normal file
76
lexers/rst.xml
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
|
||||||
|
<lexer>
|
||||||
|
<config>
|
||||||
|
<name>reStructuredText</name>
|
||||||
|
<alias>restructuredtext</alias>
|
||||||
|
<alias>rst</alias>
|
||||||
|
<alias>rest</alias>
|
||||||
|
<filename>*.rst</filename>
|
||||||
|
<filename>*.rest</filename>
|
||||||
|
<mime_type>text/x-rst</mime_type>
|
||||||
|
<mime_type>text/prs.fallenstein.rst</mime_type>
|
||||||
|
</config>
|
||||||
|
<rules>
|
||||||
|
<state name="root">
|
||||||
|
<rule pattern="^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)(.+)(\n)(\1)(\n)"><bygroups><token type="GenericHeading"/><token type="Text"/><token type="GenericHeading"/><token type="Text"/><token type="GenericHeading"/><token type="Text"/></bygroups></rule>
|
||||||
|
<rule pattern="^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)"><bygroups><token type="GenericHeading"/><token type="Text"/><token type="GenericHeading"/><token type="Text"/></bygroups></rule>
|
||||||
|
<rule pattern="^(\s*)([-*+])( .+\n(?:\1 .+\n)*)"><bygroups><token type="Text"/><token type="LiteralNumber"/><usingself state="inline"/></bygroups></rule>
|
||||||
|
<rule pattern="^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1 .+\n)*)"><bygroups><token type="Text"/><token type="LiteralNumber"/><usingself state="inline"/></bygroups></rule>
|
||||||
|
<rule pattern="^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1 .+\n)*)"><bygroups><token type="Text"/><token type="LiteralNumber"/><usingself state="inline"/></bygroups></rule>
|
||||||
|
<rule pattern="^(\s*)([A-Z]+\.)( .+\n(?:\1 .+\n)+)"><bygroups><token type="Text"/><token type="LiteralNumber"/><usingself state="inline"/></bygroups></rule>
|
||||||
|
<rule pattern="^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)"><bygroups><token type="Text"/><token type="LiteralNumber"/><usingself state="inline"/></bygroups></rule>
|
||||||
|
<rule pattern="^(\s*)(\|)( .+\n(?:\| .+\n)*)"><bygroups><token type="Text"/><token type="Operator"/><usingself state="inline"/></bygroups></rule>
|
||||||
|
<rule pattern="^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*)?\n)+)">
|
||||||
|
<bygroups>
|
||||||
|
<token type="Punctuation"/>
|
||||||
|
<token type="Text"/>
|
||||||
|
<token type="OperatorWord"/>
|
||||||
|
<token type="Punctuation"/>
|
||||||
|
<token type="Text"/>
|
||||||
|
<token type="Keyword"/>
|
||||||
|
<token type="Text"/>
|
||||||
|
<token type="Text"/>
|
||||||
|
<UsingByGroup lexer="6" content="9,10,11"/>
|
||||||
|
</bygroups>
|
||||||
|
</rule>
|
||||||
|
<rule pattern="^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))">
|
||||||
|
<bygroups>
|
||||||
|
<token type="Punctuation"/>
|
||||||
|
<token type="Text"/>
|
||||||
|
<token type="OperatorWord"/>
|
||||||
|
<token type="Punctuation"/>
|
||||||
|
<token type="Text"/>
|
||||||
|
<usingself state="inline"/>
|
||||||
|
</bygroups>
|
||||||
|
</rule>
|
||||||
|
<rule pattern="^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$"><bygroups><token type="Punctuation"/><token type="Text"/><token type="NameTag"/><usingself state="inline"/></bygroups></rule>
|
||||||
|
<rule pattern="^( *\.\.)(\s*)(\[.+\])(.*?)$"><bygroups><token type="Punctuation"/><token type="Text"/><token type="NameTag"/><usingself state="inline"/></bygroups></rule>
|
||||||
|
<rule pattern="^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))"><bygroups><token type="Punctuation"/><token type="Text"/><token type="NameTag"/><token type="Text"/><token type="OperatorWord"/><token type="Punctuation"/><token type="Text"/><usingself state="inline"/></bygroups></rule>
|
||||||
|
<rule pattern="^ *\.\..*(\n( +.*\n|\n)+)?"><token type="Comment"/></rule>
|
||||||
|
<rule pattern="^( *)(:(?:\\\\|\\:|[^:\n])+:(?=\s))([ \t]*)"><bygroups><token type="Text"/><token type="NameClass"/><token type="Text"/></bygroups></rule>
|
||||||
|
<rule pattern="^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)"><bygroups><usingself state="inline"/><usingself state="inline"/></bygroups></rule>
|
||||||
|
<rule pattern="(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*)?\n)+)"><bygroups><token type="LiteralStringEscape"/><token type="Text"/><token type="LiteralString"/><token type="LiteralString"/><token type="Text"/><token type="LiteralString"/></bygroups></rule>
|
||||||
|
<rule><include state="inline"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="inline">
|
||||||
|
<rule pattern="\\."><token type="Text"/></rule>
|
||||||
|
<rule pattern="``"><token type="LiteralString"/><push state="literal"/></rule>
|
||||||
|
<rule pattern="(`.+?)(<.+?>)(`__?)"><bygroups><token type="LiteralString"/><token type="LiteralStringInterpol"/><token type="LiteralString"/></bygroups></rule>
|
||||||
|
<rule pattern="`.+?`__?"><token type="LiteralString"/></rule>
|
||||||
|
<rule pattern="(`.+?`)(:[a-zA-Z0-9:-]+?:)?"><bygroups><token type="NameVariable"/><token type="NameAttribute"/></bygroups></rule>
|
||||||
|
<rule pattern="(:[a-zA-Z0-9:-]+?:)(`.+?`)"><bygroups><token type="NameAttribute"/><token type="NameVariable"/></bygroups></rule>
|
||||||
|
<rule pattern="\*\*.+?\*\*"><token type="GenericStrong"/></rule>
|
||||||
|
<rule pattern="\*.+?\*"><token type="GenericEmph"/></rule>
|
||||||
|
<rule pattern="\[.*?\]_"><token type="LiteralString"/></rule>
|
||||||
|
<rule pattern="<.+?>"><token type="NameTag"/></rule>
|
||||||
|
<rule pattern="[^\\\n\[*`:]+"><token type="Text"/></rule>
|
||||||
|
<rule pattern="."><token type="Text"/></rule>
|
||||||
|
</state>
|
||||||
|
<state name="literal">
|
||||||
|
<rule pattern="[^`]+"><token type="LiteralString"/></rule>
|
||||||
|
<rule pattern="``((?=$)|(?=[-/:.,; \n\x00‐‑‒–— '"\)\]\}>’”»!\?]))"><token type="LiteralString"/><pop depth="1"/></rule>
|
||||||
|
<rule pattern="`"><token type="LiteralString"/></rule>
|
||||||
|
</state>
|
||||||
|
</rules>
|
||||||
|
</lexer>
|
||||||
|
|
@ -40,15 +40,18 @@ for fname in glob.glob("lexers/*.xml"):
|
|||||||
with open("src/constants/lexers.cr", "w") as f:
|
with open("src/constants/lexers.cr", "w") as f:
|
||||||
f.write("module Tartrazine\n")
|
f.write("module Tartrazine\n")
|
||||||
f.write(" LEXERS_BY_NAME = {\n")
|
f.write(" LEXERS_BY_NAME = {\n")
|
||||||
for k, v in lexer_by_name.items():
|
for k in sorted(lexer_by_name.keys()):
|
||||||
|
v = lexer_by_name[k]
|
||||||
f.write(f'"{k}" => "{v}", \n')
|
f.write(f'"{k}" => "{v}", \n')
|
||||||
f.write("}\n")
|
f.write("}\n")
|
||||||
f.write(" LEXERS_BY_MIMETYPE = {\n")
|
f.write(" LEXERS_BY_MIMETYPE = {\n")
|
||||||
for k, v in lexer_by_mimetype.items():
|
for k in sorted(lexer_by_mimetype.keys()):
|
||||||
|
v = lexer_by_mimetype[k]
|
||||||
f.write(f'"{k}" => "{v}", \n')
|
f.write(f'"{k}" => "{v}", \n')
|
||||||
f.write("}\n")
|
f.write("}\n")
|
||||||
f.write(" LEXERS_BY_FILENAME = {\n")
|
f.write(" LEXERS_BY_FILENAME = {\n")
|
||||||
for k, v in lexer_by_filename.items():
|
for k in sorted(lexer_by_filename.keys()):
|
||||||
|
v = lexer_by_filename[k]
|
||||||
f.write(f'"{k}" => {str(list(v)).replace("'", "\"")}, \n')
|
f.write(f'"{k}" => {str(list(v)).replace("'", "\"")}, \n')
|
||||||
f.write("}\n")
|
f.write("}\n")
|
||||||
f.write("end\n")
|
f.write("end\n")
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
name: tartrazine
|
name: tartrazine
|
||||||
version: 0.4.0
|
version: 0.6.0
|
||||||
|
|
||||||
authors:
|
authors:
|
||||||
- Roberto Alsina <roberto.alsina@gmail.com>
|
- Roberto Alsina <roberto.alsina@gmail.com>
|
||||||
|
@ -72,8 +72,8 @@ end
|
|||||||
|
|
||||||
# Helper that creates lexer and tokenizes
|
# Helper that creates lexer and tokenizes
|
||||||
def tokenize(lexer_name, text)
|
def tokenize(lexer_name, text)
|
||||||
lexer = Tartrazine.lexer(lexer_name)
|
tokenizer = Tartrazine.lexer(lexer_name).tokenizer(text)
|
||||||
lexer.tokenize(text)
|
Tartrazine::Lexer.collapse_tokens(tokenizer.to_a)
|
||||||
end
|
end
|
||||||
|
|
||||||
# Helper that tokenizes using chroma to validate the lexer
|
# Helper that tokenizes using chroma to validate the lexer
|
||||||
|
138
src/actions.cr
138
src/actions.cr
@ -8,19 +8,32 @@ require "./tartrazine"
|
|||||||
# perform a list of actions. These actions can emit tokens
|
# perform a list of actions. These actions can emit tokens
|
||||||
# or change the state machine.
|
# or change the state machine.
|
||||||
module Tartrazine
|
module Tartrazine
|
||||||
class Action
|
enum ActionType
|
||||||
property type : String
|
Bygroups
|
||||||
property xml : XML::Node
|
Combined
|
||||||
|
Include
|
||||||
|
Pop
|
||||||
|
Push
|
||||||
|
Token
|
||||||
|
Using
|
||||||
|
Usingbygroup
|
||||||
|
Usingself
|
||||||
|
end
|
||||||
|
|
||||||
|
struct Action
|
||||||
property actions : Array(Action) = [] of Action
|
property actions : Array(Action) = [] of Action
|
||||||
|
|
||||||
property token_type : String = ""
|
@content_index : Array(Int32) = [] of Int32
|
||||||
property states_to_push : Array(String) = [] of String
|
@depth : Int32 = 0
|
||||||
property depth = 0
|
@lexer_index : Int32 = 0
|
||||||
property lexer_name : String = ""
|
@lexer_name : String = ""
|
||||||
property states_to_combine : Array(String) = [] of String
|
@states : Array(String) = [] of String
|
||||||
|
@states_to_push : Array(String) = [] of String
|
||||||
|
@token_type : String = ""
|
||||||
|
@type : ActionType = ActionType::Token
|
||||||
|
|
||||||
def initialize(@type : String, @xml : XML::Node?)
|
def initialize(t : String, xml : XML::Node?)
|
||||||
# Extract information from the XML node we will use later
|
@type = ActionType.parse(t.capitalize)
|
||||||
|
|
||||||
# Some actions may have actions in them, like this:
|
# Some actions may have actions in them, like this:
|
||||||
# <bygroups>
|
# <bygroups>
|
||||||
@ -31,61 +44,56 @@ module Tartrazine
|
|||||||
#
|
#
|
||||||
# The token actions match with the first 2 groups in the regex
|
# The token actions match with the first 2 groups in the regex
|
||||||
# the using action matches the 3rd and shunts it to another lexer
|
# the using action matches the 3rd and shunts it to another lexer
|
||||||
|
xml.children.each do |node|
|
||||||
known_types = %w(token push pop bygroups using usingself include combined)
|
|
||||||
raise Exception.new(
|
|
||||||
"Unknown action type: #{@type}") unless known_types.includes? @type
|
|
||||||
|
|
||||||
@xml.children.each do |node|
|
|
||||||
next unless node.element?
|
next unless node.element?
|
||||||
@actions << Action.new(node.name, node)
|
@actions << Action.new(node.name, node)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Prefetch the attributes we ned from the XML and keep them
|
||||||
case @type
|
case @type
|
||||||
when "token"
|
when ActionType::Token
|
||||||
@token_type = xml["type"]? || ""
|
@token_type = xml["type"]
|
||||||
when "push"
|
when ActionType::Push
|
||||||
@states_to_push = xml.attributes.select { |attrib|
|
@states_to_push = xml.attributes.select { |attrib|
|
||||||
attrib.name == "state"
|
attrib.name == "state"
|
||||||
}.map &.content || [] of String
|
}.map &.content
|
||||||
when "pop"
|
when ActionType::Pop
|
||||||
@depth = xml["depth"]?.try &.to_i || 0
|
@depth = xml["depth"].to_i
|
||||||
when "using"
|
when ActionType::Using
|
||||||
@lexer_name = xml["lexer"]?.try &.downcase || ""
|
@lexer_name = xml["lexer"].downcase
|
||||||
when "combined"
|
when ActionType::Combined
|
||||||
@states_to_combine = xml.attributes.select { |attrib|
|
@states = xml.attributes.select { |attrib|
|
||||||
attrib.name == "state"
|
attrib.name == "state"
|
||||||
}.map &.content
|
}.map &.content
|
||||||
|
when ActionType::Usingbygroup
|
||||||
|
@lexer_index = xml["lexer"].to_i
|
||||||
|
@content_index = xml["content"].split(",").map(&.to_i)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# ameba:disable Metrics/CyclomaticComplexity
|
# ameba:disable Metrics/CyclomaticComplexity
|
||||||
def emit(match : MatchData, lexer : Lexer, match_group = 0) : Array(Token)
|
def emit(match : MatchData, tokenizer : Tokenizer, match_group = 0) : Array(Token)
|
||||||
case type
|
case @type
|
||||||
when "token"
|
when ActionType::Token
|
||||||
raise Exception.new "Can't have a token without a match" if match.empty?
|
raise Exception.new "Can't have a token without a match" if match.empty?
|
||||||
[Token.new(type: @token_type, value: String.new(match[match_group].value))]
|
[Token.new(type: @token_type, value: String.new(match[match_group].value))]
|
||||||
when "push"
|
when ActionType::Push
|
||||||
if @states_to_push.empty?
|
to_push = @states_to_push.empty? ? [tokenizer.state_stack.last] : @states_to_push
|
||||||
# Push without a state means push the current state
|
to_push.each do |state|
|
||||||
@states_to_push = [lexer.state_stack.last]
|
if state == "#pop" && tokenizer.state_stack.size > 1
|
||||||
end
|
|
||||||
@states_to_push.each do |state|
|
|
||||||
if state == "#pop"
|
|
||||||
# Pop the state
|
# Pop the state
|
||||||
lexer.state_stack.pop
|
tokenizer.state_stack.pop
|
||||||
else
|
else
|
||||||
# Really push
|
# Really push
|
||||||
lexer.state_stack << state
|
tokenizer.state_stack << state
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
[] of Token
|
[] of Token
|
||||||
when "pop"
|
when ActionType::Pop
|
||||||
if lexer.state_stack.size > @depth
|
to_pop = [@depth, tokenizer.state_stack.size - 1].min
|
||||||
lexer.state_stack.pop(@depth)
|
tokenizer.state_stack.pop(to_pop)
|
||||||
end
|
|
||||||
[] of Token
|
[] of Token
|
||||||
when "bygroups"
|
when ActionType::Bygroups
|
||||||
# FIXME: handle
|
# FIXME: handle
|
||||||
# ><bygroups>
|
# ><bygroups>
|
||||||
# <token type="Punctuation"/>
|
# <token type="Punctuation"/>
|
||||||
@ -94,7 +102,7 @@ module Tartrazine
|
|||||||
#
|
#
|
||||||
# where that None means skipping a group
|
# where that None means skipping a group
|
||||||
#
|
#
|
||||||
raise Exception.new "Can't have a token without a match" if match.empty?
|
raise Exception.new "Can't have a token without a match" if match.nil?
|
||||||
|
|
||||||
# Each group matches an action. If the group match is empty,
|
# Each group matches an action. If the group match is empty,
|
||||||
# the action is skipped.
|
# the action is skipped.
|
||||||
@ -103,33 +111,47 @@ module Tartrazine
|
|||||||
begin
|
begin
|
||||||
next if match[i + 1].size == 0
|
next if match[i + 1].size == 0
|
||||||
rescue IndexError
|
rescue IndexError
|
||||||
# No match for the last group
|
# FIXME: This should not actually happen
|
||||||
|
# No match for this group
|
||||||
next
|
next
|
||||||
end
|
end
|
||||||
result += e.emit(match, lexer, i + 1)
|
result += e.emit(match, tokenizer, i + 1)
|
||||||
end
|
end
|
||||||
result
|
result
|
||||||
when "using"
|
when ActionType::Using
|
||||||
# Shunt to another lexer entirely
|
# Shunt to another lexer entirely
|
||||||
return [] of Token if match.empty?
|
return [] of Token if match.empty?
|
||||||
Tartrazine.lexer(@lexer_name).tokenize(String.new(match[match_group].value), usingself: true)
|
Tartrazine.lexer(@lexer_name).tokenizer(
|
||||||
when "usingself"
|
String.new(match[match_group].value),
|
||||||
|
secondary: true).to_a
|
||||||
|
when ActionType::Usingself
|
||||||
# Shunt to another copy of this lexer
|
# Shunt to another copy of this lexer
|
||||||
return [] of Token if match.empty?
|
return [] of Token if match.empty?
|
||||||
new_lexer = Lexer.from_xml(lexer.xml)
|
tokenizer.lexer.tokenizer(
|
||||||
new_lexer.tokenize(String.new(match[match_group].value), usingself: true)
|
String.new(match[match_group].value),
|
||||||
when "combined"
|
secondary: true).to_a
|
||||||
# Combine two states into one anonymous state
|
when ActionType::Combined
|
||||||
new_state = @states_to_combine.map { |name|
|
# Combine two or more states into one anonymous state
|
||||||
lexer.states[name]
|
new_state = @states.map { |name|
|
||||||
|
tokenizer.lexer.states[name]
|
||||||
}.reduce { |state1, state2|
|
}.reduce { |state1, state2|
|
||||||
state1 + state2
|
state1 + state2
|
||||||
}
|
}
|
||||||
lexer.states[new_state.name] = new_state
|
tokenizer.lexer.states[new_state.name] = new_state
|
||||||
lexer.state_stack << new_state.name
|
tokenizer.state_stack << new_state.name
|
||||||
[] of Token
|
[] of Token
|
||||||
|
when ActionType::Usingbygroup
|
||||||
|
# Shunt to content-specified lexer
|
||||||
|
return [] of Token if match.empty?
|
||||||
|
content = ""
|
||||||
|
@content_index.each do |i|
|
||||||
|
content += String.new(match[i].value)
|
||||||
|
end
|
||||||
|
Tartrazine.lexer(String.new(match[@lexer_index].value)).tokenizer(
|
||||||
|
content,
|
||||||
|
secondary: true).to_a
|
||||||
else
|
else
|
||||||
raise Exception.new("Unhandled action type: #{type}")
|
raise Exception.new("Unknown action type: #{@type}")
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -3,7 +3,7 @@ module BytesRegex
|
|||||||
|
|
||||||
class Regex
|
class Regex
|
||||||
def initialize(pattern : String, multiline = false, dotall = false, ignorecase = false, anchored = false)
|
def initialize(pattern : String, multiline = false, dotall = false, ignorecase = false, anchored = false)
|
||||||
flags = LibPCRE2::UTF | LibPCRE2::DUPNAMES | LibPCRE2::UCP | LibPCRE2::NO_UTF_CHECK
|
flags = LibPCRE2::UTF | LibPCRE2::UCP | LibPCRE2::NO_UTF_CHECK
|
||||||
flags |= LibPCRE2::MULTILINE if multiline
|
flags |= LibPCRE2::MULTILINE if multiline
|
||||||
flags |= LibPCRE2::DOTALL if dotall
|
flags |= LibPCRE2::DOTALL if dotall
|
||||||
flags |= LibPCRE2::CASELESS if ignorecase
|
flags |= LibPCRE2::CASELESS if ignorecase
|
||||||
@ -31,7 +31,6 @@ module BytesRegex
|
|||||||
end
|
end
|
||||||
|
|
||||||
def match(str : Bytes, pos = 0) : Array(Match)
|
def match(str : Bytes, pos = 0) : Array(Match)
|
||||||
match = [] of Match
|
|
||||||
rc = LibPCRE2.match(
|
rc = LibPCRE2.match(
|
||||||
@re,
|
@re,
|
||||||
str,
|
str,
|
||||||
@ -40,24 +39,25 @@ module BytesRegex
|
|||||||
LibPCRE2::NO_UTF_CHECK,
|
LibPCRE2::NO_UTF_CHECK,
|
||||||
@match_data,
|
@match_data,
|
||||||
nil)
|
nil)
|
||||||
if rc >= 0
|
if rc > 0
|
||||||
ovector = LibPCRE2.get_ovector_pointer(@match_data)
|
ovector = LibPCRE2.get_ovector_pointer(@match_data)
|
||||||
(0...rc).each do |i|
|
(0...rc).map do |i|
|
||||||
m_start = ovector[2 * i]
|
m_start = ovector[2 * i]
|
||||||
m_size = ovector[2 * i + 1] - m_start
|
m_end = ovector[2 * i + 1]
|
||||||
if m_size == 0
|
if m_start == m_end
|
||||||
m_value = Bytes.new(0)
|
m_value = Bytes.new(0)
|
||||||
else
|
else
|
||||||
m_value = str[m_start...m_start + m_size]
|
m_value = str[m_start...m_end]
|
||||||
end
|
end
|
||||||
match << Match.new(m_value, m_start, m_size)
|
Match.new(m_value, m_start, m_end - m_start)
|
||||||
end
|
end
|
||||||
|
else
|
||||||
|
[] of Match
|
||||||
end
|
end
|
||||||
match
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class Match
|
struct Match
|
||||||
property value : Bytes
|
property value : Bytes
|
||||||
property start : UInt64
|
property start : UInt64
|
||||||
property size : UInt64
|
property size : UInt64
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -12,6 +12,10 @@ module Tartrazine
|
|||||||
property theme : Theme = Tartrazine.theme("default-dark")
|
property theme : Theme = Tartrazine.theme("default-dark")
|
||||||
|
|
||||||
# Format the text using the given lexer.
|
# Format the text using the given lexer.
|
||||||
|
def format(text : String, lexer : Lexer, io : IO = nil) : Nil
|
||||||
|
raise Exception.new("Not implemented")
|
||||||
|
end
|
||||||
|
|
||||||
def format(text : String, lexer : Lexer) : String
|
def format(text : String, lexer : Lexer) : String
|
||||||
raise Exception.new("Not implemented")
|
raise Exception.new("Not implemented")
|
||||||
end
|
end
|
||||||
|
@ -7,17 +7,27 @@ module Tartrazine
|
|||||||
def initialize(@theme : Theme = Tartrazine.theme("default-dark"), @line_numbers : Bool = false)
|
def initialize(@theme : Theme = Tartrazine.theme("default-dark"), @line_numbers : Bool = false)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
private def line_label(i : Int32) : String
|
||||||
|
"#{i + 1}".rjust(4).ljust(5)
|
||||||
|
end
|
||||||
|
|
||||||
def format(text : String, lexer : Lexer) : String
|
def format(text : String, lexer : Lexer) : String
|
||||||
output = String.build do |outp|
|
outp = String::Builder.new("")
|
||||||
lexer.group_tokens_in_lines(lexer.tokenize(text)).each_with_index do |line, i|
|
format(text, lexer, outp)
|
||||||
label = line_numbers? ? "#{i + 1}".rjust(4).ljust(5) : ""
|
outp.to_s
|
||||||
outp << label
|
end
|
||||||
line.each do |token|
|
|
||||||
outp << colorize(token[:value], token[:type])
|
def format(text : String, lexer : BaseLexer, outp : IO) : Nil
|
||||||
end
|
tokenizer = lexer.tokenizer(text)
|
||||||
|
i = 0
|
||||||
|
outp << line_label(i) if line_numbers?
|
||||||
|
tokenizer.each do |token|
|
||||||
|
outp << colorize(token[:value], token[:type])
|
||||||
|
if token[:value].includes?("\n")
|
||||||
|
i += 1
|
||||||
|
outp << line_label(i) if line_numbers?
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
output
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def colorize(text : String, token : String) : String
|
def colorize(text : String, token : String) : String
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
require "../constants/token_abbrevs.cr"
|
require "../constants/token_abbrevs.cr"
|
||||||
require "../formatter"
|
require "../formatter"
|
||||||
|
require "html"
|
||||||
|
|
||||||
module Tartrazine
|
module Tartrazine
|
||||||
class Html < Formatter
|
class Html < Formatter
|
||||||
@ -34,46 +35,52 @@ module Tartrazine
|
|||||||
end
|
end
|
||||||
|
|
||||||
def format(text : String, lexer : Lexer) : String
|
def format(text : String, lexer : Lexer) : String
|
||||||
text = format_text(text, lexer)
|
outp = String::Builder.new("")
|
||||||
if standalone?
|
format(text, lexer, outp)
|
||||||
text = wrap_standalone(text)
|
outp.to_s
|
||||||
end
|
end
|
||||||
text
|
|
||||||
|
def format(text : String, lexer : BaseLexer, io : IO) : Nil
|
||||||
|
pre, post = wrap_standalone
|
||||||
|
io << pre if standalone?
|
||||||
|
format_text(text, lexer, io)
|
||||||
|
io << post if standalone?
|
||||||
end
|
end
|
||||||
|
|
||||||
# Wrap text into a full HTML document, including the CSS for the theme
|
# Wrap text into a full HTML document, including the CSS for the theme
|
||||||
def wrap_standalone(text) : String
|
def wrap_standalone
|
||||||
output = String.build do |outp|
|
output = String.build do |outp|
|
||||||
outp << "<!DOCTYPE html><html><head><style>"
|
outp << "<!DOCTYPE html><html><head><style>"
|
||||||
outp << style_defs
|
outp << style_defs
|
||||||
outp << "</style></head><body>"
|
outp << "</style></head><body>"
|
||||||
outp << text
|
|
||||||
outp << "</body></html>"
|
|
||||||
end
|
end
|
||||||
output
|
{output.to_s, "</body></html>"}
|
||||||
end
|
end
|
||||||
|
|
||||||
def format_text(text : String, lexer : Lexer) : String
|
private def line_label(i : Int32) : String
|
||||||
lines = lexer.group_tokens_in_lines(lexer.tokenize(text))
|
line_label = "#{i + 1}".rjust(4).ljust(5)
|
||||||
output = String.build do |outp|
|
line_class = highlighted?(i + 1) ? "class=\"#{get_css_class("LineHighlight")}\"" : ""
|
||||||
if surrounding_pre?
|
line_id = linkable_line_numbers? ? "id=\"#{line_number_id_prefix}#{i + 1}\"" : ""
|
||||||
pre_style = wrap_long_lines? ? "style=\"white-space: pre-wrap; word-break: break-word;\"" : ""
|
"<span #{line_id} #{line_class} style=\"user-select: none;\">#{line_label} </span>"
|
||||||
outp << "<pre class=\"#{get_css_class("Background")}\" #{pre_style}>"
|
end
|
||||||
end
|
|
||||||
outp << "<code class=\"#{get_css_class("Background")}\">"
|
def format_text(text : String, lexer : BaseLexer, outp : IO)
|
||||||
lines.each_with_index(offset: line_number_start - 1) do |line, i|
|
tokenizer = lexer.tokenizer(text)
|
||||||
line_label = line_numbers? ? "#{i + 1}".rjust(4).ljust(5) : ""
|
i = 0
|
||||||
line_class = highlighted?(i + 1) ? "class=\"#{get_css_class("LineHighlight")}\"" : ""
|
if surrounding_pre?
|
||||||
line_id = linkable_line_numbers? ? "id=\"#{line_number_id_prefix}#{i + 1}\"" : ""
|
pre_style = wrap_long_lines? ? "style=\"white-space: pre-wrap; word-break: break-word;\"" : ""
|
||||||
outp << "<span #{line_id} #{line_class} style=\"user-select: none;\">#{line_label} </span>"
|
outp << "<pre class=\"#{get_css_class("Background")}\" #{pre_style}>"
|
||||||
line.each do |token|
|
|
||||||
fragment = "<span class=\"#{get_css_class(token[:type])}\">#{token[:value]}</span>"
|
|
||||||
outp << fragment
|
|
||||||
end
|
|
||||||
end
|
|
||||||
outp << "</code></pre>"
|
|
||||||
end
|
end
|
||||||
output
|
outp << "<code class=\"#{get_css_class("Background")}\">"
|
||||||
|
outp << line_label(i) if line_numbers?
|
||||||
|
tokenizer.each do |token|
|
||||||
|
outp << "<span class=\"#{get_css_class(token[:type])}\">#{HTML.escape(token[:value])}</span>"
|
||||||
|
if token[:value].ends_with? "\n"
|
||||||
|
i += 1
|
||||||
|
outp << line_label(i) if line_numbers?
|
||||||
|
end
|
||||||
|
end
|
||||||
|
outp << "</code></pre>"
|
||||||
end
|
end
|
||||||
|
|
||||||
# ameba:disable Metrics/CyclomaticComplexity
|
# ameba:disable Metrics/CyclomaticComplexity
|
||||||
@ -104,15 +111,17 @@ module Tartrazine
|
|||||||
|
|
||||||
# Given a token type, return the CSS class to use.
|
# Given a token type, return the CSS class to use.
|
||||||
def get_css_class(token : String) : String
|
def get_css_class(token : String) : String
|
||||||
return class_prefix + Abbreviations[token] if theme.styles.has_key?(token)
|
if !theme.styles.has_key? token
|
||||||
|
# Themes don't contain information for each specific
|
||||||
# Themes don't contain information for each specific
|
# token type. However, they may contain information
|
||||||
# token type. However, they may contain information
|
# for a parent style. Worst case, we go to the root
|
||||||
# for a parent style. Worst case, we go to the root
|
# (Background) style.
|
||||||
# (Background) style.
|
parent = theme.style_parents(token).reverse.find { |dad|
|
||||||
class_prefix + Abbreviations[theme.style_parents(token).reverse.find { |parent|
|
theme.styles.has_key?(dad)
|
||||||
theme.styles.has_key?(parent)
|
}
|
||||||
}]
|
theme.styles[token] = theme.styles[parent]
|
||||||
|
end
|
||||||
|
class_prefix + Abbreviations[token]
|
||||||
end
|
end
|
||||||
|
|
||||||
# Is this line in the highlighted ranges?
|
# Is this line in the highlighted ranges?
|
||||||
|
@ -4,8 +4,15 @@ module Tartrazine
|
|||||||
class Json < Formatter
|
class Json < Formatter
|
||||||
property name = "json"
|
property name = "json"
|
||||||
|
|
||||||
def format(text : String, lexer : Lexer, _theme : Theme) : String
|
def format(text : String, lexer : BaseLexer) : String
|
||||||
lexer.tokenize(text).to_json
|
outp = String::Builder.new("")
|
||||||
|
format(text, lexer, outp)
|
||||||
|
outp.to_s
|
||||||
|
end
|
||||||
|
|
||||||
|
def format(text : String, lexer : BaseLexer, io : IO) : Nil
|
||||||
|
tokenizer = lexer.tokenizer(text)
|
||||||
|
io << Tartrazine::Lexer.collapse_tokens(tokenizer.to_a).to_json
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -1,8 +1,12 @@
|
|||||||
require "yaml"
|
require "yaml"
|
||||||
|
|
||||||
module Tartrazine
|
# Use linguist's heuristics to disambiguate between languages
|
||||||
# Use linguist's heuristics to disambiguate between languages
|
# This is *shamelessly* stolen from https://github.com/github-linguist/linguist
|
||||||
|
# and ported to Crystal. Deepest thanks to the authors of Linguist
|
||||||
|
# for licensing it liberally.
|
||||||
|
#
|
||||||
|
# Consider this code (c) 2017 GitHub, Inc. even if I wrote it.
|
||||||
|
module Linguist
|
||||||
class Heuristic
|
class Heuristic
|
||||||
include YAML::Serializable
|
include YAML::Serializable
|
||||||
|
|
||||||
@ -34,12 +38,13 @@ module Tartrazine
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class Rule
|
class LangRule
|
||||||
include YAML::Serializable
|
include YAML::Serializable
|
||||||
property pattern : (String | Array(String))?
|
property pattern : (String | Array(String))?
|
||||||
property negative_pattern : (String | Array(String))?
|
property negative_pattern : (String | Array(String))?
|
||||||
property named_pattern : String?
|
property named_pattern : String?
|
||||||
property and : Array(Rule)?
|
property and : Array(LangRule)?
|
||||||
|
property language : String | Array(String)?
|
||||||
|
|
||||||
# ameba:disable Metrics/CyclomaticComplexity
|
# ameba:disable Metrics/CyclomaticComplexity
|
||||||
def match(content, named_patterns)
|
def match(content, named_patterns)
|
||||||
@ -68,17 +73,9 @@ module Tartrazine
|
|||||||
result = p_arr.any? { |pat| ::Regex.new(pat).matches?(content) }
|
result = p_arr.any? { |pat| ::Regex.new(pat).matches?(content) }
|
||||||
end
|
end
|
||||||
if and
|
if and
|
||||||
result = and.as(Array(Rule)).all?(&.match(content, named_patterns))
|
result = and.as(Array(LangRule)).all?(&.match(content, named_patterns))
|
||||||
end
|
end
|
||||||
result
|
result
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class LangRule < Rule
|
|
||||||
include YAML::Serializable
|
|
||||||
property language : String | Array(String)
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# h = Tartrazine::Heuristic.from_yaml(File.read("heuristics/heuristics.yml"))
|
|
||||||
# p! h.run(ARGV[0], File.read(ARGV[0]))
|
|
||||||
|
312
src/lexer.cr
312
src/lexer.cr
@ -4,111 +4,169 @@ require "./constants/lexers"
|
|||||||
module Tartrazine
|
module Tartrazine
|
||||||
class LexerFiles
|
class LexerFiles
|
||||||
extend BakedFileSystem
|
extend BakedFileSystem
|
||||||
|
|
||||||
bake_folder "../lexers", __DIR__
|
bake_folder "../lexers", __DIR__
|
||||||
end
|
end
|
||||||
|
|
||||||
# Get the lexer object for a language name
|
# Get the lexer object for a language name
|
||||||
# FIXME: support mimetypes
|
# FIXME: support mimetypes
|
||||||
def self.lexer(name : String? = nil, filename : String? = nil) : Lexer
|
def self.lexer(name : String? = nil, filename : String? = nil) : BaseLexer
|
||||||
if name.nil? && filename.nil?
|
return lexer_by_name(name) if name && name != "autodetect"
|
||||||
|
return lexer_by_filename(filename) if filename
|
||||||
|
|
||||||
|
Lexer.from_xml(LexerFiles.get("/#{LEXERS_BY_NAME["plaintext"]}.xml").gets_to_end)
|
||||||
|
end
|
||||||
|
|
||||||
|
private def self.lexer_by_name(name : String) : BaseLexer
|
||||||
|
lexer_file_name = LEXERS_BY_NAME.fetch(name.downcase, nil)
|
||||||
|
return create_delegating_lexer(name) if lexer_file_name.nil? && name.includes? "+"
|
||||||
|
raise Exception.new("Unknown lexer: #{name}") if lexer_file_name.nil?
|
||||||
|
|
||||||
|
Lexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
|
||||||
|
end
|
||||||
|
|
||||||
|
private def self.lexer_by_filename(filename : String) : BaseLexer
|
||||||
|
candidates = Set(String).new
|
||||||
|
LEXERS_BY_FILENAME.each do |k, v|
|
||||||
|
candidates += v.to_set if File.match?(k, File.basename(filename))
|
||||||
|
end
|
||||||
|
|
||||||
|
case candidates.size
|
||||||
|
when 0
|
||||||
lexer_file_name = LEXERS_BY_NAME["plaintext"]
|
lexer_file_name = LEXERS_BY_NAME["plaintext"]
|
||||||
elsif name && name != "autodetect"
|
when 1
|
||||||
lexer_file_name = LEXERS_BY_NAME[name.downcase]
|
lexer_file_name = candidates.first
|
||||||
else
|
else
|
||||||
# Guess by filename
|
lexer_file_name = self.lexer_by_content(filename)
|
||||||
candidates = Set(String).new
|
begin
|
||||||
LEXERS_BY_FILENAME.each do |k, v|
|
return self.lexer(lexer_file_name)
|
||||||
candidates += v.to_set if File.match?(k, File.basename(filename.to_s))
|
rescue ex : Exception
|
||||||
end
|
raise Exception.new("Multiple lexers match the filename: #{candidates.to_a.join(", ")}, heuristics suggest #{lexer_file_name} but there is no matching lexer.")
|
||||||
case candidates.size
|
|
||||||
when 0
|
|
||||||
lexer_file_name = LEXERS_BY_NAME["plaintext"]
|
|
||||||
when 1
|
|
||||||
lexer_file_name = candidates.first
|
|
||||||
else
|
|
||||||
raise Exception.new("Multiple lexers match the filename: #{candidates.to_a.join(", ")}")
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
Lexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
|
Lexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
private def self.lexer_by_content(fname : String) : String?
|
||||||
|
h = Linguist::Heuristic.from_yaml(LexerFiles.get("/heuristics.yml").gets_to_end)
|
||||||
|
result = h.run(fname, File.read(fname))
|
||||||
|
case result
|
||||||
|
when Nil
|
||||||
|
raise Exception.new "No lexer found for #{fname}"
|
||||||
|
when String
|
||||||
|
result.as(String)
|
||||||
|
when Array(String)
|
||||||
|
result.first
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
private def self.create_delegating_lexer(name : String) : BaseLexer
|
||||||
|
language, root = name.split("+", 2)
|
||||||
|
language_lexer = lexer(language)
|
||||||
|
root_lexer = lexer(root)
|
||||||
|
DelegatingLexer.new(language_lexer, root_lexer)
|
||||||
|
end
|
||||||
|
|
||||||
# Return a list of all lexers
|
# Return a list of all lexers
|
||||||
def self.lexers : Array(String)
|
def self.lexers : Array(String)
|
||||||
LEXERS_BY_NAME.keys.sort!
|
LEXERS_BY_NAME.keys.sort!
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# A token, the output of the tokenizer
|
||||||
|
alias Token = NamedTuple(type: String, value: String)
|
||||||
|
|
||||||
|
abstract class BaseTokenizer
|
||||||
|
end
|
||||||
|
|
||||||
|
class Tokenizer < BaseTokenizer
|
||||||
|
include Iterator(Token)
|
||||||
|
property lexer : BaseLexer
|
||||||
|
property text : Bytes
|
||||||
|
property pos : Int32 = 0
|
||||||
|
@dq = Deque(Token).new
|
||||||
|
property state_stack = ["root"]
|
||||||
|
|
||||||
|
def initialize(@lexer : BaseLexer, text : String, secondary = false)
|
||||||
|
# Respect the `ensure_nl` config option
|
||||||
|
if text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary
|
||||||
|
text += "\n"
|
||||||
|
end
|
||||||
|
@text = text.to_slice
|
||||||
|
end
|
||||||
|
|
||||||
|
def next : Iterator::Stop | Token
|
||||||
|
if @dq.size > 0
|
||||||
|
return @dq.shift
|
||||||
|
end
|
||||||
|
if pos == @text.size
|
||||||
|
return stop
|
||||||
|
end
|
||||||
|
|
||||||
|
matched = false
|
||||||
|
while @pos < @text.size
|
||||||
|
@lexer.states[@state_stack.last].rules.each do |rule|
|
||||||
|
matched, new_pos, new_tokens = rule.match(@text, @pos, self)
|
||||||
|
if matched
|
||||||
|
@pos = new_pos
|
||||||
|
split_tokens(new_tokens).each { |token| @dq << token }
|
||||||
|
break
|
||||||
|
end
|
||||||
|
end
|
||||||
|
if !matched
|
||||||
|
if @text[@pos] == 10u8
|
||||||
|
@dq << {type: "Text", value: "\n"}
|
||||||
|
@state_stack = ["root"]
|
||||||
|
else
|
||||||
|
@dq << {type: "Error", value: String.new(@text[@pos..@pos])}
|
||||||
|
end
|
||||||
|
@pos += 1
|
||||||
|
break
|
||||||
|
end
|
||||||
|
end
|
||||||
|
self.next
|
||||||
|
end
|
||||||
|
|
||||||
|
# If a token contains a newline, split it into two tokens
|
||||||
|
def split_tokens(tokens : Array(Token)) : Array(Token)
|
||||||
|
split_tokens = [] of Token
|
||||||
|
tokens.each do |token|
|
||||||
|
if token[:value].includes?("\n")
|
||||||
|
values = token[:value].split("\n")
|
||||||
|
values.each_with_index do |value, index|
|
||||||
|
value += "\n" if index < values.size - 1
|
||||||
|
split_tokens << {type: token[:type], value: value}
|
||||||
|
end
|
||||||
|
else
|
||||||
|
split_tokens << token
|
||||||
|
end
|
||||||
|
end
|
||||||
|
split_tokens
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
abstract class BaseLexer
|
||||||
|
property config = {
|
||||||
|
name: "",
|
||||||
|
priority: 0.0,
|
||||||
|
case_insensitive: false,
|
||||||
|
dot_all: false,
|
||||||
|
not_multiline: false,
|
||||||
|
ensure_nl: false,
|
||||||
|
}
|
||||||
|
property states = {} of String => State
|
||||||
|
|
||||||
|
def tokenizer(text : String, secondary = false) : BaseTokenizer
|
||||||
|
Tokenizer.new(self, text, secondary)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
# This implements a lexer for Pygments RegexLexers as expressed
|
# This implements a lexer for Pygments RegexLexers as expressed
|
||||||
# in Chroma's XML serialization.
|
# in Chroma's XML serialization.
|
||||||
#
|
#
|
||||||
# For explanations on what actions and states do
|
# For explanations on what actions and states do
|
||||||
# the Pygments documentation is a good place to start.
|
# the Pygments documentation is a good place to start.
|
||||||
# https://pygments.org/docs/lexerdevelopment/
|
# https://pygments.org/docs/lexerdevelopment/
|
||||||
class Lexer
|
class Lexer < BaseLexer
|
||||||
property config = {
|
|
||||||
name: "",
|
|
||||||
aliases: [] of String,
|
|
||||||
filenames: [] of String,
|
|
||||||
mime_types: [] of String,
|
|
||||||
priority: 0.0,
|
|
||||||
case_insensitive: false,
|
|
||||||
dot_all: false,
|
|
||||||
not_multiline: false,
|
|
||||||
ensure_nl: false,
|
|
||||||
}
|
|
||||||
property xml : String = ""
|
|
||||||
|
|
||||||
property states = {} of String => State
|
|
||||||
|
|
||||||
property state_stack = ["root"]
|
|
||||||
|
|
||||||
# Turn the text into a list of tokens. The `usingself` parameter
|
|
||||||
# is true when the lexer is being used to tokenize a string
|
|
||||||
# from a larger text that is already being tokenized.
|
|
||||||
# So, when it's true, we don't modify the text.
|
|
||||||
def tokenize(text : String, usingself = false) : Array(Token)
|
|
||||||
@state_stack = ["root"]
|
|
||||||
tokens = [] of Token
|
|
||||||
pos = 0
|
|
||||||
matched = false
|
|
||||||
|
|
||||||
# Respect the `ensure_nl` config option
|
|
||||||
if text.size > 0 && text[-1] != '\n' && config[:ensure_nl] && !usingself
|
|
||||||
text += "\n"
|
|
||||||
end
|
|
||||||
|
|
||||||
text_bytes = text.to_slice
|
|
||||||
# Loop through the text, applying rules
|
|
||||||
while pos < text_bytes.size
|
|
||||||
state = states[@state_stack.last]
|
|
||||||
# Log.trace { "Stack is #{@state_stack} State is #{state.name}, pos is #{pos}, text is #{text[pos..pos + 10]}" }
|
|
||||||
state.rules.each do |rule|
|
|
||||||
matched, new_pos, new_tokens = rule.match(text_bytes, pos, self)
|
|
||||||
if matched
|
|
||||||
# Move position forward, save the tokens,
|
|
||||||
# tokenize from the new position
|
|
||||||
# Log.trace { "MATCHED: #{rule.xml}" }
|
|
||||||
pos = new_pos
|
|
||||||
tokens += new_tokens
|
|
||||||
break
|
|
||||||
end
|
|
||||||
# Log.trace { "NOT MATCHED: #{rule.xml}" }
|
|
||||||
end
|
|
||||||
# If no rule matches, emit an error token
|
|
||||||
unless matched
|
|
||||||
if text_bytes[pos] == 10u8
|
|
||||||
# at EOL, reset state to "root"
|
|
||||||
tokens << {type: "Text", value: "\n"}
|
|
||||||
@state_stack = ["root"]
|
|
||||||
else
|
|
||||||
tokens << {type: "Error", value: String.new(text_bytes[pos..pos])}
|
|
||||||
end
|
|
||||||
pos += 1
|
|
||||||
end
|
|
||||||
end
|
|
||||||
Lexer.collapse_tokens(tokens)
|
|
||||||
end
|
|
||||||
|
|
||||||
# Collapse consecutive tokens of the same type for easier comparison
|
# Collapse consecutive tokens of the same type for easier comparison
|
||||||
# and smaller output
|
# and smaller output
|
||||||
def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token)
|
def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token)
|
||||||
@ -131,34 +189,8 @@ module Tartrazine
|
|||||||
result
|
result
|
||||||
end
|
end
|
||||||
|
|
||||||
# Group tokens into lines, splitting them when a newline is found
|
|
||||||
def group_tokens_in_lines(tokens : Array(Token)) : Array(Array(Token))
|
|
||||||
split_tokens = [] of Token
|
|
||||||
tokens.each do |token|
|
|
||||||
if token[:value].includes?("\n")
|
|
||||||
values = token[:value].split("\n")
|
|
||||||
values.each_with_index do |value, index|
|
|
||||||
value += "\n" if index < values.size - 1
|
|
||||||
split_tokens << {type: token[:type], value: value}
|
|
||||||
end
|
|
||||||
else
|
|
||||||
split_tokens << token
|
|
||||||
end
|
|
||||||
end
|
|
||||||
lines = [Array(Token).new]
|
|
||||||
split_tokens.each do |token|
|
|
||||||
lines.last << token
|
|
||||||
if token[:value].includes?("\n")
|
|
||||||
lines << Array(Token).new
|
|
||||||
end
|
|
||||||
end
|
|
||||||
lines
|
|
||||||
end
|
|
||||||
|
|
||||||
# ameba:disable Metrics/CyclomaticComplexity
|
|
||||||
def self.from_xml(xml : String) : Lexer
|
def self.from_xml(xml : String) : Lexer
|
||||||
l = Lexer.new
|
l = Lexer.new
|
||||||
l.xml = xml
|
|
||||||
lexer = XML.parse(xml).first_element_child
|
lexer = XML.parse(xml).first_element_child
|
||||||
if lexer
|
if lexer
|
||||||
config = lexer.children.find { |node|
|
config = lexer.children.find { |node|
|
||||||
@ -167,9 +199,6 @@ module Tartrazine
|
|||||||
if config
|
if config
|
||||||
l.config = {
|
l.config = {
|
||||||
name: xml_to_s(config, name) || "",
|
name: xml_to_s(config, name) || "",
|
||||||
aliases: xml_to_a(config, _alias) || [] of String,
|
|
||||||
filenames: xml_to_a(config, filename) || [] of String,
|
|
||||||
mime_types: xml_to_a(config, mime_type) || [] of String,
|
|
||||||
priority: xml_to_f(config, priority) || 0.0,
|
priority: xml_to_f(config, priority) || 0.0,
|
||||||
not_multiline: xml_to_s(config, not_multiline) == "true",
|
not_multiline: xml_to_s(config, not_multiline) == "true",
|
||||||
dot_all: xml_to_s(config, dot_all) == "true",
|
dot_all: xml_to_s(config, dot_all) == "true",
|
||||||
@ -219,12 +248,66 @@ module Tartrazine
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# A lexer that takes two lexers as arguments. A root lexer
|
||||||
|
# and a language lexer. Everything is scanned using the
|
||||||
|
# language lexer, afterwards all `Other` tokens are lexed
|
||||||
|
# using the root lexer.
|
||||||
|
#
|
||||||
|
# This is useful for things like template languages, where
|
||||||
|
# you have Jinja + HTML or Jinja + CSS and so on.
|
||||||
|
class DelegatingLexer < BaseLexer
|
||||||
|
property language_lexer : BaseLexer
|
||||||
|
property root_lexer : BaseLexer
|
||||||
|
|
||||||
|
def initialize(@language_lexer : BaseLexer, @root_lexer : BaseLexer)
|
||||||
|
end
|
||||||
|
|
||||||
|
def tokenizer(text : String, secondary = false) : DelegatingTokenizer
|
||||||
|
DelegatingTokenizer.new(self, text, secondary)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# This Tokenizer works with a DelegatingLexer. It first tokenizes
|
||||||
|
# using the language lexer, and "Other" tokens are tokenized using
|
||||||
|
# the root lexer.
|
||||||
|
class DelegatingTokenizer < BaseTokenizer
|
||||||
|
include Iterator(Token)
|
||||||
|
@dq = Deque(Token).new
|
||||||
|
@language_tokenizer : BaseTokenizer
|
||||||
|
|
||||||
|
def initialize(@lexer : DelegatingLexer, text : String, secondary = false)
|
||||||
|
# Respect the `ensure_nl` config option
|
||||||
|
if text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary
|
||||||
|
text += "\n"
|
||||||
|
end
|
||||||
|
@language_tokenizer = @lexer.language_lexer.tokenizer(text, true)
|
||||||
|
end
|
||||||
|
|
||||||
|
def next : Iterator::Stop | Token
|
||||||
|
if @dq.size > 0
|
||||||
|
return @dq.shift
|
||||||
|
end
|
||||||
|
token = @language_tokenizer.next
|
||||||
|
if token.is_a? Iterator::Stop
|
||||||
|
return stop
|
||||||
|
elsif token.as(Token).[:type] == "Other"
|
||||||
|
root_tokenizer = @lexer.root_lexer.tokenizer(token.as(Token).[:value], true)
|
||||||
|
root_tokenizer.each do |root_token|
|
||||||
|
@dq << root_token
|
||||||
|
end
|
||||||
|
else
|
||||||
|
@dq << token.as(Token)
|
||||||
|
end
|
||||||
|
self.next
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
# A Lexer state. A state has a name and a list of rules.
|
# A Lexer state. A state has a name and a list of rules.
|
||||||
# The state machine has a state stack containing references
|
# The state machine has a state stack containing references
|
||||||
# to states to decide which rules to apply.
|
# to states to decide which rules to apply.
|
||||||
class State
|
struct State
|
||||||
property name : String = ""
|
property name : String = ""
|
||||||
property rules = [] of Rule
|
property rules = [] of BaseRule
|
||||||
|
|
||||||
def +(other : State)
|
def +(other : State)
|
||||||
new_state = State.new
|
new_state = State.new
|
||||||
@ -233,7 +316,4 @@ module Tartrazine
|
|||||||
new_state
|
new_state
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# A token, the output of the tokenizer
|
|
||||||
alias Token = NamedTuple(type: String, value: String)
|
|
||||||
end
|
end
|
||||||
|
27
src/main.cr
27
src/main.cr
@ -1,18 +1,6 @@
|
|||||||
require "docopt"
|
require "docopt"
|
||||||
require "./**"
|
require "./**"
|
||||||
|
|
||||||
# Performance data (in milliseconds):
|
|
||||||
#
|
|
||||||
# Docopt parsing: 0.5
|
|
||||||
# Instantiating a theme: 0.1
|
|
||||||
# Instantiating a formatter: 1.0
|
|
||||||
# Instantiating a lexer: 2.0
|
|
||||||
# Tokenizing crycco.cr: 16.0
|
|
||||||
# Formatting: 0.5
|
|
||||||
# I/O: 1.5
|
|
||||||
# ---------------------------------
|
|
||||||
# Total: 21.6
|
|
||||||
|
|
||||||
HELP = <<-HELP
|
HELP = <<-HELP
|
||||||
tartrazine: a syntax highlighting tool
|
tartrazine: a syntax highlighting tool
|
||||||
|
|
||||||
@ -32,7 +20,8 @@ Usage:
|
|||||||
Options:
|
Options:
|
||||||
-f <formatter> Format to use (html, terminal, json)
|
-f <formatter> Format to use (html, terminal, json)
|
||||||
-t <theme> Theme to use, see --list-themes [default: default-dark]
|
-t <theme> Theme to use, see --list-themes [default: default-dark]
|
||||||
-l <lexer> Lexer (language) to use, see --list-lexers [default: autodetect]
|
-l <lexer> Lexer (language) to use, see --list-lexers. Use more than
|
||||||
|
one lexer with "+" (e.g. jinja+yaml) [default: autodetect]
|
||||||
-o <output> Output file. Default is stdout.
|
-o <output> Output file. Default is stdout.
|
||||||
--standalone Generate a standalone HTML file, which includes
|
--standalone Generate a standalone HTML file, which includes
|
||||||
all style information. If not given, it will generate just
|
all style information. If not given, it will generate just
|
||||||
@ -89,20 +78,20 @@ if options["-f"]
|
|||||||
|
|
||||||
if formatter.is_a?(Tartrazine::Html) && options["--css"]
|
if formatter.is_a?(Tartrazine::Html) && options["--css"]
|
||||||
File.open("#{options["-t"].as(String)}.css", "w") do |outf|
|
File.open("#{options["-t"].as(String)}.css", "w") do |outf|
|
||||||
outf.puts formatter.style_defs
|
outf << formatter.style_defs
|
||||||
end
|
end
|
||||||
exit 0
|
exit 0
|
||||||
end
|
end
|
||||||
|
|
||||||
lexer = Tartrazine.lexer(name: options["-l"].as(String), filename: options["FILE"].as(String))
|
lexer = Tartrazine.lexer(name: options["-l"].as(String), filename: options["FILE"].as(String))
|
||||||
|
|
||||||
input = File.open(options["FILE"].as(String)).gets_to_end
|
input = File.open(options["FILE"].as(String)).gets_to_end
|
||||||
output = formatter.format(input, lexer)
|
|
||||||
|
|
||||||
if options["-o"].nil?
|
if options["-o"].nil?
|
||||||
puts output
|
outf = STDOUT
|
||||||
else
|
else
|
||||||
File.open(options["-o"].as(String), "w") do |outf|
|
outf = File.open(options["-o"].as(String), "w")
|
||||||
outf.puts output
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
formatter.format(input, lexer, outf)
|
||||||
|
outf.close
|
||||||
end
|
end
|
||||||
|
69
src/rules.cr
69
src/rules.cr
@ -15,28 +15,11 @@ module Tartrazine
|
|||||||
alias Match = BytesRegex::Match
|
alias Match = BytesRegex::Match
|
||||||
alias MatchData = Array(Match)
|
alias MatchData = Array(Match)
|
||||||
|
|
||||||
class Rule
|
abstract struct BaseRule
|
||||||
property pattern : Regex = Regex.new ""
|
abstract def match(text : Bytes, pos : Int32, tokenizer : Tokenizer) : Tuple(Bool, Int32, Array(Token))
|
||||||
property actions : Array(Action) = [] of Action
|
abstract def initialize(node : XML::Node)
|
||||||
|
|
||||||
def match(text : Bytes, pos, lexer) : Tuple(Bool, Int32, Array(Token))
|
@actions : Array(Action) = [] of Action
|
||||||
match = pattern.match(text, pos)
|
|
||||||
# We don't match if the match doesn't move the cursor
|
|
||||||
# because that causes infinite loops
|
|
||||||
return false, pos, [] of Token if match.empty? || match[0].size == 0
|
|
||||||
tokens = [] of Token
|
|
||||||
actions.each do |action|
|
|
||||||
tokens += action.emit(match, lexer)
|
|
||||||
end
|
|
||||||
return true, pos + match[0].size, tokens
|
|
||||||
end
|
|
||||||
|
|
||||||
def initialize(node : XML::Node, multiline, dotall, ignorecase)
|
|
||||||
pattern = node["pattern"]
|
|
||||||
pattern = "(?m)" + pattern if multiline
|
|
||||||
@pattern = Regex.new(pattern, multiline, dotall, ignorecase, true)
|
|
||||||
add_actions(node)
|
|
||||||
end
|
|
||||||
|
|
||||||
def add_actions(node : XML::Node)
|
def add_actions(node : XML::Node)
|
||||||
node.children.each do |child|
|
node.children.each do |child|
|
||||||
@ -46,14 +29,36 @@ module Tartrazine
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
struct Rule < BaseRule
|
||||||
|
property pattern : Regex = Regex.new ""
|
||||||
|
|
||||||
|
def match(text : Bytes, pos, tokenizer) : Tuple(Bool, Int32, Array(Token))
|
||||||
|
match = pattern.match(text, pos)
|
||||||
|
|
||||||
|
# No match
|
||||||
|
return false, pos, [] of Token if match.size == 0
|
||||||
|
return true, pos + match[0].size, @actions.flat_map(&.emit(match, tokenizer))
|
||||||
|
end
|
||||||
|
|
||||||
|
def initialize(node : XML::Node)
|
||||||
|
end
|
||||||
|
|
||||||
|
def initialize(node : XML::Node, multiline, dotall, ignorecase)
|
||||||
|
pattern = node["pattern"]
|
||||||
|
pattern = "(?m)" + pattern if multiline
|
||||||
|
@pattern = Regex.new(pattern, multiline, dotall, ignorecase, true)
|
||||||
|
add_actions(node)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
# This rule includes another state. If any of the rules of the
|
# This rule includes another state. If any of the rules of the
|
||||||
# included state matches, this rule matches.
|
# included state matches, this rule matches.
|
||||||
class IncludeStateRule < Rule
|
struct IncludeStateRule < BaseRule
|
||||||
property state : String = ""
|
@state : String = ""
|
||||||
|
|
||||||
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
|
def match(text : Bytes, pos : Int32, tokenizer : Tokenizer) : Tuple(Bool, Int32, Array(Token))
|
||||||
lexer.states[state].rules.each do |rule|
|
tokenizer.@lexer.states[@state].rules.each do |rule|
|
||||||
matched, new_pos, new_tokens = rule.match(text, pos, lexer)
|
matched, new_pos, new_tokens = rule.match(text, pos, tokenizer)
|
||||||
return true, new_pos, new_tokens if matched
|
return true, new_pos, new_tokens if matched
|
||||||
end
|
end
|
||||||
return false, pos, [] of Token
|
return false, pos, [] of Token
|
||||||
@ -69,13 +74,11 @@ module Tartrazine
|
|||||||
end
|
end
|
||||||
|
|
||||||
# This rule always matches, unconditionally
|
# This rule always matches, unconditionally
|
||||||
class UnconditionalRule < Rule
|
struct UnconditionalRule < BaseRule
|
||||||
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
|
NO_MATCH = [] of Match
|
||||||
tokens = [] of Token
|
|
||||||
actions.each do |action|
|
def match(text, pos, tokenizer) : Tuple(Bool, Int32, Array(Token))
|
||||||
tokens += action.emit([] of Match, lexer)
|
return true, pos, @actions.flat_map(&.emit(NO_MATCH, tokenizer))
|
||||||
end
|
|
||||||
return true, pos, tokens
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def initialize(node : XML::Node)
|
def initialize(node : XML::Node)
|
||||||
|
@ -9,7 +9,7 @@ require "xml"
|
|||||||
module Tartrazine
|
module Tartrazine
|
||||||
alias Color = Sixteen::Color
|
alias Color = Sixteen::Color
|
||||||
|
|
||||||
class ThemeFiles
|
struct ThemeFiles
|
||||||
extend BakedFileSystem
|
extend BakedFileSystem
|
||||||
bake_folder "../styles", __DIR__
|
bake_folder "../styles", __DIR__
|
||||||
end
|
end
|
||||||
@ -39,7 +39,7 @@ module Tartrazine
|
|||||||
themes.to_a.sort!
|
themes.to_a.sort!
|
||||||
end
|
end
|
||||||
|
|
||||||
class Style
|
struct Style
|
||||||
# These properties are tri-state.
|
# These properties are tri-state.
|
||||||
# true means it's set
|
# true means it's set
|
||||||
# false means it's not set
|
# false means it's not set
|
||||||
@ -79,7 +79,7 @@ module Tartrazine
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class Theme
|
struct Theme
|
||||||
property name : String = ""
|
property name : String = ""
|
||||||
|
|
||||||
property styles = {} of String => Style
|
property styles = {} of String => Style
|
||||||
|
Loading…
x
Reference in New Issue
Block a user