9 Commits

47 changed files with 15051 additions and 717 deletions

View File

@@ -1,9 +1,9 @@
# This configuration file was generated by `ameba --gen-config`
# on 2024-09-11 00:56:14 UTC using Ameba version 1.6.1.
# on 2024-08-12 22:00:49 UTC using Ameba version 1.6.1.
# The point is for the user to remove these configuration records
# one by one as the reported problems are removed from the code base.
# Problems found: 4
# Problems found: 2
# Run `ameba --only Documentation/DocumentationAdmonition` for details
Documentation/DocumentationAdmonition:
Description: Reports documentation admonitions
@@ -11,10 +11,111 @@ Documentation/DocumentationAdmonition:
Excluded:
- src/lexer.cr
- src/actions.cr
- spec/examples/crystal/lexer_spec.cr
Admonitions:
- TODO
- FIXME
- BUG
Enabled: true
Severity: Warning
# Problems found: 22
# Run `ameba --only Lint/MissingBlockArgument` for details
Lint/MissingBlockArgument:
Description: Disallows yielding method definitions without block argument
Excluded:
- pygments/tests/examplefiles/cr/test.cr
Enabled: true
Severity: Warning
# Problems found: 1
# Run `ameba --only Lint/NotNil` for details
Lint/NotNil:
Description: Identifies usage of `not_nil!` calls
Excluded:
- pygments/tests/examplefiles/cr/test.cr
Enabled: true
Severity: Warning
# Problems found: 34
# Run `ameba --only Lint/ShadowingOuterLocalVar` for details
Lint/ShadowingOuterLocalVar:
Description: Disallows the usage of the same name as outer local variables for block
or proc arguments
Excluded:
- pygments/tests/examplefiles/cr/test.cr
Enabled: true
Severity: Warning
# Problems found: 1
# Run `ameba --only Lint/UnreachableCode` for details
Lint/UnreachableCode:
Description: Reports unreachable code
Excluded:
- pygments/tests/examplefiles/cr/test.cr
Enabled: true
Severity: Warning
# Problems found: 6
# Run `ameba --only Lint/UselessAssign` for details
Lint/UselessAssign:
Description: Disallows useless variable assignments
ExcludeTypeDeclarations: false
Excluded:
- pygments/tests/examplefiles/cr/test.cr
Enabled: true
Severity: Warning
# Problems found: 3
# Run `ameba --only Naming/BlockParameterName` for details
Naming/BlockParameterName:
Description: Disallows non-descriptive block parameter names
MinNameLength: 3
AllowNamesEndingInNumbers: true
Excluded:
- pygments/tests/examplefiles/cr/test.cr
AllowedNames:
- _
- e
- i
- j
- k
- v
- x
- y
- ex
- io
- ws
- op
- tx
- id
- ip
- k1
- k2
- v1
- v2
ForbiddenNames: []
Enabled: true
Severity: Convention
# Problems found: 1
# Run `ameba --only Naming/RescuedExceptionsVariableName` for details
Naming/RescuedExceptionsVariableName:
Description: Makes sure that rescued exceptions variables are named as expected
Excluded:
- pygments/tests/examplefiles/cr/test.cr
AllowedNames:
- e
- ex
- exception
- error
Enabled: true
Severity: Convention
# Problems found: 6
# Run `ameba --only Naming/TypeNames` for details
Naming/TypeNames:
Description: Enforces type names in camelcase manner
Excluded:
- pygments/tests/examplefiles/cr/test.cr
Enabled: true
Severity: Convention

View File

@@ -1,26 +0,0 @@
name: Tests
on:
# This can't yet run automatically, because tests fail because of
# different versions of chroma. Need to get the same one in my
# local env and in CI
workflow_dispatch:
push:
permissions:
contents: read
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Download source
uses: actions/checkout@v4
- name: Install Crystal
uses: crystal-lang/install-crystal@v1
- name: Run tests
run: |
wget https://github.com/alecthomas/chroma/releases/download/v2.14.0/chroma-2.14.0-linux-amd64.tar.gz
tar xzvf chroma-2.14.0*gz
mkdir ~/.local/bin -p
sudo mv chroma ~/.local/bin
shards install
crystal tool format --check
crystal spec -v

View File

@@ -1,30 +0,0 @@
name: Coverage
on:
workflow_dispatch:
schedule:
- cron: "0 1 * * *"
permissions:
contents: read
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Download source
uses: actions/checkout@v4
- name: Install Crystal
uses: crystal-lang/install-crystal@v1
- name: Run tests using kcov
run: |
sudo apt update && sudo apt install kcov
wget https://github.com/alecthomas/chroma/releases/download/v2.14.0/chroma-2.14.0-linux-amd64.tar.gz
tar xzvf chroma-2.14.0*gz
mkdir ~/.local/bin -p
sudo mv chroma ~/.local/bin
shards install
crystal build src/run_tests.cr
kcov --clean --include-path=./src $PWD/coverage ./run_tests
curl -Os https://uploader.codecov.io/latest/linux/codecov
chmod +x codecov
./codecov -t ${CODECOV_TOKEN} -s coverage
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

2
.gitignore vendored
View File

@@ -10,5 +10,3 @@ shard.lock
.crystal/
venv/
.croupier
coverage/
run_tests

View File

@@ -2,29 +2,17 @@
All notable changes to this project will be documented in this file.
## [0.7.0] - 2024-09-10
### 🚀 Features
- Higher level API (`to_html` and `to_ansi`)
- Use the native crystal highlighter
## [0.6.4] - 2024-08-28
### 🐛 Bug Fixes
- Ameba
- Variable name in Hacefile
- Make it easier to import the Ansi formatter
- Renamed BaseLexer to Lexer and Lexer to RegexLexer to make API nicer
- Make install work
### 📚 Documentation
- Mention AUR package
### 🧪 Testing
- Add CI workflows
### ⚙️ Miscellaneous Tasks
- Pre-commit hooks
@@ -33,10 +21,6 @@ All notable changes to this project will be documented in this file.
- Force conventional commit messages
- Force conventional commit messages
- Updated pre-commit
- *(ignore)* Fix tests
- Added badges
- Added badges
- *(ignore)* Removed random file
### Build
@@ -46,7 +30,6 @@ All notable changes to this project will be documented in this file.
### Bump
- Release v0.6.4
- Release v0.6.4
## [0.6.1] - 2024-08-25

View File

@@ -35,10 +35,10 @@ tasks:
phony: true
always_run: true
dependencies:
- bin/{{NAME}}
- bin/hace
commands: |
rm ${HOME}/.local/bin/{{NAME}} -f
cp bin/{{NAME}} ${HOME}/.local/bin/{{NAME}}
rm ${HOME}/.local/bin/{{NAME}}
cp bin/hace ${HOME}/.local/bin/{{NAME}}
static:
outputs:

View File

@@ -1,8 +1,5 @@
# TARTRAZINE
[![Tests](https://github.com/ralsina/tartrazine/actions/workflows/ci.yml/badge.svg)](https://github.com/ralsina/tartrazine/actions/workflows/ci.yml)
[![codecov](https://codecov.io/gh/ralsina/tartrazine/branch/main/graph/badge.svg?token=52XBPNL99F)](https://codecov.io/gh/ralsina/tartrazine)
Tartrazine is a library to syntax-highlight code. It is
a port of [Pygments](https://pygments.org/) to
[Crystal](https://crystal-lang.org/).
@@ -45,43 +42,18 @@ $ tartrazine whatever.c -t catppuccin-macchiato --line-numbers \
## Usage as a Library
Add to your `shard.yml`:
```yaml
dependencies:
tartrazine:
github: ralsina/tartrazine
```
This is the high level API:
This works:
```crystal
require "tartrazine"
html = Tartrazine.to_html(
"puts \"Hello, world!\"",
language: "crystal",
theme: "catppuccin-macchiato",
standalone: true,
line_numbers: true
)
```
This does more or less the same thing, but more manually:
```crystal
lexer = Tartrazine.lexer("crystal")
formatter = Tartrazine::Html.new(
theme: Tartrazine.theme("catppuccin-macchiato"),
line_numbers: true,
standalone: true,
)
puts formatter.format("puts \"Hello, world!\"", lexer)
theme = Tartrazine.theme("catppuccin-macchiato")
formatter = Tartrazine::Html.new
formatter.theme = theme
puts formatter.format(File.read(ARGV[0]), lexer)
```
The reason you may want to use the manual version is to reuse
the lexer and formatter objects for performance reasons.
## Contributing
1. Fork it (<https://github.com/ralsina/tartrazine/fork>)

View File

@@ -67,7 +67,6 @@ commit_parsers = [
{ message = "^chore\\(deps.*\\)", skip = true },
{ message = "^chore\\(pr\\)", skip = true },
{ message = "^chore\\(pull\\)", skip = true },
{ message = "^chore\\(ignore\\)", skip = true },
{ message = "^chore|^ci", group = "<!-- 7 -->⚙️ Miscellaneous Tasks" },
{ body = ".*security", group = "<!-- 8 -->🛡️ Security" },
{ message = "^revert", group = "<!-- 9 -->◀️ Revert" },

762
lexers/crystal.xml Normal file
View File

@@ -0,0 +1,762 @@
<lexer>
<config>
<name>Crystal</name>
<alias>cr</alias>
<alias>crystal</alias>
<filename>*.cr</filename>
<mime_type>text/x-crystal</mime_type>
<dot_all>true</dot_all>
</config>
<rules>
<state name="pa-intp-string">
<rule pattern="\\[\(]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="\(">
<token type="LiteralStringOther"/>
<push/>
</rule>
<rule pattern="\)">
<token type="LiteralStringOther"/>
<pop depth="1"/>
</rule>
<rule>
<include state="string-intp-escaped"/>
</rule>
<rule pattern="[\\#()]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="[^\\#()]+">
<token type="LiteralStringOther"/>
</rule>
</state>
<state name="ab-regex">
<rule pattern="\\[\\&lt;&gt;]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="&lt;">
<token type="LiteralStringRegex"/>
<push/>
</rule>
<rule pattern="&gt;[imsx]*">
<token type="LiteralStringRegex"/>
<pop depth="1"/>
</rule>
<rule>
<include state="string-intp"/>
</rule>
<rule pattern="[\\#&lt;&gt;]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="[^\\#&lt;&gt;]+">
<token type="LiteralStringRegex"/>
</rule>
</state>
<state name="cb-regex">
<rule pattern="\\[\\{}]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="\{">
<token type="LiteralStringRegex"/>
<push/>
</rule>
<rule pattern="\}[imsx]*">
<token type="LiteralStringRegex"/>
<pop depth="1"/>
</rule>
<rule>
<include state="string-intp"/>
</rule>
<rule pattern="[\\#{}]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="[^\\#{}]+">
<token type="LiteralStringRegex"/>
</rule>
</state>
<state name="simple-backtick">
<rule>
<include state="string-intp-escaped"/>
</rule>
<rule pattern="[^\\`#]+">
<token type="LiteralStringBacktick"/>
</rule>
<rule pattern="[\\#]">
<token type="LiteralStringBacktick"/>
</rule>
<rule pattern="`">
<token type="LiteralStringBacktick"/>
<pop depth="1"/>
</rule>
</state>
<state name="string-intp">
<rule pattern="#\{">
<token type="LiteralStringInterpol"/>
<push state="in-intp"/>
</rule>
</state>
<state name="interpolated-regex">
<rule>
<include state="string-intp"/>
</rule>
<rule pattern="[\\#]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="[^\\#]+">
<token type="LiteralStringRegex"/>
</rule>
</state>
<state name="cb-string">
<rule pattern="\\[\\{}]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="\{">
<token type="LiteralStringOther"/>
<push/>
</rule>
<rule pattern="\}">
<token type="LiteralStringOther"/>
<pop depth="1"/>
</rule>
<rule pattern="[\\#{}]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="[^\\#{}]+">
<token type="LiteralStringOther"/>
</rule>
</state>
<state name="in-macro-control">
<rule pattern="\{%">
<token type="LiteralStringInterpol"/>
<push/>
</rule>
<rule pattern="%\}">
<token type="LiteralStringInterpol"/>
<pop depth="1"/>
</rule>
<rule pattern="for\b|in\b">
<token type="Keyword"/>
</rule>
<rule>
<include state="root"/>
</rule>
</state>
<state name="interpolated-string">
<rule>
<include state="string-intp"/>
</rule>
<rule pattern="[\\#]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="[^\\#]+">
<token type="LiteralStringOther"/>
</rule>
</state>
<state name="in-macro-expr">
<rule pattern="\{\{">
<token type="LiteralStringInterpol"/>
<push/>
</rule>
<rule pattern="\}\}">
<token type="LiteralStringInterpol"/>
<pop depth="1"/>
</rule>
<rule>
<include state="root"/>
</rule>
</state>
<state name="simple-string">
<rule>
<include state="string-intp-escaped"/>
</rule>
<rule pattern="[^\\&#34;#]+">
<token type="LiteralStringDouble"/>
</rule>
<rule pattern="[\\#]">
<token type="LiteralStringDouble"/>
</rule>
<rule pattern="&#34;">
<token type="LiteralStringDouble"/>
<pop depth="1"/>
</rule>
</state>
<state name="cb-intp-string">
<rule pattern="\\[\{]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="\{">
<token type="LiteralStringOther"/>
<push/>
</rule>
<rule pattern="\}">
<token type="LiteralStringOther"/>
<pop depth="1"/>
</rule>
<rule>
<include state="string-intp-escaped"/>
</rule>
<rule pattern="[\\#{}]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="[^\\#{}]+">
<token type="LiteralStringOther"/>
</rule>
</state>
<state name="string-intp-escaped">
<rule>
<include state="string-intp"/>
</rule>
<rule>
<include state="string-escaped"/>
</rule>
</state>
<state name="sb-regex">
<rule pattern="\\[\\\[\]]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="\[">
<token type="LiteralStringRegex"/>
<push/>
</rule>
<rule pattern="\][imsx]*">
<token type="LiteralStringRegex"/>
<pop depth="1"/>
</rule>
<rule>
<include state="string-intp"/>
</rule>
<rule pattern="[\\#\[\]]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="[^\\#\[\]]+">
<token type="LiteralStringRegex"/>
</rule>
</state>
<state name="classname">
<rule pattern="[A-Z_]\w*">
<token type="NameClass"/>
</rule>
<rule pattern="(\()(\s*)([A-Z_]\w*)(\s*)(\))">
<bygroups>
<token type="Punctuation"/>
<token type="Text"/>
<token type="NameClass"/>
<token type="Text"/>
<token type="Punctuation"/>
</bygroups>
</rule>
<rule>
<pop depth="1"/>
</rule>
</state>
<state name="string-escaped">
<rule pattern="\\([\\befnstv#&#34;\&#39;]|x[a-fA-F0-9]{1,2}|[0-7]{1,3})">
<token type="LiteralStringEscape"/>
</rule>
</state>
<state name="sb-intp-string">
<rule pattern="\\[\[]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="\[">
<token type="LiteralStringOther"/>
<push/>
</rule>
<rule pattern="\]">
<token type="LiteralStringOther"/>
<pop depth="1"/>
</rule>
<rule>
<include state="string-intp-escaped"/>
</rule>
<rule pattern="[\\#\[\]]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="[^\\#\[\]]+">
<token type="LiteralStringOther"/>
</rule>
</state>
<state name="pa-regex">
<rule pattern="\\[\\()]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="\(">
<token type="LiteralStringRegex"/>
<push/>
</rule>
<rule pattern="\)[imsx]*">
<token type="LiteralStringRegex"/>
<pop depth="1"/>
</rule>
<rule>
<include state="string-intp"/>
</rule>
<rule pattern="[\\#()]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="[^\\#()]+">
<token type="LiteralStringRegex"/>
</rule>
</state>
<state name="in-attr">
<rule pattern="\[">
<token type="Operator"/>
<push/>
</rule>
<rule pattern="\]">
<token type="Operator"/>
<pop depth="1"/>
</rule>
<rule>
<include state="root"/>
</rule>
</state>
<state name="ab-intp-string">
<rule pattern="\\[&lt;]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="&lt;">
<token type="LiteralStringOther"/>
<push/>
</rule>
<rule pattern="&gt;">
<token type="LiteralStringOther"/>
<pop depth="1"/>
</rule>
<rule>
<include state="string-intp-escaped"/>
</rule>
<rule pattern="[\\#&lt;&gt;]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="[^\\#&lt;&gt;]+">
<token type="LiteralStringOther"/>
</rule>
</state>
<state name="in-intp">
<rule pattern="\{">
<token type="LiteralStringInterpol"/>
<push/>
</rule>
<rule pattern="\}">
<token type="LiteralStringInterpol"/>
<pop depth="1"/>
</rule>
<rule>
<include state="root"/>
</rule>
</state>
<state name="end-part">
<rule pattern=".+">
<token type="CommentPreproc"/>
<pop depth="1"/>
</rule>
</state>
<state name="root">
<rule pattern="#.*?$">
<token type="CommentSingle"/>
</rule>
<rule pattern="(instance_sizeof|pointerof|protected|abstract|require|private|include|unless|typeof|sizeof|return|extend|ensure|rescue|ifdef|super|break|begin|until|while|elsif|yield|next|when|else|then|case|with|end|asm|if|do|as|of)\b">
<token type="Keyword"/>
</rule>
<rule pattern="(false|true|nil)\b">
<token type="KeywordConstant"/>
</rule>
<rule pattern="(module|lib)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)">
<bygroups>
<token type="Keyword"/>
<token type="Text"/>
<token type="NameNamespace"/>
</bygroups>
</rule>
<rule pattern="(def|fun|macro)(\s+)((?:[a-zA-Z_]\w*::)*)">
<bygroups>
<token type="Keyword"/>
<token type="Text"/>
<token type="NameNamespace"/>
</bygroups>
<push state="funcname"/>
</rule>
<rule pattern="def(?=[*%&amp;^`~+-/\[&lt;&gt;=])">
<token type="Keyword"/>
<push state="funcname"/>
</rule>
<rule pattern="(class|struct|union|type|alias|enum)(\s+)((?:[a-zA-Z_]\w*::)*)">
<bygroups>
<token type="Keyword"/>
<token type="Text"/>
<token type="NameNamespace"/>
</bygroups>
<push state="classname"/>
</rule>
<rule pattern="(self|out|uninitialized)\b|(is_a|responds_to)\?">
<token type="KeywordPseudo"/>
</rule>
<rule pattern="(def_equals_and_hash|assert_responds_to|forward_missing_to|def_equals|property|def_hash|parallel|delegate|debugger|getter|record|setter|spawn|pp)\b">
<token type="NameBuiltinPseudo"/>
</rule>
<rule pattern="getter[!?]|property[!?]|__(DIR|FILE|LINE)__\b">
<token type="NameBuiltinPseudo"/>
</rule>
<rule pattern="(?&lt;!\.)(get_stack_top|StaticArray|Concurrent|with_color|Reference|Scheduler|read_line|Exception|at_exit|Pointer|Channel|Float64|sprintf|Float32|Process|Object|Struct|caller|UInt16|UInt32|UInt64|system|future|Number|printf|String|Symbol|Int32|Range|Slice|Regex|Mutex|sleep|Array|Class|raise|Tuple|Deque|delay|Float|Int16|print|abort|Value|UInt8|Int64|puts|Proc|File|Void|exit|fork|Bool|Char|gets|lazy|loop|main|rand|Enum|Int8|Time|Hash|Set|Box|Nil|Dir|Int|p)\b">
<token type="NameBuiltin"/>
</rule>
<rule pattern="(?&lt;!\w)(&lt;&lt;-?)([&#34;`\&#39;]?)([a-zA-Z_]\w*)(\2)(.*?\n)">
<token type="LiteralStringHeredoc"/>
</rule>
<rule pattern="(&lt;&lt;-?)(&#34;|\&#39;)()(\2)(.*?\n)">
<token type="LiteralStringHeredoc"/>
</rule>
<rule pattern="__END__">
<token type="CommentPreproc"/>
<push state="end-part"/>
</rule>
<rule pattern="(?:^|(?&lt;=[=&lt;&gt;~!:])|(?&lt;=(?:\s|;)when\s)|(?&lt;=(?:\s|;)or\s)|(?&lt;=(?:\s|;)and\s)|(?&lt;=\.index\s)|(?&lt;=\.scan\s)|(?&lt;=\.sub\s)|(?&lt;=\.sub!\s)|(?&lt;=\.gsub\s)|(?&lt;=\.gsub!\s)|(?&lt;=\.match\s)|(?&lt;=(?:\s|;)if\s)|(?&lt;=(?:\s|;)elsif\s)|(?&lt;=^when\s)|(?&lt;=^index\s)|(?&lt;=^scan\s)|(?&lt;=^sub\s)|(?&lt;=^gsub\s)|(?&lt;=^sub!\s)|(?&lt;=^gsub!\s)|(?&lt;=^match\s)|(?&lt;=^if\s)|(?&lt;=^elsif\s))(\s*)(/)">
<bygroups>
<token type="Text"/>
<token type="LiteralStringRegex"/>
</bygroups>
<push state="multiline-regex"/>
</rule>
<rule pattern="(?&lt;=\(|,|\[)/">
<token type="LiteralStringRegex"/>
<push state="multiline-regex"/>
</rule>
<rule pattern="(\s+)(/)(?![\s=])">
<bygroups>
<token type="Text"/>
<token type="LiteralStringRegex"/>
</bygroups>
<push state="multiline-regex"/>
</rule>
<rule pattern="(0o[0-7]+(?:_[0-7]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?">
<bygroups>
<token type="LiteralNumberOct"/>
<token type="Text"/>
<token type="Operator"/>
</bygroups>
</rule>
<rule pattern="(0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?">
<bygroups>
<token type="LiteralNumberHex"/>
<token type="Text"/>
<token type="Operator"/>
</bygroups>
</rule>
<rule pattern="(0b[01]+(?:_[01]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?">
<bygroups>
<token type="LiteralNumberBin"/>
<token type="Text"/>
<token type="Operator"/>
</bygroups>
</rule>
<rule pattern="((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)(?:e[+-]?[0-9]+)?(?:_?f[0-9]+)?)(\s*)([/?])?">
<bygroups>
<token type="LiteralNumberFloat"/>
<token type="Text"/>
<token type="Operator"/>
</bygroups>
</rule>
<rule pattern="((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)?(?:e[+-]?[0-9]+)(?:_?f[0-9]+)?)(\s*)([/?])?">
<bygroups>
<token type="LiteralNumberFloat"/>
<token type="Text"/>
<token type="Operator"/>
</bygroups>
</rule>
<rule pattern="((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)?(?:e[+-]?[0-9]+)?(?:_?f[0-9]+))(\s*)([/?])?">
<bygroups>
<token type="LiteralNumberFloat"/>
<token type="Text"/>
<token type="Operator"/>
</bygroups>
</rule>
<rule pattern="(0\b|[1-9][\d]*(?:_\d+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?">
<bygroups>
<token type="LiteralNumberInteger"/>
<token type="Text"/>
<token type="Operator"/>
</bygroups>
</rule>
<rule pattern="@@[a-zA-Z_]\w*">
<token type="NameVariableClass"/>
</rule>
<rule pattern="@[a-zA-Z_]\w*">
<token type="NameVariableInstance"/>
</rule>
<rule pattern="\$\w+">
<token type="NameVariableGlobal"/>
</rule>
<rule pattern="\$[!@&amp;`\&#39;+~=/\\,;.&lt;&gt;_*$?:&#34;^-]">
<token type="NameVariableGlobal"/>
</rule>
<rule pattern="\$-[0adFiIlpvw]">
<token type="NameVariableGlobal"/>
</rule>
<rule pattern="::">
<token type="Operator"/>
</rule>
<rule>
<include state="strings"/>
</rule>
<rule pattern="\?(\\[MC]-)*(\\([\\befnrtv#&#34;\&#39;]|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)(?!\w)">
<token type="LiteralStringChar"/>
</rule>
<rule pattern="[A-Z][A-Z_]+\b">
<token type="NameConstant"/>
</rule>
<rule pattern="\{%">
<token type="LiteralStringInterpol"/>
<push state="in-macro-control"/>
</rule>
<rule pattern="\{\{">
<token type="LiteralStringInterpol"/>
<push state="in-macro-expr"/>
</rule>
<rule pattern="(@\[)(\s*)([A-Z]\w*)">
<bygroups>
<token type="Operator"/>
<token type="Text"/>
<token type="NameDecorator"/>
</bygroups>
<push state="in-attr"/>
</rule>
<rule pattern="(\.|::)(\[\]\?|&lt;=&gt;|===|\[\]=|&gt;&gt;|&amp;&amp;|\*\*|\[\]|\|\||&gt;=|=~|!~|&lt;&lt;|&lt;=|!=|==|&lt;|/|=|-|\+|&gt;|\*|&amp;|%|\^|!|\||~)">
<bygroups>
<token type="Operator"/>
<token type="NameOperator"/>
</bygroups>
</rule>
<rule pattern="(\.|::)([a-zA-Z_]\w*[!?]?|[*%&amp;^`~+\-/\[&lt;&gt;=])">
<bygroups>
<token type="Operator"/>
<token type="Name"/>
</bygroups>
</rule>
<rule pattern="[a-zA-Z_]\w*(?:[!?](?!=))?">
<token type="Name"/>
</rule>
<rule pattern="(\[|\]\??|\*\*|&lt;=&gt;?|&gt;=|&lt;&lt;?|&gt;&gt;?|=~|===|!~|&amp;&amp;?|\|\||\.{1,3})">
<token type="Operator"/>
</rule>
<rule pattern="[-+/*%=&lt;&gt;&amp;!^|~]=?">
<token type="Operator"/>
</rule>
<rule pattern="[(){};,/?:\\]">
<token type="Punctuation"/>
</rule>
<rule pattern="\s+">
<token type="Text"/>
</rule>
</state>
<state name="multiline-regex">
<rule>
<include state="string-intp"/>
</rule>
<rule pattern="\\\\">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="\\/">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="[\\#]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="[^\\/#]+">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="/[imsx]*">
<token type="LiteralStringRegex"/>
<pop depth="1"/>
</rule>
</state>
<state name="ab-string">
<rule pattern="\\[\\&lt;&gt;]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="&lt;">
<token type="LiteralStringOther"/>
<push/>
</rule>
<rule pattern="&gt;">
<token type="LiteralStringOther"/>
<pop depth="1"/>
</rule>
<rule pattern="[\\#&lt;&gt;]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="[^\\#&lt;&gt;]+">
<token type="LiteralStringOther"/>
</rule>
</state>
<state name="pa-string">
<rule pattern="\\[\\()]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="\(">
<token type="LiteralStringOther"/>
<push/>
</rule>
<rule pattern="\)">
<token type="LiteralStringOther"/>
<pop depth="1"/>
</rule>
<rule pattern="[\\#()]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="[^\\#()]+">
<token type="LiteralStringOther"/>
</rule>
</state>
<state name="strings">
<rule pattern="\:@{0,2}[a-zA-Z_]\w*[!?]?">
<token type="LiteralStringSymbol"/>
</rule>
<rule pattern="\:@{0,2}(\[\]\?|&lt;=&gt;|===|\[\]=|&gt;&gt;|&amp;&amp;|\*\*|\[\]|\|\||&gt;=|=~|!~|&lt;&lt;|&lt;=|!=|==|&lt;|/|=|-|\+|&gt;|\*|&amp;|%|\^|!|\||~)">
<token type="LiteralStringSymbol"/>
</rule>
<rule pattern=":&#39;(\\\\|\\&#39;|[^&#39;])*&#39;">
<token type="LiteralStringSymbol"/>
</rule>
<rule pattern="&#39;(\\\\|\\&#39;|[^&#39;]|\\[^&#39;\\]+)&#39;">
<token type="LiteralStringChar"/>
</rule>
<rule pattern=":&#34;">
<token type="LiteralStringSymbol"/>
<push state="simple-sym"/>
</rule>
<rule pattern="([a-zA-Z_]\w*)(:)(?!:)">
<bygroups>
<token type="LiteralStringSymbol"/>
<token type="Punctuation"/>
</bygroups>
</rule>
<rule pattern="&#34;">
<token type="LiteralStringDouble"/>
<push state="simple-string"/>
</rule>
<rule pattern="(?&lt;!\.)`">
<token type="LiteralStringBacktick"/>
<push state="simple-backtick"/>
</rule>
<rule pattern="%\{">
<token type="LiteralStringOther"/>
<push state="cb-intp-string"/>
</rule>
<rule pattern="%[wi]\{">
<token type="LiteralStringOther"/>
<push state="cb-string"/>
</rule>
<rule pattern="%r\{">
<token type="LiteralStringRegex"/>
<push state="cb-regex"/>
</rule>
<rule pattern="%\[">
<token type="LiteralStringOther"/>
<push state="sb-intp-string"/>
</rule>
<rule pattern="%[wi]\[">
<token type="LiteralStringOther"/>
<push state="sb-string"/>
</rule>
<rule pattern="%r\[">
<token type="LiteralStringRegex"/>
<push state="sb-regex"/>
</rule>
<rule pattern="%\(">
<token type="LiteralStringOther"/>
<push state="pa-intp-string"/>
</rule>
<rule pattern="%[wi]\(">
<token type="LiteralStringOther"/>
<push state="pa-string"/>
</rule>
<rule pattern="%r\(">
<token type="LiteralStringRegex"/>
<push state="pa-regex"/>
</rule>
<rule pattern="%&lt;">
<token type="LiteralStringOther"/>
<push state="ab-intp-string"/>
</rule>
<rule pattern="%[wi]&lt;">
<token type="LiteralStringOther"/>
<push state="ab-string"/>
</rule>
<rule pattern="%r&lt;">
<token type="LiteralStringRegex"/>
<push state="ab-regex"/>
</rule>
<rule pattern="(%r([\W_]))((?:\\\2|(?!\2).)*)(\2[imsx]*)">
<token type="LiteralString"/>
</rule>
<rule pattern="(%[wi]([\W_]))((?:\\\2|(?!\2).)*)(\2)">
<token type="LiteralString"/>
</rule>
<rule pattern="(?&lt;=[-+/*%=&lt;&gt;&amp;!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)">
<bygroups>
<token type="Text"/>
<token type="LiteralStringOther"/>
<token type="None"/>
</bygroups>
</rule>
<rule pattern="^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)">
<bygroups>
<token type="Text"/>
<token type="LiteralStringOther"/>
<token type="None"/>
</bygroups>
</rule>
<rule pattern="(%([\[{(&lt;]))((?:\\\2|(?!\2).)*)(\2)">
<token type="LiteralString"/>
</rule>
</state>
<state name="sb-string">
<rule pattern="\\[\\\[\]]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="\[">
<token type="LiteralStringOther"/>
<push/>
</rule>
<rule pattern="\]">
<token type="LiteralStringOther"/>
<pop depth="1"/>
</rule>
<rule pattern="[\\#\[\]]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="[^\\#\[\]]+">
<token type="LiteralStringOther"/>
</rule>
</state>
<state name="funcname">
<rule pattern="(?:([a-zA-Z_]\w*)(\.))?([a-zA-Z_]\w*[!?]?|\*\*?|[-+]@?|[/%&amp;|^`~]|\[\]=?|&lt;&lt;|&gt;&gt;|&lt;=?&gt;|&gt;=?|===?)">
<bygroups>
<token type="NameClass"/>
<token type="Operator"/>
<token type="NameFunction"/>
</bygroups>
<pop depth="1"/>
</rule>
<rule>
<pop depth="1"/>
</rule>
</state>
<state name="simple-sym">
<rule>
<include state="string-escaped"/>
</rule>
<rule pattern="[^\\&#34;#]+">
<token type="LiteralStringSymbol"/>
</rule>
<rule pattern="[\\#]">
<token type="LiteralStringSymbol"/>
</rule>
<rule pattern="&#34;">
<token type="LiteralStringSymbol"/>
<pop depth="1"/>
</rule>
</state>
</rules>
</lexer>

View File

@@ -1,5 +1,5 @@
name: tartrazine
version: 0.7.0
version: 0.6.4
authors:
- Roberto Alsina <roberto.alsina@gmail.com>

View File

@@ -1 +0,0 @@
.e {color: #aa0000;background-color: #ffaaaa;}.b {background-color: #f0f3f3;tab-size: 8;}.k {color: #006699;font-weight: bold;}.kp {font-weight: 600;}.kt {color: #007788;}.na {color: #330099;}.nb {color: #336666;}.nc {color: #00aa88;font-weight: bold;}.nc {color: #336600;}.nd {color: #9999ff;}.ne {color: #999999;font-weight: bold;}.ne {color: #cc0000;font-weight: bold;}.nf {color: #cc00ff;}.nl {color: #9999ff;}.nn {color: #00ccff;font-weight: bold;}.nt {color: #330099;font-weight: bold;}.nv {color: #003333;}.ls {color: #cc3300;}.lsd {font-style: italic;}.lse {color: #cc3300;font-weight: bold;}.lsi {color: #aa0000;}.lso {color: #cc3300;}.lsr {color: #33aaaa;}.lss {color: #ffcc33;}.ln {color: #ff6600;}.o {color: #555555;}.ow {color: #000000;font-weight: bold;}.c {color: #0099ff;font-style: italic;}.cs {font-weight: bold;}.cp {color: #009999;font-style: normal;}.gd {background-color: #ffcccc;border: 1px solid #cc0000;}.ge {font-style: italic;}.ge {color: #ff0000;}.gh {color: #003300;font-weight: bold;}.gi {background-color: #ccffcc;border: 1px solid #00cc00;}.go {color: #aaaaaa;}.gp {color: #000099;font-weight: bold;}.gs {font-weight: bold;}.gs {color: #003300;font-weight: bold;}.gt {color: #99cc66;}.gu {text-decoration: underline;}.tw {color: #bbbbbb;}.lh {}

View File

@@ -1 +0,0 @@
.b {color: #b7b7b7;background-color: #101010;font-weight: bold;tab-size: 8;}.lh {color: #8eaaaa;background-color: #232323;}.t {color: #b7b7b7;}.e {color: #de6e6e;}.c {color: #333333;}.cp {color: #876c4f;}.cpf {color: #5f8787;}.k {color: #d69094;}.kt {color: #de6e6e;}.na {color: #8eaaaa;}.nb {color: #de6e6e;}.nbp {color: #de6e6e;}.nc {color: #8eaaaa;}.nc {color: #dab083;}.nd {color: #dab083;}.nf {color: #8eaaaa;}.nn {color: #8eaaaa;}.nt {color: #d69094;}.nv {color: #8eaaaa;}.nvi {color: #de6e6e;}.ln {color: #dab083;}.o {color: #60a592;}.ow {color: #d69094;}.l {color: #5f8787;}.ls {color: #5f8787;}.lsi {color: #876c4f;}.lsr {color: #60a592;}.lss {color: #dab083;}

View File

@@ -1,413 +0,0 @@
require "./constants/lexers"
require "./heuristics"
require "baked_file_system"
require "crystal/syntax_highlighter"
module Tartrazine
# Bakes the `../lexers` folder (resolved relative to this file's directory)
# through BakedFileSystem. The rest of this file reads lexer XML definitions
# and `heuristics.yml` from it via `LexerFiles.get`.
class LexerFiles
extend BakedFileSystem
bake_folder "../lexers", __DIR__
end
# Resolve a lexer from whichever hint is available.
#
# Hints are tried in this order: an explicit *name* (ignored when it is nil
# or the literal "autodetect"), then *filename*, then *mimetype*. When no
# hint is usable, the "plaintext" lexer is loaded from the baked lexer files.
def self.lexer(name : String? = nil, filename : String? = nil, mimetype : String? = nil) : BaseLexer
  if name && name != "autodetect"
    lexer_by_name(name)
  elsif filename
    lexer_by_filename(filename)
  elsif mimetype
    lexer_by_mimetype(mimetype)
  else
    plaintext_xml = LexerFiles.get("/#{LEXERS_BY_NAME["plaintext"]}.xml").gets_to_end
    RegexLexer.from_xml(plaintext_xml)
  end
end
# Load the lexer registered for *mimetype* in `LEXERS_BY_MIMETYPE`.
#
# Raises `Exception` ("Unknown mimetype: ...") when the table has no entry.
private def self.lexer_by_mimetype(mimetype : String) : BaseLexer
  xml_name = LEXERS_BY_MIMETYPE.fetch(mimetype, nil)
  if xml_name.nil?
    raise Exception.new("Unknown mimetype: #{mimetype}")
  end
  definition = LexerFiles.get("/#{xml_name}.xml")
  RegexLexer.from_xml(definition.gets_to_end)
end
# Load a lexer by its name.
#
# * "crystal" is served by `CrystalLexer` instead of an XML definition.
# * Otherwise the downcased name is looked up in `LEXERS_BY_NAME`.
# * An unknown name containing "+" is handed to `create_delegating_lexer`.
# * Any other unknown name raises `Exception` ("Unknown lexer: ...").
#
# NOTE(review): the "crystal" comparison is case-sensitive while the table
# lookup uses the downcased name; confirm this is intended.
private def self.lexer_by_name(name : String) : BaseLexer
  if name == "crystal"
    return CrystalLexer.new
  end

  xml_name = LEXERS_BY_NAME.fetch(name.downcase, nil)
  if xml_name.nil?
    return create_delegating_lexer(name) if name.includes?("+")
    raise Exception.new("Unknown lexer: #{name}")
  end

  RegexLexer.from_xml(LexerFiles.get("/#{xml_name}.xml").gets_to_end)
end
# Pick a lexer from a file name.
#
# Files ending in ".cr" get a `CrystalLexer`. Otherwise every glob in
# `LEXERS_BY_FILENAME` is matched against the base name of *filename* and
# the lexers of all matching globs are collected:
#
# * no candidate: the "plaintext" lexer is used;
# * one candidate: that lexer is used;
# * several candidates: `lexer_by_content` chooses one, and if that choice
#   cannot be turned into a lexer an `Exception` listing the candidates is
#   raised.
private def self.lexer_by_filename(filename : String) : BaseLexer
  return CrystalLexer.new if filename.ends_with?(".cr")

  base_name = File.basename(filename)
  candidates = Set(String).new
  LEXERS_BY_FILENAME.each do |pattern, lexer_names|
    next unless File.match?(pattern, base_name)
    candidates += lexer_names.to_set
  end

  if candidates.size > 1
    # Ambiguous file name: let the content heuristics decide
    guess = self.lexer_by_content(filename)
    begin
      return self.lexer(guess)
    rescue Exception
      raise Exception.new("Multiple lexers match the filename: #{candidates.to_a.join(", ")}, heuristics suggest #{guess} but there is no matching lexer.")
    end
  end

  xml_name = candidates.empty? ? LEXERS_BY_NAME["plaintext"] : candidates.first
  RegexLexer.from_xml(LexerFiles.get("/#{xml_name}.xml").gets_to_end)
end
# Ask the heuristics (loaded from the baked `heuristics.yml`) which
# lexer fits the file *fname*, passing them the file name and its contents.
#
# Returns the heuristic's answer when it is a String, or the first entry
# when it is an Array(String). Raises `Exception` ("No lexer found for ...")
# when the heuristics return nil.
private def self.lexer_by_content(fname : String) : String?
  heuristics = Linguist::Heuristic.from_yaml(LexerFiles.get("/heuristics.yml").gets_to_end)
  verdict = heuristics.run(fname, File.read(fname))

  raise Exception.new "No lexer found for #{fname}" if verdict.nil?
  return verdict if verdict.is_a?(String)
  verdict.first if verdict.is_a?(Array(String))
end
# Build a `DelegatingLexer` from a combined name of the form
# "language+root": the part before the first "+" names the language lexer,
# everything after it names the root lexer. Both are resolved via `lexer`.
private def self.create_delegating_lexer(name : String) : BaseLexer
  language, root = name.split("+", 2)
  DelegatingLexer.new(lexer(language), lexer(root))
end
# Names of all known lexers (the keys of `LEXERS_BY_NAME`), sorted
def self.lexers : Array(String)
  names = LEXERS_BY_NAME.keys
  names.sort
end
# A token, the output of the tokenizer
alias Token = NamedTuple(type: String, value: String)
# Common abstract parent of tokenizers: `Tokenizer` inherits from it and
# `Lexer#tokenizer` is declared to return it.
abstract class BaseTokenizer
end
# Iterator that turns a text into a stream of `Token`s by running the
# rules of the lexer's current state against the input bytes.
class Tokenizer < BaseTokenizer
include Iterator(Token)
property lexer : BaseLexer
# Input text as raw bytes; `pos` is a byte offset into it
property text : Bytes
property pos : Int32 = 0
# Tokens already produced but not yet handed out by `next`
@dq = Deque(Token).new
# Stack of state names; the rules of the last entry are the ones tried
property state_stack = ["root"]
# Builds a tokenizer for *text* using *lexer*.
#
# When *secondary* is true the `ensure_nl` handling below is skipped.
def initialize(@lexer : BaseLexer, text : String, secondary = false)
# Respect the `ensure_nl` config option
if text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary
text += "\n"
end
@text = text.to_slice
end
# Returns the next token, or `stop` when the input is exhausted.
#
# Queued tokens are served first. Otherwise the input is scanned from
# `pos`, queueing every token produced, until the end of the text or until
# a position where no rule matches; the method then calls itself to serve
# from the queue (or to stop).
def next : Iterator::Stop | Token
if @dq.size > 0
return @dq.shift
end
if pos == @text.size
return stop
end
matched = false
while @pos < @text.size
# Try the rules of the current state in order; the first match wins.
# `matched` is the local declared above, so the outcome of the last rule
# tried is still visible after the block.
@lexer.states[@state_stack.last].rules.each do |rule|
matched, new_pos, new_tokens = rule.match(@text, @pos, self)
if matched
@pos = new_pos
split_tokens(new_tokens).each { |token| @dq << token }
break
end
end
if !matched
# No rule matched at this position: a newline byte (10) becomes a Text
# token and resets the state stack to "root"; any other byte becomes a
# one-byte Error token.
if @text[@pos] == 10u8
@dq << {type: "Text", value: "\n"}
@state_stack = ["root"]
else
@dq << {type: "Error", value: String.new(@text[@pos..@pos])}
end
@pos += 1
# Leave the scan loop so the token queued above is handed out
break
end
end
self.next
end
# Splits every token whose value contains "\n" into one token per line,
# keeping the token type. Each piece except the last gets its "\n" back,
# so a value that ends in "\n" yields a final token with an empty value.
# Tokens without a newline are passed through unchanged.
def split_tokens(tokens : Array(Token)) : Array(Token)
split_tokens = [] of Token
tokens.each do |token|
if token[:value].includes?("\n")
values = token[:value].split("\n")
values.each_with_index do |value, index|
value += "\n" if index < values.size - 1
split_tokens << {type: token[:type], value: value}
end
else
split_tokens << token
end
end
split_tokens
end
end
alias BaseLexer = Lexer
# Base class of the lexers in this file. It holds the lexer-wide options and
# the table of named states, and creates tokenizers for a text.
abstract class Lexer
# Lexer-wide options. `RegexLexer.from_xml` replaces this tuple with the
# values found in the XML `config` element. `not_multiline`, `dot_all` and
# `case_insensitive` are passed on to the rules it builds; `ensure_nl` is
# read by the tokenizers, which append a final newline when it is set.
property config = {
name: "",
priority: 0.0,
case_insensitive: false,
dot_all: false,
not_multiline: false,
ensure_nl: false,
}
# The lexer's states, indexed by state name
property states = {} of String => State
# Create a tokenizer for *text*. *secondary* is handed to the tokenizer,
# which skips the `ensure_nl` newline when it is true.
def tokenizer(text : String, secondary = false) : BaseTokenizer
Tokenizer.new(self, text, secondary)
end
end
# This implements a lexer for Pygments RegexLexers as expressed
# in Chroma's XML serialization.
#
# For explanations on what actions and states do
# the Pygments documentation is a good place to start.
# https://pygments.org/docs/lexerdevelopment/
class RegexLexer < BaseLexer
  # Drop tokens with an empty value and merge each run of neighbouring
  # tokens that share a type into one token whose value is the
  # concatenation of the run. Returns a new array.
  def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token)
    merged = [] of Tartrazine::Token
    tokens.each do |current|
      next if current[:value] == ""
      previous = merged.last?
      if previous && previous[:type] == current[:type]
        # Same type as the token before it: extend that token in place
        merged[-1] = {type: previous[:type], value: previous[:value] + current[:value]}
      else
        merged << current
      end
    end
    merged
  end

  # Build a lexer from an XML lexer definition.
  #
  # The `config` child of the root element (if any) fills `config`; every
  # `state` element below the `rules` child becomes a `State` whose rules
  # are created from its `rule` children. Raises when two states share a
  # name.
  def self.from_xml(xml : String) : Lexer
    l = RegexLexer.new
    lexer_node = XML.parse(xml).first_element_child
    return l unless lexer_node

    config = lexer_node.children.find { |node| node.name == "config" }
    if config
      l.config = {
        name: xml_to_s(config, name) || "",
        priority: xml_to_f(config, priority) || 0.0,
        not_multiline: xml_to_s(config, not_multiline) == "true",
        dot_all: xml_to_s(config, dot_all) == "true",
        case_insensitive: xml_to_s(config, case_insensitive) == "true",
        ensure_nl: xml_to_s(config, ensure_nl) == "true",
      }
    end

    rules_node = lexer_node.children.find { |node| node.name == "rules" }
    return l unless rules_node

    # The `rules` element contains the states ...
    rules_node.children.each do |state_node|
      next unless state_node.name == "state"

      state = State.new
      state.name = state_node["name"]
      raise Exception.new("Duplicate state: #{state.name}") if l.states.has_key?(state.name)
      l.states[state.name] = state

      # ... and each state contains its rules
      state_node.children.each do |rule_node|
        next unless rule_node.name == "rule"

        rule = if !rule_node["pattern"]?.nil?
                 # A rule with a pattern is a regular regex rule
                 Rule.new(rule_node,
                   multiline: !l.config[:not_multiline],
                   dotall: l.config[:dot_all],
                   ignorecase: l.config[:case_insensitive])
               elsif rule_node.first_element_child.try &.name == "include"
                 IncludeStateRule.new(rule_node)
               else
                 UnconditionalRule.new(rule_node)
               end
        state.rules << rule
      end
    end
    l
  end
end
# A lexer that takes two lexers as arguments. A root lexer
# and a language lexer. Everything is scanned using the
# language lexer, afterwards all `Other` tokens are lexed
# using the root lexer.
#
# This is useful for things like template languages, where
# you have Jinja + HTML or Jinja + CSS and so on.
class DelegatingLexer < Lexer
  # Lexer that is run over the whole text first
  property language_lexer : BaseLexer
  # Lexer applied to the "Other" tokens left by the language lexer
  property root_lexer : BaseLexer

  def initialize(language_lexer : BaseLexer, root_lexer : BaseLexer)
    @language_lexer = language_lexer
    @root_lexer = root_lexer
  end

  # Create a `DelegatingTokenizer` for *text*; *secondary* is passed through.
  def tokenizer(text : String, secondary = false) : DelegatingTokenizer
    DelegatingTokenizer.new(self, text, secondary)
  end
end
# This Tokenizer works with a DelegatingLexer. It first tokenizes
# using the language lexer, and "Other" tokens are tokenized using
# the root lexer.
class DelegatingTokenizer < BaseTokenizer
  include Iterator(Token)
  # Tokens waiting to be returned by `next`
  @dq = Deque(Token).new
  @language_tokenizer : BaseTokenizer

  def initialize(@lexer : DelegatingLexer, text : String, secondary = false)
    # Respect the `ensure_nl` config option
    append_newline = text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary
    text += "\n" if append_newline
    @language_tokenizer = @lexer.language_lexer.tokenizer(text, true)
  end

  # Return the next token. Tokens of type "Other" coming from the language
  # tokenizer are not returned themselves: their value is tokenized with the
  # root lexer and the resulting tokens are returned instead.
  def next : Iterator::Stop | Token
    loop do
      return @dq.shift unless @dq.empty?

      token = @language_tokenizer.next
      return stop if token.is_a?(Iterator::Stop)

      if token[:type] == "Other"
        root_tokenizer = @lexer.root_lexer.tokenizer(token[:value], true)
        root_tokenizer.each { |root_token| @dq << root_token }
      else
        @dq << token
      end
    end
  end
end
# A Lexer state. A state has a name and a list of rules.
# The state machine has a state stack containing references
# to states to decide which rules to apply.
struct State
  property name : String = ""
  property rules = Array(BaseRule).new

  # Return a new state holding this state's rules followed by *other*'s
  # rules. The new state gets a generated name of 8 base58 characters.
  def +(other : State)
    combined = State.new
    combined.rules = rules + other.rules
    combined.name = Random.base58(8)
    combined
  end
end
# `Crystal::SyntaxHighlighter` subclass that, instead of writing output,
# appends a `Token` to `@tokens` for every piece it is asked to render.
class CustomCrystalHighlighter < Crystal::SyntaxHighlighter
  @tokens = [] of Token

  # Runs the block, then records the string form of its result as a
  # "LiteralString" token.
  def render_delimiter(&block)
    rendered = block.call.to_s
    @tokens << {type: "LiteralString", value: rendered}
  end

  # Records the opening `#{`, then runs the block and records the string
  # form of its result as "Text", then records the closing `}`.
  def render_interpolation(&block)
    @tokens << {type: "LiteralStringInterpol", value: "\#{"}
    inner = block.call.to_s
    @tokens << {type: "Text", value: inner}
    @tokens << {type: "LiteralStringInterpol", value: "}"}
  end

  # Runs the block, then records the string form of its result as a
  # "LiteralString" token.
  def render_string_array(&block)
    rendered = block.call.to_s
    @tokens << {type: "LiteralString", value: rendered}
  end

  # Records *value* as one token; the token type name is chosen from the
  # highlighter's token *type*, with "Text" for anything not listed.
  # ameba:disable Metrics/CyclomaticComplexity
  def render(type : TokenType, value : String)
    token_type = case type
                 when .comment?           then "Comment"
                 when .number?            then "LiteralNumber"
                 when .char?              then "LiteralStringChar"
                 when .symbol?            then "LiteralStringSymbol"
                 when .const?             then "NameConstant"
                 when .string?            then "LiteralString"
                 when .ident?             then "NameVariable"
                 when .keyword?, .self?   then "NameKeyword"
                 when .primitive_literal? then "Literal"
                 when .operator?          then "Operator"
                 when Crystal::SyntaxHighlighter::TokenType::DELIMITED_TOKEN, Crystal::SyntaxHighlighter::TokenType::DELIMITER_START, Crystal::SyntaxHighlighter::TokenType::DELIMITER_END
                   "LiteralString"
                 else
                   "Text"
                 end
    @tokens << {type: token_type, value: value}
  end
end
# Tokenizer backed by `CustomCrystalHighlighter`: the whole text is
# highlighted when the tokenizer is created, and `next` then walks over the
# tokens that were collected.
class CrystalTokenizer < Tartrazine::BaseTokenizer
  include Iterator(Token)
  @hl = CustomCrystalHighlighter.new
  @lexer : BaseLexer
  @iter : Iterator(Token)

  def initialize(@lexer : BaseLexer, text : String, secondary = false)
    # Respect the `ensure_nl` config option
    append_newline = text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary
    text += "\n" if append_newline

    # All tokenizing happens here, up front
    @hl.highlight(text)
    @iter = @hl.@tokens.each
  end

  # Hand out the collected tokens one by one
  def next : Iterator::Stop | Token
    @iter.next
  end
end
# Lexer whose tokenizer is a `CrystalTokenizer` rather than the rule-based
# `Tokenizer` of the base class.
class CrystalLexer < BaseLexer
# Create a `CrystalTokenizer` for *text*; *secondary* is passed through.
def tokenizer(text : String, secondary = false) : BaseTokenizer
CrystalTokenizer.new(self, text, secondary)
end
end
end

File diff suppressed because one or more lines are too long

View File

@@ -1,11 +0,0 @@
from flask import Flask, request
app = Flask("{{name}}")
@app.route('/')
def handle():
return "Hello World from Flask!"
@app.route('/ping')
def ping():
return "OK"

View File

@@ -1 +0,0 @@
[{"type":"KeywordNamespace","value":"from"},{"type":"Text","value":" "},{"type":"NameNamespace","value":"flask"},{"type":"Text","value":" "},{"type":"KeywordNamespace","value":"import"},{"type":"Text","value":" "},{"type":"Name","value":"Flask"},{"type":"Punctuation","value":","},{"type":"Text","value":" "},{"type":"Name","value":"request"},{"type":"Text","value":"\n\n"},{"type":"Name","value":"app"},{"type":"Text","value":" "},{"type":"Operator","value":"="},{"type":"Text","value":" "},{"type":"Name","value":"Flask"},{"type":"Punctuation","value":"("},{"type":"LiteralStringDouble","value":"\""},{"type":"CommentPreproc","value":"{{"},{"type":"NameVariable","value":"name"},{"type":"CommentPreproc","value":"}}"},{"type":"LiteralStringDouble","value":"\")"},{"type":"Text","value":"\n\n"},{"type":"NameDecorator","value":"@app.route"},{"type":"Punctuation","value":"("},{"type":"LiteralStringSingle","value":"'/'"},{"type":"Punctuation","value":")"},{"type":"Text","value":"\n"},{"type":"Keyword","value":"def"},{"type":"Text","value":" "},{"type":"NameFunction","value":"handle"},{"type":"Punctuation","value":"():"},{"type":"Text","value":"\n "},{"type":"Keyword","value":"return"},{"type":"Text","value":" "},{"type":"LiteralStringDouble","value":"\"Hello World from Flask!\""},{"type":"Text","value":"\n\n"},{"type":"NameDecorator","value":"@app.route"},{"type":"Punctuation","value":"("},{"type":"LiteralStringSingle","value":"'/ping'"},{"type":"Punctuation","value":")"},{"type":"Text","value":"\n"},{"type":"Keyword","value":"def"},{"type":"Text","value":" "},{"type":"NameFunction","value":"ping"},{"type":"Punctuation","value":"():"},{"type":"Text","value":"\n "},{"type":"Keyword","value":"return"},{"type":"Text","value":" "},{"type":"LiteralStringDouble","value":"\"OK\""},{"type":"Text","value":"\n"}]

View File

@@ -3,12 +3,6 @@ require "./spec_helper"
# These are the testcases from Pygments
testcases = Dir.glob("#{__DIR__}/tests/**/*txt").sort
# These are custom testcases
examples = Dir.glob("#{__DIR__}/examples/**/*.*").reject(&.ends_with? ".json").sort!
# CSS Stylesheets
css_files = Dir.glob("#{__DIR__}/css/*.css")
# These lexers don't load because of parsing issues
failing_lexers = {
"webgpu_shading_language",
@@ -34,7 +28,6 @@ bad_in_chroma = {
"#{__DIR__}/tests/octave/test_multilinecomment.txt",
"#{__DIR__}/tests/php/test_string_escaping_run.txt",
"#{__DIR__}/tests/python_2/test_cls_builtin.txt",
"#{__DIR__}/tests/bqn/test_syntax_roles.txt", # This one only fails in CI
}
known_bad = {
@@ -57,14 +50,6 @@ not_my_fault = {
describe Tartrazine do
describe "Lexer" do
examples.each do |example|
it "parses #{example}".split("/")[-2...].join("/") do
lexer = Tartrazine.lexer(name: File.basename(File.dirname(example)).downcase)
text = File.read(example)
expected = Array(Tartrazine::Token).from_json(File.read("#{example}.json"))
Tartrazine::RegexLexer.collapse_tokens(lexer.tokenizer(text).to_a).should eq expected
end
end
testcases.each do |testcase|
if known_bad.includes?(testcase)
pending "parses #{testcase}".split("/")[-2...].join("/") do
@@ -83,48 +68,12 @@ describe Tartrazine do
end
end
end
describe "formatter" do
css_files.each do |css_file|
it "generates #{css_file}" do
css = File.read(css_file)
theme = Tartrazine.theme(File.basename(css_file, ".css"))
formatter = Tartrazine::Html.new(theme: theme)
formatter.style_defs.strip.should eq css.strip
end
end
end
describe "to_html" do
it "should do basic highlighting" do
html = Tartrazine.to_html("puts 'Hello, World!'", "ruby", standalone: false)
html.should eq(
"<pre class=\"b\" ><code class=\"b\"><span class=\"nb\">puts</span><span class=\"t\"> </span><span class=\"lss\">&#39;Hello, World!&#39;</span></code></pre>"
)
end
end
describe "to_ansi" do
it "should do basic highlighting" do
ansi = Tartrazine.to_ansi("puts 'Hello, World!'", "ruby")
if ENV.fetch("CI", nil)
# In Github Actions there is no terminal so these don't
# really work
ansi.should eq(
"puts 'Hello, World!'"
)
else
ansi.should eq(
"\e[38;2;171;70;66mputs\e[0m\e[38;2;216;216;216m \e[0m'Hello, World!'"
)
end
end
end
end
# Helper that creates lexer and tokenizes
def tokenize(lexer_name, text)
tokenizer = Tartrazine.lexer(lexer_name).tokenizer(text)
Tartrazine::RegexLexer.collapse_tokens(tokenizer.to_a)
Tartrazine::Lexer.collapse_tokens(tokenizer.to_a)
end
# Helper that tokenizes using chroma to validate the lexer
@@ -136,5 +85,5 @@ def chroma_tokenize(lexer_name, text)
["-f", "json", "-l", lexer_name],
input: input, output: output
)
Tartrazine::RegexLexer.collapse_tokens(Array(Tartrazine::Token).from_json(output.to_s))
Tartrazine::Lexer.collapse_tokens(Array(Tartrazine::Token).from_json(output.to_s))
end

View File

@@ -0,0 +1,16 @@
---input---
@[FOO::Bar::Baz(opt: "xx")]
---tokens---
'@[' Operator
'FOO::Bar::Baz' Name.Decorator
'(' Punctuation
'opt' Literal.String.Symbol
':' Punctuation
' ' Text.Whitespace
'"' Literal.String.Double
'xx' Literal.String.Double
'"' Literal.String.Double
')' Punctuation
']' Operator
'\n' Text.Whitespace

View File

@@ -0,0 +1,11 @@
---input---
[5][5]?
---tokens---
'[' Operator
'5' Literal.Number.Integer
']' Operator
'[' Operator
'5' Literal.Number.Integer
']?' Operator
'\n' Text.Whitespace

View File

@@ -0,0 +1,25 @@
---input---
'a'
'я'
'\u{1234}'
'
'
'abc'
---tokens---
"'a'" Literal.String.Char
'\n' Text.Whitespace
"'я'" Literal.String.Char
'\n' Text.Whitespace
"'\\u{1234}'" Literal.String.Char
'\n' Text.Whitespace
"'\n'" Literal.String.Char
'\n' Text.Whitespace
"'" Error
'abc' Name
"'" Error
'\n' Text.Whitespace

View File

@@ -0,0 +1,14 @@
---input---
HTTP
HTTP::Server.new
---tokens---
'HTTP' Name.Constant
'\n' Text.Whitespace
'HTTP' Name
'::' Operator
'Server' Name
'.' Operator
'new' Name
'\n' Text.Whitespace

View File

@@ -0,0 +1,27 @@
---input---
%()
%[]
%{}
%<>
%||
---tokens---
'%(' Literal.String.Other
')' Literal.String.Other
'\n' Text.Whitespace
'%[' Literal.String.Other
']' Literal.String.Other
'\n' Text.Whitespace
'%{' Literal.String.Other
'}' Literal.String.Other
'\n' Text.Whitespace
'%<' Literal.String.Other
'>' Literal.String.Other
'\n' Text.Whitespace
'%|' Literal.String.Other
'|' Literal.String.Other
'\n' Text.Whitespace

View File

@@ -0,0 +1,19 @@
---input---
str.gsub(%r{\\\\}, "/")
---tokens---
'str' Name
'.' Operator
'gsub' Name
'(' Punctuation
'%r{' Literal.String.Regex
'\\\\' Literal.String.Regex
'\\\\' Literal.String.Regex
'}' Literal.String.Regex
',' Punctuation
' ' Text.Whitespace
'"' Literal.String.Double
'/' Literal.String.Double
'"' Literal.String.Double
')' Punctuation
'\n' Text.Whitespace

View File

@@ -0,0 +1,9 @@
---input---
"\#{a + b}"
---tokens---
'"' Literal.String.Double
'\\#' Literal.String.Escape
'{a + b}' Literal.String.Double
'"' Literal.String.Double
'\n' Text.Whitespace

View File

@@ -0,0 +1,56 @@
---input---
"A#{ (3..5).group_by { |x| x/2}.map do |k,v| "#{k}" end.join }" + "Z"
---tokens---
'"' Literal.String.Double
'A' Literal.String.Double
'#{' Literal.String.Interpol
' ' Text.Whitespace
'(' Punctuation
'3' Literal.Number.Integer
'..' Operator
'5' Literal.Number.Integer
')' Punctuation
'.' Operator
'group_by' Name
' ' Text.Whitespace
'{' Literal.String.Interpol
' ' Text.Whitespace
'|' Operator
'x' Name
'|' Operator
' ' Text.Whitespace
'x' Name
'/' Operator
'2' Literal.Number.Integer
'}' Literal.String.Interpol
'.' Operator
'map' Name
' ' Text.Whitespace
'do' Keyword
' ' Text.Whitespace
'|' Operator
'k' Name
',' Punctuation
'v' Name
'|' Operator
' ' Text.Whitespace
'"' Literal.String.Double
'#{' Literal.String.Interpol
'k' Name
'}' Literal.String.Interpol
'"' Literal.String.Double
' ' Text.Whitespace
'end' Keyword
'.' Operator
'join' Name
' ' Text.Whitespace
'}' Literal.String.Interpol
'"' Literal.String.Double
' ' Text.Whitespace
'+' Operator
' ' Text.Whitespace
'"' Literal.String.Double
'Z' Literal.String.Double
'"' Literal.String.Double
'\n' Text.Whitespace

View File

@@ -0,0 +1,58 @@
---input---
@[Link("some")]
lib LibSome
@[CallConvention("X86_StdCall")]
fun foo="some.foo"(thing : Void*) : LibC::Int
end
---tokens---
'@[' Operator
'Link' Name.Decorator
'(' Punctuation
'"' Literal.String.Double
'some' Literal.String.Double
'"' Literal.String.Double
')' Punctuation
']' Operator
'\n' Text.Whitespace
'lib' Keyword
' ' Text.Whitespace
'LibSome' Name.Namespace
'\n' Text.Whitespace
'@[' Operator
'CallConvention' Name.Decorator
'(' Punctuation
'"' Literal.String.Double
'X86_StdCall' Literal.String.Double
'"' Literal.String.Double
')' Punctuation
']' Operator
'\n' Text.Whitespace
'fun' Keyword
' ' Text.Whitespace
'foo' Name.Function
'=' Operator
'"' Literal.String.Double
'some.foo' Literal.String.Double
'"' Literal.String.Double
'(' Punctuation
'thing' Name
' ' Text.Whitespace
':' Punctuation
' ' Text.Whitespace
'Void' Name
'*' Operator
')' Punctuation
' ' Text.Whitespace
':' Punctuation
' ' Text.Whitespace
'LibC' Name
'::' Operator
'Int' Name
'\n' Text.Whitespace
'end' Keyword
'\n' Text.Whitespace

View File

@@ -0,0 +1,76 @@
---input---
def<=>(other : self) : Int
{%for field in %w(first_name middle_name last_name)%}
cmp={{field.id}}<=>other.{{field.id}}
return cmp if cmp!=0
{%end%}
0
end
---tokens---
'def' Keyword
'<=>' Name.Function
'(' Punctuation
'other' Name
' ' Text.Whitespace
':' Punctuation
' ' Text.Whitespace
'self' Keyword
')' Punctuation
' ' Text.Whitespace
':' Punctuation
' ' Text.Whitespace
'Int' Name
'\n' Text.Whitespace
'{%' Literal.String.Interpol
'for' Keyword
' ' Text.Whitespace
'field' Name
' ' Text.Whitespace
'in' Keyword
' ' Text.Whitespace
'%w(' Literal.String.Other
'first_name middle_name last_name' Literal.String.Other
')' Literal.String.Other
'%}' Literal.String.Interpol
'\n' Text.Whitespace
'cmp' Name
'=' Operator
'{{' Literal.String.Interpol
'field' Name
'.' Operator
'id' Name
'}}' Literal.String.Interpol
'<=>' Operator
'other' Name
'.' Operator
'{{' Literal.String.Interpol
'field' Name
'.' Operator
'id' Name
'}}' Literal.String.Interpol
'\n' Text.Whitespace
'return' Keyword
' ' Text.Whitespace
'cmp' Name
' ' Text.Whitespace
'if' Keyword
' ' Text.Whitespace
'cmp' Name
'!=' Operator
'0' Literal.Number.Integer
'\n' Text.Whitespace
'{%' Literal.String.Interpol
'end' Keyword
'%}' Literal.String.Interpol
'\n' Text.Whitespace
'0' Literal.Number.Integer
'\n' Text.Whitespace
'end' Keyword
'\n' Text.Whitespace

View File

@@ -0,0 +1,84 @@
---input---
# Integers
0
1
1_000_000
1u8
11231231231121312i64
# Floats
0.0
1.0_f32
1_f32
0f64
1e+4
1e111
1_234.567_890
# Error
01
0b2
0x129g2
0o12358
---tokens---
'# Integers' Comment.Single
'\n' Text.Whitespace
'0' Literal.Number.Integer
'\n' Text.Whitespace
'1' Literal.Number.Integer
'\n' Text.Whitespace
'1_000_000' Literal.Number.Integer
'\n' Text.Whitespace
'1u8' Literal.Number.Integer
'\n' Text.Whitespace
'11231231231121312i64' Literal.Number.Integer
'\n\n' Text.Whitespace
'# Floats' Comment.Single
'\n' Text.Whitespace
'0.0' Literal.Number.Float
'\n' Text.Whitespace
'1.0_f32' Literal.Number.Float
'\n' Text.Whitespace
'1_f32' Literal.Number.Float
'\n' Text.Whitespace
'0f64' Literal.Number.Float
'\n' Text.Whitespace
'1e+4' Literal.Number.Float
'\n' Text.Whitespace
'1e111' Literal.Number.Float
'\n' Text.Whitespace
'1_234.567_890' Literal.Number.Float
'\n\n' Text.Whitespace
'# Error' Comment.Single
'\n' Text.Whitespace
'0' Error
'1' Literal.Number.Integer
'\n' Text.Whitespace
'0' Error
'b2' Name
'\n' Text.Whitespace
'0' Error
'x129g2' Name
'\n' Text.Whitespace
'0' Error
'o12358' Name
'\n' Text.Whitespace

View File

@@ -0,0 +1,18 @@
---input---
([] of Int32).[]?(5)
---tokens---
'(' Punctuation
'[' Operator
']' Operator
' ' Text.Whitespace
'of' Keyword
' ' Text.Whitespace
'Int32' Name
')' Punctuation
'.' Operator
'[]?' Name.Operator
'(' Punctuation
'5' Literal.Number.Integer
')' Punctuation
'\n' Text.Whitespace

View File

@@ -0,0 +1,41 @@
---input---
%(hello ("world"))
%[hello ["world"]]
%{hello "world"}
%<hello <"world">>
%|hello "world"|
---tokens---
'%(' Literal.String.Other
'hello ' Literal.String.Other
'(' Literal.String.Other
'"world"' Literal.String.Other
')' Literal.String.Other
')' Literal.String.Other
'\n' Text.Whitespace
'%[' Literal.String.Other
'hello ' Literal.String.Other
'[' Literal.String.Other
'"world"' Literal.String.Other
']' Literal.String.Other
']' Literal.String.Other
'\n' Text.Whitespace
'%{' Literal.String.Other
'hello "world"' Literal.String.Other
'}' Literal.String.Other
'\n' Text.Whitespace
'%<' Literal.String.Other
'hello ' Literal.String.Other
'<' Literal.String.Other
'"world"' Literal.String.Other
'>' Literal.String.Other
'>' Literal.String.Other
'\n' Text.Whitespace
'%|' Literal.String.Other
'hello "world"' Literal.String.Other
'|' Literal.String.Other
'\n' Text.Whitespace

View File

@@ -0,0 +1,31 @@
---input---
%Q(hello \n #{name})
%q(hello \n #{name})
%w(foo\nbar baz)
---tokens---
'%Q(' Literal.String.Other
'hello ' Literal.String.Other
'\\n' Literal.String.Escape
' ' Literal.String.Other
'#{' Literal.String.Interpol
'name' Name
'}' Literal.String.Interpol
')' Literal.String.Other
'\n' Text.Whitespace
'%q(' Literal.String.Other
'hello ' Literal.String.Other
'\\' Literal.String.Other
'n ' Literal.String.Other
'#' Literal.String.Other
'{name}' Literal.String.Other
')' Literal.String.Other
'\n' Text.Whitespace
'%w(' Literal.String.Other
'foo' Literal.String.Other
'\\' Literal.String.Other
'nbar baz' Literal.String.Other
')' Literal.String.Other
'\n' Text.Whitespace

View File

@@ -0,0 +1,20 @@
---input---
record Cls do
def_equals s
end
---tokens---
'record' Name.Builtin.Pseudo
' ' Text.Whitespace
'Cls' Name
' ' Text.Whitespace
'do' Keyword
'\n' Text.Whitespace
'def_equals' Name.Builtin.Pseudo
' ' Text.Whitespace
's' Name
'\n' Text.Whitespace
'end' Keyword
'\n' Text.Whitespace

View File

@@ -0,0 +1,50 @@
---input---
def f(x : T, line = __LINE__) forall T
if x.is_a?(String)
pp! x
end
end
---tokens---
'def' Keyword
' ' Text.Whitespace
'f' Name.Function
'(' Punctuation
'x' Name
' ' Text.Whitespace
':' Punctuation
' ' Text.Whitespace
'T' Name
',' Punctuation
' ' Text.Whitespace
'line' Name
' ' Text.Whitespace
'=' Operator
' ' Text.Whitespace
'__LINE__' Keyword.Pseudo
')' Punctuation
' ' Text.Whitespace
'forall' Keyword.Pseudo
' ' Text.Whitespace
'T' Name
'\n' Text.Whitespace
'if' Keyword
' ' Text.Whitespace
'x' Name
'.is_a?' Keyword.Pseudo
'(' Punctuation
'String' Name
')' Punctuation
'\n' Text.Whitespace
'pp!' Name.Builtin.Pseudo
' ' Text.Whitespace
'x' Name
'\n' Text.Whitespace
'end' Keyword
'\n' Text.Whitespace
'end' Keyword
'\n' Text.Whitespace

View File

@@ -0,0 +1,8 @@
---input---
1...3
---tokens---
'1' Literal.Number.Integer
'...' Operator
'3' Literal.Number.Integer
'\n' Text.Whitespace

View File

@@ -0,0 +1,10 @@
---input---
1 .. 3
---tokens---
'1' Literal.Number.Integer
' ' Text.Whitespace
'..' Operator
' ' Text.Whitespace
'3' Literal.Number.Integer
'\n' Text.Whitespace

View File

@@ -0,0 +1,58 @@
---input---
"a\nz"
"a\az"
"a\xffz"
"a\u1234z"
"a\000z"
"a\u{0}z"
"a\u{10AfF9}z"
---tokens---
'"' Literal.String.Double
'a' Literal.String.Double
'\\n' Literal.String.Escape
'z' Literal.String.Double
'"' Literal.String.Double
'\n' Text.Whitespace
'"' Literal.String.Double
'a' Literal.String.Double
'\\a' Literal.String.Escape
'z' Literal.String.Double
'"' Literal.String.Double
'\n' Text.Whitespace
'"' Literal.String.Double
'a' Literal.String.Double
'\\xff' Literal.String.Escape
'z' Literal.String.Double
'"' Literal.String.Double
'\n' Text.Whitespace
'"' Literal.String.Double
'a' Literal.String.Double
'\\u1234' Literal.String.Escape
'z' Literal.String.Double
'"' Literal.String.Double
'\n' Text.Whitespace
'"' Literal.String.Double
'a' Literal.String.Double
'\\000' Literal.String.Escape
'z' Literal.String.Double
'"' Literal.String.Double
'\n' Text.Whitespace
'"' Literal.String.Double
'a' Literal.String.Double
'\\u{0}' Literal.String.Escape
'z' Literal.String.Double
'"' Literal.String.Double
'\n' Text.Whitespace
'"' Literal.String.Double
'a' Literal.String.Double
'\\u{10AfF9}' Literal.String.Escape
'z' Literal.String.Double
'"' Literal.String.Double
'\n' Text.Whitespace

View File

@@ -0,0 +1,20 @@
---input---
:sym_bol
:あ
:question?
:"symbol"
---tokens---
':sym_bol' Literal.String.Symbol
'\n' Text.Whitespace
':あ' Literal.String.Symbol
'\n' Text.Whitespace
':question?' Literal.String.Symbol
'\n' Text.Whitespace
':"' Literal.String.Symbol
'symbol' Literal.String.Symbol
'"' Literal.String.Symbol
'\n' Text.Whitespace

View File

@@ -1,15 +1,6 @@
require "../formatter"
module Tartrazine
def self.to_ansi(text : String, language : String,
theme : String = "default-dark",
line_numbers : Bool = false) : String
Tartrazine::Ansi.new(
theme: Tartrazine.theme(theme),
line_numbers: line_numbers
).format(text, Tartrazine.lexer(name: language))
end
class Ansi < Formatter
property? line_numbers : Bool = false

View File

@@ -3,17 +3,6 @@ require "../formatter"
require "html"
module Tartrazine
def self.to_html(text : String, language : String,
theme : String = "default-dark",
standalone : Bool = true,
line_numbers : Bool = false) : String
Tartrazine::Html.new(
theme: Tartrazine.theme(theme),
standalone: standalone,
line_numbers: line_numbers
).format(text, Tartrazine.lexer(name: language))
end
class Html < Formatter
# property line_number_in_table : Bool = false
# property with_classes : Bool = true

View File

@@ -12,7 +12,7 @@ module Tartrazine
def format(text : String, lexer : BaseLexer, io : IO) : Nil
tokenizer = lexer.tokenizer(text)
io << Tartrazine::RegexLexer.collapse_tokens(tokenizer.to_a).to_json
io << Tartrazine::Lexer.collapse_tokens(tokenizer.to_a).to_json
end
end
end

View File

@@ -1,7 +1,5 @@
require "./constants/lexers"
require "./heuristics"
require "baked_file_system"
require "crystal/syntax_highlighter"
require "./constants/lexers"
module Tartrazine
class LexerFiles
@@ -16,30 +14,25 @@ module Tartrazine
return lexer_by_filename(filename) if filename
return lexer_by_mimetype(mimetype) if mimetype
RegexLexer.from_xml(LexerFiles.get("/#{LEXERS_BY_NAME["plaintext"]}.xml").gets_to_end)
Lexer.from_xml(LexerFiles.get("/#{LEXERS_BY_NAME["plaintext"]}.xml").gets_to_end)
end
private def self.lexer_by_mimetype(mimetype : String) : BaseLexer
lexer_file_name = LEXERS_BY_MIMETYPE.fetch(mimetype, nil)
raise Exception.new("Unknown mimetype: #{mimetype}") if lexer_file_name.nil?
RegexLexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
Lexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
end
private def self.lexer_by_name(name : String) : BaseLexer
return CrystalLexer.new if name == "crystal"
lexer_file_name = LEXERS_BY_NAME.fetch(name.downcase, nil)
return create_delegating_lexer(name) if lexer_file_name.nil? && name.includes? "+"
raise Exception.new("Unknown lexer: #{name}") if lexer_file_name.nil?
RegexLexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
Lexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
end
private def self.lexer_by_filename(filename : String) : BaseLexer
if filename.ends_with?(".cr")
return CrystalLexer.new
end
candidates = Set(String).new
LEXERS_BY_FILENAME.each do |k, v|
candidates += v.to_set if File.match?(k, File.basename(filename))
@@ -59,7 +52,7 @@ module Tartrazine
end
end
RegexLexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
Lexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
end
private def self.lexer_by_content(fname : String) : String?
@@ -159,9 +152,7 @@ module Tartrazine
end
end
alias BaseLexer = Lexer
abstract class Lexer
abstract class BaseLexer
property config = {
name: "",
priority: 0.0,
@@ -183,7 +174,7 @@ module Tartrazine
# For explanations on what actions and states do
# the Pygments documentation is a good place to start.
# https://pygments.org/docs/lexerdevelopment/
class RegexLexer < BaseLexer
class Lexer < BaseLexer
# Collapse consecutive tokens of the same type for easier comparison
# and smaller output
def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token)
@@ -207,7 +198,7 @@ module Tartrazine
end
def self.from_xml(xml : String) : Lexer
l = RegexLexer.new
l = Lexer.new
lexer = XML.parse(xml).first_element_child
if lexer
config = lexer.children.find { |node|
@@ -272,7 +263,7 @@ module Tartrazine
#
# This is useful for things like template languages, where
# you have Jinja + HTML or Jinja + CSS and so on.
class DelegatingLexer < Lexer
class DelegatingLexer < BaseLexer
property language_lexer : BaseLexer
property root_lexer : BaseLexer
@@ -333,81 +324,4 @@ module Tartrazine
new_state
end
end
class CustomCrystalHighlighter < Crystal::SyntaxHighlighter
@tokens = [] of Token
def render_delimiter(&block)
@tokens << {type: "LiteralString", value: block.call.to_s}
end
def render_interpolation(&block)
@tokens << {type: "LiteralStringInterpol", value: "\#{"}
@tokens << {type: "Text", value: block.call.to_s}
@tokens << {type: "LiteralStringInterpol", value: "}"}
end
def render_string_array(&block)
@tokens << {type: "LiteralString", value: block.call.to_s}
end
# ameba:disable Metrics/CyclomaticComplexity
def render(type : TokenType, value : String)
case type
when .comment?
@tokens << {type: "Comment", value: value}
when .number?
@tokens << {type: "LiteralNumber", value: value}
when .char?
@tokens << {type: "LiteralStringChar", value: value}
when .symbol?
@tokens << {type: "LiteralStringSymbol", value: value}
when .const?
@tokens << {type: "NameConstant", value: value}
when .string?
@tokens << {type: "LiteralString", value: value}
when .ident?
@tokens << {type: "NameVariable", value: value}
when .keyword?, .self?
@tokens << {type: "NameKeyword", value: value}
when .primitive_literal?
@tokens << {type: "Literal", value: value}
when .operator?
@tokens << {type: "Operator", value: value}
when Crystal::SyntaxHighlighter::TokenType::DELIMITED_TOKEN, Crystal::SyntaxHighlighter::TokenType::DELIMITER_START, Crystal::SyntaxHighlighter::TokenType::DELIMITER_END
@tokens << {type: "LiteralString", value: value}
else
@tokens << {type: "Text", value: value}
end
end
end
class CrystalTokenizer < Tartrazine::BaseTokenizer
include Iterator(Token)
@hl = CustomCrystalHighlighter.new
@lexer : BaseLexer
@iter : Iterator(Token)
# delegate next, to: @iter
def initialize(@lexer : BaseLexer, text : String, secondary = false)
# Respect the `ensure_nl` config option
if text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary
text += "\n"
end
# Just do the tokenizing
@hl.highlight(text)
@iter = @hl.@tokens.each
end
def next : Iterator::Stop | Token
@iter.next
end
end
class CrystalLexer < BaseLexer
def tokenizer(text : String, secondary = false) : BaseTokenizer
CrystalTokenizer.new(self, text, secondary)
end
end
end

View File

@@ -1,5 +1,5 @@
require "docopt"
require "./tartrazine"
require "./**"
HELP = <<-HELP
tartrazine: a syntax highlighting tool

View File

@@ -17,6 +17,7 @@ module Tartrazine
abstract struct BaseRule
abstract def match(text : Bytes, pos : Int32, tokenizer : Tokenizer) : Tuple(Bool, Int32, Array(Token))
abstract def initialize(node : XML::Node)
@actions : Array(Action) = [] of Action
@@ -39,6 +40,9 @@ module Tartrazine
return true, pos + match[0].size, @actions.flat_map(&.emit(match, tokenizer))
end
def initialize(node : XML::Node)
end
def initialize(node : XML::Node, multiline, dotall, ignorecase)
pattern = node["pattern"]
pattern = "(?m)" + pattern if multiline

View File

@@ -1 +1 @@
require "../spec/tartrazine_spec.cr"
require "../spec/**"

View File

@@ -84,6 +84,27 @@ module Tartrazine
property styles = {} of String => Style
# Get the style for a token.
def style(token)
styles[token] = Style.new unless styles.has_key?(token)
s = styles[token]
# We already got the data from the style hierarchy
return s if s.complete?
# Form the hierarchy of parent styles
parents = style_parents(token)
s = parents.map do |parent|
styles[parent]
end.reduce(s) do |acc, style|
acc + style
end
s.complete = true
styles[token] = s
s
end
def style_parents(token)
parents = ["Background"]
parts = token.underscore.split("_").map(&.capitalize)

View File

@@ -1,6 +1,5 @@
require "./actions"
require "./formatter"
require "./formatters/**"
require "./rules"
require "./styles"
require "./tartrazine"

13485
x2.html Normal file

File diff suppressed because it is too large Load Diff