mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-07-01 12:27:08 -03:00
Compare commits
169 Commits
v0.1.1
...
d1ce83a9c8
Author | SHA1 | Date | |
---|---|---|---|
d1ce83a9c8 | |||
aabe028767 | |||
ed61a84553 | |||
b7e4aaa1f9 | |||
0f6b9b0117 | |||
b81e9c4405 | |||
9c85c6cf18 | |||
d4e189e596 | |||
71fb699f96 | |||
5fe309f24c | |||
62db71ae4d | |||
fff6cad5ac | |||
44e6af8546 | |||
9e2585a875 | |||
c16b139fa3 | |||
e11775040c | |||
30bc8cccba | |||
1638c253cb | |||
c374f52aee | |||
96fd9bdfe9 | |||
0423811c5d | |||
3d9d3ab5cf | |||
92a97490f1 | |||
22decedf3a | |||
8b34a1659d | |||
3bf8172b89 | |||
4432da2893 | |||
6a6827f26a | |||
766f9b4708 | |||
9d49ff78d6 | |||
fb924543a0 | |||
09d4b7b02e | |||
08e81683ca | |||
9c70fbf389 | |||
d26393d8c9 | |||
c95658320c | |||
ca988defc1 | |||
687c6c81df | |||
ac8b7e3800 | |||
e288a55812 | |||
11cb5fc48e | |||
bf2f08c1d0 | |||
84980459cf | |||
c011bd8347 | |||
6a38f2f5fb | |||
c4a2d1a752 | |||
358be51e27 | |||
2cff0fea48 | |||
40202eb2d6 | |||
3ed4a7eab8 | |||
6f797f999a | |||
b762307660 | |||
eb0cc089a9 | |||
88f2aace20 | |||
fe943fa399 | |||
08f8138e05 | |||
3c0b3c38e2 | |||
a1318501a5 | |||
daf24189bf | |||
3d3f9fcc24 | |||
a583b7359e | |||
de2a4a1996 | |||
31334ac802 | |||
6d64491938 | |||
fb693bb221 | |||
c6824a99df | |||
4dd2e925b0 | |||
7bda19cdea | |||
0e7dafe711 | |||
082241eb0f | |||
df88047ca8 | |||
5a3b50d7a3 | |||
a5926af518 | |||
fc9f834bc8 | |||
58fd42d936 | |||
5a88a51f3e | |||
fd7c6fa4b3 | |||
6264bfc754 | |||
38196d6e96 | |||
c6cd74e339 | |||
17c66a6572 | |||
cd7e150aae | |||
176b8e9bc9 | |||
d8ddf5d8b6 | |||
06556877ef | |||
3d5d073471 | |||
a2884c4c78 | |||
bd3df10d2c | |||
0f3b7fc3c5 | |||
7f4296e9d7 | |||
f883065092 | |||
746abe53ea | |||
90971e8f1b | |||
057879c6ee | |||
215d53e173 | |||
f435d7df21 | |||
5b0a1789dc | |||
76ef1fea41 | |||
3ebedec6c1 | |||
57e63f2308 | |||
4a598a575b | |||
9042138053 | |||
fa647e898a | |||
ad92929a10 | |||
bb952a44b8 | |||
ae03e4612e | |||
471b2f5050 | |||
5a3b08e716 | |||
9ebb9f2765 | |||
7538fc76aa | |||
788577b226 | |||
1f01146b1f | |||
9041b763ea | |||
ada30915c3 | |||
78eff45ea0 | |||
e817aedd60 | |||
20d6b65346 | |||
cb09dff9f1 | |||
b589726352 | |||
a3a7b5bd9a | |||
58e8dac038 | |||
f72a40f095 | |||
bf257a5b82 | |||
029495590c | |||
115debdec6 | |||
4612db58fe | |||
f45a86c83a | |||
27008640a6 | |||
7db8fdc9e4 | |||
ad664d9f93 | |||
0626c8619f | |||
3725201f8a | |||
6f64b76c44 | |||
5218af6855 | |||
c898f395a1 | |||
56e49328fb | |||
8d7faf2098 | |||
2e87762f1b | |||
88f5674917 | |||
ce6f3d29b5 | |||
46d6d3f467 | |||
78ddc69937 | |||
b1ad7b64c0 | |||
cbedf8a8db | |||
ec8c53c823 | |||
e3a1ce37b4 | |||
b4f38e00e1 | |||
08daabe1c3 | |||
e8d405fc99 | |||
e295256573 | |||
e40c8b586c | |||
bc34f93cc5 | |||
f64c91801e | |||
8e29500fcf | |||
f2e638ce3b | |||
84ee7e6934 | |||
89d212b71c | |||
a92d2501f7 | |||
6b44bcb5ad | |||
86a5894429 | |||
be12e0f4f1 | |||
96dcb7e15e | |||
d1762f477a | |||
f98f44365f | |||
d0c2b1764a | |||
e6a292ade0 | |||
4ced996f90 | |||
fd5af6ba3b | |||
47237eecc3 |
21
.ameba.yml
21
.ameba.yml
@ -1,15 +1,15 @@
|
||||
# This configuration file was generated by `ameba --gen-config`
|
||||
# on 2024-08-04 23:09:09 UTC using Ameba version 1.6.1.
|
||||
# on 2024-09-21 14:59:30 UTC using Ameba version 1.6.1.
|
||||
# The point is for the user to remove these configuration records
|
||||
# one by one as the reported problems are removed from the code base.
|
||||
|
||||
# Problems found: 2
|
||||
# Problems found: 3
|
||||
# Run `ameba --only Documentation/DocumentationAdmonition` for details
|
||||
Documentation/DocumentationAdmonition:
|
||||
Description: Reports documentation admonitions
|
||||
Timezone: UTC
|
||||
Excluded:
|
||||
- src/tartrazine.cr
|
||||
- src/lexer.cr
|
||||
- src/actions.cr
|
||||
Admonitions:
|
||||
- TODO
|
||||
@ -17,3 +17,18 @@ Documentation/DocumentationAdmonition:
|
||||
- BUG
|
||||
Enabled: true
|
||||
Severity: Warning
|
||||
|
||||
# Problems found: 1
|
||||
# Run `ameba --only Lint/SpecFilename` for details
|
||||
Lint/SpecFilename:
|
||||
Description: Enforces spec filenames to have `_spec` suffix
|
||||
Excluded:
|
||||
- spec/examples/crystal/hello.cr
|
||||
IgnoredDirs:
|
||||
- spec/support
|
||||
- spec/fixtures
|
||||
- spec/data
|
||||
IgnoredFilenames:
|
||||
- spec_helper
|
||||
Enabled: true
|
||||
Severity: Warning
|
||||
|
26
.github/workflows/ci.yml
vendored
Normal file
26
.github/workflows/ci.yml
vendored
Normal file
@ -0,0 +1,26 @@
|
||||
name: Tests
|
||||
on:
|
||||
# This can't yet run automatically, because tests fail because of
|
||||
# different versions of chroma. Need to get the same one in my
|
||||
# local env and in CI
|
||||
workflow_dispatch:
|
||||
push:
|
||||
permissions:
|
||||
contents: read
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Download source
|
||||
uses: actions/checkout@v4
|
||||
- name: Install Crystal
|
||||
uses: crystal-lang/install-crystal@v1
|
||||
- name: Run tests
|
||||
run: |
|
||||
wget https://github.com/alecthomas/chroma/releases/download/v2.14.0/chroma-2.14.0-linux-amd64.tar.gz
|
||||
tar xzvf chroma-2.14.0*gz
|
||||
mkdir ~/.local/bin -p
|
||||
sudo mv chroma ~/.local/bin
|
||||
shards install
|
||||
crystal tool format --check
|
||||
crystal spec -v
|
30
.github/workflows/coverage.yml
vendored
Normal file
30
.github/workflows/coverage.yml
vendored
Normal file
@ -0,0 +1,30 @@
|
||||
name: Coverage
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: "0 1 * * *"
|
||||
permissions:
|
||||
contents: read
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Download source
|
||||
uses: actions/checkout@v4
|
||||
- name: Install Crystal
|
||||
uses: crystal-lang/install-crystal@v1
|
||||
- name: Run tests using kcov
|
||||
run: |
|
||||
sudo apt update && sudo apt upgrade && sudo apt install -y kcov
|
||||
wget https://github.com/alecthomas/chroma/releases/download/v2.14.0/chroma-2.14.0-linux-amd64.tar.gz
|
||||
tar xzvf chroma-2.14.0*gz
|
||||
mkdir ~/.local/bin -p
|
||||
sudo mv chroma ~/.local/bin
|
||||
shards install
|
||||
crystal build src/run_tests.cr
|
||||
kcov --clean --include-path=./src $PWD/coverage ./run_tests
|
||||
curl -Os https://uploader.codecov.io/latest/linux/codecov
|
||||
chmod +x codecov
|
||||
./codecov -t ${CODECOV_TOKEN} -s coverage
|
||||
env:
|
||||
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
|
8
.gitignore
vendored
8
.gitignore
vendored
@ -7,3 +7,11 @@ chroma/
|
||||
pygments/
|
||||
shard.lock
|
||||
.vscode/
|
||||
.crystal/
|
||||
venv/
|
||||
.croupier
|
||||
coverage/
|
||||
run_tests
|
||||
|
||||
# We use the internal crystal lexer
|
||||
lexers/crystal.xml
|
||||
|
3
.md.rb
Normal file
3
.md.rb
Normal file
@ -0,0 +1,3 @@
|
||||
exclude_rule 'MD033' # Inline HTML
|
||||
exclude_rule 'MD005' # 3-space indent for lists
|
||||
exclude_rule 'MD024' # Repeated headings
|
35
.pre-commit-config.yaml
Normal file
35
.pre-commit-config.yaml
Normal file
@ -0,0 +1,35 @@
|
||||
# See https://pre-commit.com for more information
|
||||
# See https://pre-commit.com/hooks.html for more hooks
|
||||
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v4.6.0
|
||||
hooks:
|
||||
- id: trailing-whitespace
|
||||
- id: end-of-file-fixer
|
||||
- id: check-yaml
|
||||
- id: check-added-large-files
|
||||
- id: check-merge-conflict
|
||||
- repo: https://github.com/jumanjihouse/pre-commit-hooks
|
||||
rev: 3.0.0
|
||||
hooks:
|
||||
- id: shellcheck
|
||||
- id: markdownlint
|
||||
exclude: '^content'
|
||||
- repo: https://github.com/mrtazz/checkmake
|
||||
rev: 0.2.2
|
||||
hooks:
|
||||
- id: checkmake
|
||||
exclude: lexers/makefile.xml
|
||||
- repo: https://github.com/python-jsonschema/check-jsonschema
|
||||
rev: 0.29.2
|
||||
hooks:
|
||||
- id: check-github-workflows
|
||||
- repo: https://github.com/commitizen-tools/commitizen
|
||||
rev: v3.29.0 # automatically updated by Commitizen
|
||||
hooks:
|
||||
- id: commitizen
|
||||
- id: commitizen-branch
|
||||
stages:
|
||||
- post-commit
|
||||
- push
|
142
CHANGELOG.md
Normal file
142
CHANGELOG.md
Normal file
@ -0,0 +1,142 @@
|
||||
# Changelog
|
||||
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
## [0.12.0] - 2025-01-21
|
||||
|
||||
### 🚀 Features
|
||||
|
||||
- Bumped to latest chroma release
|
||||
|
||||
### ⚙️ Miscellaneous Tasks
|
||||
|
||||
- Pin ubuntu version in CI
|
||||
- Mark more mcfunction tests as bad
|
||||
|
||||
### Build
|
||||
|
||||
- Automate AUR release
|
||||
|
||||
## [0.11.1] - 2024-10-14
|
||||
|
||||
### 🐛 Bug Fixes
|
||||
|
||||
- Support choosing lexers when used as a library
|
||||
|
||||
## [0.11.0] - 2024-10-14
|
||||
|
||||
### 🚀 Features
|
||||
|
||||
- Support selecting only some themes
|
||||
|
||||
## [0.10.0] - 2024-09-26
|
||||
|
||||
### 🚀 Features
|
||||
|
||||
- Optional conditional baking of lexers
|
||||
|
||||
### 🐛 Bug Fixes
|
||||
|
||||
- Strip binaries for release artifacts
|
||||
- Fix metadata to show crystal
|
||||
|
||||
## [0.9.1] - 2024-09-22
|
||||
|
||||
### 🐛 Bug Fixes
|
||||
|
||||
- Terminal formatter was skipping things that it could highlight
|
||||
- Bug in high-level API for png formatter
|
||||
|
||||
### 🧪 Testing
|
||||
|
||||
- Added minimal tests for svg and png formatters
|
||||
|
||||
## [0.9.0] - 2024-09-21
|
||||
|
||||
### 🚀 Features
|
||||
|
||||
- PNG writer based on Stumpy libs
|
||||
|
||||
### ⚙️ Miscellaneous Tasks
|
||||
|
||||
- Clean
|
||||
- Detect version bump in release script
|
||||
- Improve changelog handling
|
||||
|
||||
## [0.8.0] - 2024-09-21
|
||||
|
||||
### 🚀 Features
|
||||
|
||||
- SVG formatter
|
||||
|
||||
### 🐛 Bug Fixes
|
||||
|
||||
- HTML formatter was setting bold wrong
|
||||
|
||||
### 📚 Documentation
|
||||
|
||||
- Added instructions to add as a dependency
|
||||
|
||||
### 🧪 Testing
|
||||
|
||||
- Add basic tests for crystal and delegating lexers
|
||||
- Added tests for CSS generation
|
||||
|
||||
### ⚙ Miscellaneous Tasks
|
||||
|
||||
- Fix example code in README
|
||||
|
||||
## [0.7.0] - 2024-09-10
|
||||
|
||||
### 🚀 Features
|
||||
|
||||
- Higher level API (`to_html` and `to_ansi`)
|
||||
- Use the native crystal highlighter
|
||||
|
||||
### 🐛 Bug Fixes
|
||||
|
||||
- Ameba
|
||||
- Variable bame in Hacefile
|
||||
- Make it easier to import the Ansi formatter
|
||||
- Renamed BaseLexer to Lexer and Lexer to RegexLexer to make API nicer
|
||||
- Make install work
|
||||
|
||||
### 📚 Documentation
|
||||
|
||||
- Mention AUR package
|
||||
|
||||
### 🧪 Testing
|
||||
|
||||
- Add CI workflows
|
||||
|
||||
### ⚙️ Miscellaneous Tasks
|
||||
|
||||
- Pre-commit hooks
|
||||
- Git-cliff config
|
||||
- Started changelog
|
||||
- Force conventional commit messages
|
||||
- Force conventional commit messages
|
||||
- Updated pre-commit
|
||||
- *(ignore)* Fix tests
|
||||
- Added badges
|
||||
- Added badges
|
||||
- *(ignore)* Removed random file
|
||||
|
||||
### Build
|
||||
|
||||
- Switch from Makefile to Hacefile
|
||||
- Added do_release script
|
||||
- Fix markdown check
|
||||
|
||||
### Bump
|
||||
|
||||
- Release v0.6.4
|
||||
- Release v0.6.4
|
||||
|
||||
## [0.6.1] - 2024-08-25
|
||||
|
||||
### 📚 Documentation
|
||||
|
||||
- Improve readme and help message
|
||||
|
||||
<!-- generated by git-cliff -->
|
137
Hacefile.yml
Normal file
137
Hacefile.yml
Normal file
@ -0,0 +1,137 @@
|
||||
variables:
|
||||
FLAGS: "-d --error-trace"
|
||||
NAME: "tartrazine"
|
||||
|
||||
tasks:
|
||||
build:
|
||||
default: true
|
||||
dependencies:
|
||||
- src
|
||||
- shard.lock
|
||||
- shard.yml
|
||||
- Hacefile.yml
|
||||
- lexers/*xml
|
||||
- styles/*xml
|
||||
outputs:
|
||||
- bin/{{NAME}}
|
||||
commands: |
|
||||
shards build {{FLAGS}}
|
||||
|
||||
get-deps:
|
||||
dependencies:
|
||||
- shard.yml
|
||||
outputs:
|
||||
- shard.lock
|
||||
commands: |
|
||||
shards install
|
||||
|
||||
build-release:
|
||||
phony: true
|
||||
always_run: true
|
||||
commands: |
|
||||
hace build FLAGS="--release"
|
||||
|
||||
install:
|
||||
phony: true
|
||||
always_run: true
|
||||
dependencies:
|
||||
- bin/{{NAME}}
|
||||
commands: |
|
||||
rm ${HOME}/.local/bin/{{NAME}} -f
|
||||
cp bin/{{NAME}} ${HOME}/.local/bin/{{NAME}}
|
||||
|
||||
static:
|
||||
outputs:
|
||||
- bin/{{NAME}}-static-linux-amd64
|
||||
- bin/{{NAME}}-static-linux-arm64
|
||||
commands: |
|
||||
hace clean
|
||||
./build_static.sh
|
||||
|
||||
test:
|
||||
dependencies:
|
||||
- src
|
||||
- spec
|
||||
- shard.lock
|
||||
- shard.yml
|
||||
commands: |
|
||||
crystal spec -v --error-trace
|
||||
phony: true
|
||||
always_run: true
|
||||
|
||||
lint:
|
||||
dependencies:
|
||||
- src
|
||||
- spec
|
||||
- shard.lock
|
||||
- shard.yml
|
||||
commands: |
|
||||
crystal tool format src/*.cr spec/*.cr
|
||||
ameba --fix
|
||||
always_run: true
|
||||
phony: true
|
||||
|
||||
docs:
|
||||
dependencies:
|
||||
- src
|
||||
- shard.lock
|
||||
- shard.yml
|
||||
- README.md
|
||||
commands: |
|
||||
crystal docs
|
||||
outputs:
|
||||
- docs/index.html
|
||||
|
||||
pre-commit:
|
||||
default: true
|
||||
outputs:
|
||||
- .git/hooks/commit-msg
|
||||
- .git/hooks/pre-commit
|
||||
dependencies:
|
||||
- .pre-commit-config.yaml
|
||||
commands: |
|
||||
pre-commit install --hook-type commit-msg
|
||||
pre-commit install
|
||||
|
||||
clean:
|
||||
phony: true
|
||||
always_run: true
|
||||
commands: |
|
||||
rm -rf shard.lock bin lib
|
||||
|
||||
coverage:
|
||||
dependencies:
|
||||
- src
|
||||
- spec
|
||||
- shard.lock
|
||||
- shard.yml
|
||||
commands: |
|
||||
shards install
|
||||
crystal build -o bin/run_tests src/run_tests.cr
|
||||
rm -rf coverage/
|
||||
mkdir coverage
|
||||
kcov --clean --include-path=./src ${PWD}/coverage ./bin/run_tests
|
||||
outputs:
|
||||
- coverage/index.html
|
||||
|
||||
loc:
|
||||
phony: true
|
||||
always_run: true
|
||||
dependencies:
|
||||
- src
|
||||
commands: |
|
||||
tokei src -e src/constants/
|
||||
|
||||
aur:
|
||||
phony: true
|
||||
always_run: true
|
||||
commands: |
|
||||
rm -rf aur-{{NAME}}
|
||||
git clone ssh://aur@aur.archlinux.org/{{NAME}}.git aur-{{NAME}}
|
||||
sed s/pkgver=.*/pkgver=$(shards version)/ -i aur-{{NAME}}/PKGBUILD
|
||||
sed s/pkgrel=.*/pkgrel=1/ -i aur-{{NAME}}/PKGBUILD
|
||||
cd aur-{{NAME}} && updpkgsums && makepkg --printsrcinfo > .SRCINFO
|
||||
cd aur-{{NAME}} && makepkg -fsr
|
||||
cd aur-{{NAME}} && git add PKGBUILD .SRCINFO
|
||||
cd aur-{{NAME}} && git commit -a -m "Update to $(shards version)"
|
||||
cd aur-{{NAME}} && git push
|
7
Makefile
7
Makefile
@ -1,7 +0,0 @@
|
||||
build: $(wildcard src/**/*.cr) $(wildcard lexers/*xml) $(wildcard styles/*xml) shard.yml
|
||||
shards build -Dstrict_multi_assign -Dno_number_autocast
|
||||
release: $(wildcard src/**/*.cr) $(wildcard lexers/*xml) $(wildcard styles/*xml) shard.yml
|
||||
shards build --release
|
||||
static: $(wildcard src/**/*.cr) $(wildcard lexers/*xml) $(wildcard styles/*xml) shard.yml
|
||||
shards build --release --static
|
||||
strip bin/tartrazine
|
201
README.md
201
README.md
@ -1,57 +1,159 @@
|
||||
# TARTRAZINE
|
||||
|
||||
[](https://github.com/ralsina/tartrazine/actions/workflows/ci.yml)
|
||||
[](https://codecov.io/gh/ralsina/tartrazine)
|
||||
|
||||
Tartrazine is a library to syntax-highlight code. It is
|
||||
a port of [Pygments](https://pygments.org/) to
|
||||
[Crystal](https://crystal-lang.org/). Kind of.
|
||||
[Crystal](https://crystal-lang.org/).
|
||||
|
||||
It's not currently usable because it's not finished, but:
|
||||
It also provides a CLI tool which can be used to highlight many things in many styles.
|
||||
|
||||
* The lexers work for the implemented languages
|
||||
* The provided styles work
|
||||
* There is a very very simple HTML formatter
|
||||
|
||||
# A port of what? Why "kind of"?
|
||||
|
||||
Because I did not read the Pygments code. And this is actually
|
||||
based on [Chroma](https://github.com/alecthomas/chroma) ...
|
||||
although I did not read that code either.
|
||||
|
||||
Chroma has taken most of the Pygments lexers and turned them into
|
||||
XML descriptions. What I did was take those XML files from Chroma
|
||||
and a pile of test cases from Pygments, and I slapped them together
|
||||
until the tests passed and my code produced the same output as
|
||||
Chroma. Think of it as *extreme TDD*.
|
||||
|
||||
Currently the pass rate for tests in the supported languages
|
||||
is `96.8%`, which is *not bad for a couple days hacking*.
|
||||
|
||||
This only covers the RegexLexers, which are the most common ones,
|
||||
but it means the supported languages are a subset of Chroma's, which
|
||||
is a subset of Pygments'.
|
||||
|
||||
Currently Tartrazine supports ... 241 languages.
|
||||
|
||||
It has 332 themes (64 from Chroma, the rest are base16 themes via
|
||||
[Sixteen](https://github.com/ralsina/sixteen)
|
||||
Currently Tartrazine supports 247 languages and has 331 themes (63 from Chroma,
|
||||
the rest are base16 themes via [Sixteen](https://github.com/ralsina/sixteen)
|
||||
|
||||
## Installation
|
||||
|
||||
This will have a CLI tool that can be installed, but it's not
|
||||
there yet.
|
||||
If you are using Arch: Use yay or your favourite AUR helper, package name is `tartrazine`.
|
||||
|
||||
From prebuilt binaries:
|
||||
|
||||
## Usage
|
||||
Each release provides statically-linked binaries that should
|
||||
work on any Linux. Get them from the [releases page](https://github.com/ralsina/tartrazine/releases)
|
||||
and put them in your PATH.
|
||||
|
||||
This works:
|
||||
To build from source:
|
||||
|
||||
1. Clone this repo
|
||||
2. Run `make` to build the `tartrazine` binary
|
||||
3. Copy the binary somewhere in your PATH.
|
||||
|
||||
## Usage as a CLI tool
|
||||
|
||||
Show a syntax highlighted version of a C source file in your terminal:
|
||||
|
||||
```shell
|
||||
tartrazine whatever.c -l c -t catppuccin-macchiato --line-numbers -f terminal
|
||||
```
|
||||
|
||||
Generate a standalone HTML file from a C source file with the syntax highlighted:
|
||||
|
||||
```shell
|
||||
$ tartrazine whatever.c -t catppuccin-macchiato --line-numbers \
|
||||
--standalone -f html -o whatever.html
|
||||
```
|
||||
|
||||
## Usage as a Library
|
||||
|
||||
Add to your `shard.yml`:
|
||||
|
||||
```yaml
|
||||
dependencies:
|
||||
tartrazine:
|
||||
github: ralsina/tartrazine
|
||||
```
|
||||
|
||||
This is the high level API:
|
||||
|
||||
```crystal
|
||||
require "tartrazine"
|
||||
|
||||
lexer = Tartrazine.lexer("crystal")
|
||||
theme = Tartrazine.theme("catppuccin-macchiato")
|
||||
puts Tartrazine::Html.new.format(File.read(ARGV[0]), lexer, theme)
|
||||
html = Tartrazine.to_html(
|
||||
"puts \"Hello, world!\"",
|
||||
language: "crystal",
|
||||
theme: "catppuccin-macchiato",
|
||||
standalone: true,
|
||||
line_numbers: true
|
||||
)
|
||||
```
|
||||
|
||||
This does more or less the same thing, but more manually:
|
||||
|
||||
```crystal
|
||||
lexer = Tartrazine.lexer("crystal")
|
||||
formatter = Tartrazine::Html.new(
|
||||
theme: Tartrazine.theme("catppuccin-macchiato"),
|
||||
line_numbers: true,
|
||||
standalone: true,
|
||||
)
|
||||
puts formatter.format("puts \"Hello, world!\"", lexer)
|
||||
```
|
||||
|
||||
The reason you may want to use the manual version is to reuse
|
||||
the lexer and formatter objects for performance reasons.
|
||||
|
||||
## Choosing what Lexers you want
|
||||
|
||||
By default Tartrazine will support all its lexers by embedding
|
||||
them in the binary. This makes the binary large. If you are
|
||||
using it as a library, you may want to just include a selection of lexers. To do that:
|
||||
|
||||
* Pass the `-Dnolexers` flag to the compiler
|
||||
* Set the `TT_LEXERS` environment variable to a
|
||||
comma-separated list of lexers you want to include.
|
||||
|
||||
|
||||
This builds a binary with only the python, markdown, bash and yaml lexers (enough to highlight this `README.md`):
|
||||
|
||||
```bash
|
||||
> TT_LEXERS=python,markdown,bash,yaml shards build -Dnolexers -d --error-trace
|
||||
Dependencies are satisfied
|
||||
Building: tartrazine
|
||||
```
|
||||
|
||||
## Choosing what themes you want
|
||||
|
||||
Themes come from two places, tartrazine itself and [Sixteen](https://github.com/ralsina/sixteen).
|
||||
|
||||
To only embed selected themes, build your project with the `-Dnothemes` option, and
|
||||
you can set two environment variables to control which themes are included:
|
||||
|
||||
* `TT_THEMES` is a comma-separated list of themes to include from tartrazine (see the styles directory in the source)
|
||||
* `SIXTEEN_THEMES` is a comma-separated list of themes to include from Sixteen (see the base16 directory in the sixteen source)
|
||||
|
||||
For example (using the tartrazine CLI as the project):
|
||||
|
||||
```bash
|
||||
$ TT_THEMES=colorful,autumn SIXTEEN_THEMES=pasque,pico shards build -Dnothemes
|
||||
Dependencies are satisfied
|
||||
Building: tartrazine
|
||||
|
||||
$ ./bin/tartrazine --list-themes
|
||||
autumn
|
||||
colorful
|
||||
pasque
|
||||
pico
|
||||
```
|
||||
|
||||
Be careful not to build without any themes at all, nothing will work.
|
||||
|
||||
## Templates for standalone HTML output
|
||||
|
||||
If you are using the HTML formatter, you can pass a template to use for the output. The template is a string where the following placeholders will be replaced:
|
||||
|
||||
* `{{style_defs}}` will be replaced by the CSS styles needed for the theme
|
||||
* `{{code}}` will be replaced by the highlighted code
|
||||
|
||||
This is an example template that changes the padding around the code:
|
||||
|
||||
```jinja2
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<style>
|
||||
{{style_defs}}
|
||||
pre {
|
||||
padding: 1em;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
{{body}}
|
||||
</body>
|
||||
</html>
|
||||
```
|
||||
|
||||
|
||||
## Contributing
|
||||
|
||||
1. Fork it (<https://github.com/ralsina/tartrazine/fork>)
|
||||
@ -63,3 +165,30 @@ puts Tartrazine::Html.new.format(File.read(ARGV[0]), lexer, theme)
|
||||
## Contributors
|
||||
|
||||
- [Roberto Alsina](https://github.com/ralsina) - creator and maintainer
|
||||
|
||||
## A port of what, and why "kind of"
|
||||
|
||||
Pygments is a staple of the Python ecosystem, and it's great.
|
||||
It lets you highlight code in many languages, and it has many
|
||||
themes. Chroma is "Pygments for Go", it's actually a port of
|
||||
Pygments to Go, and it's great too.
|
||||
|
||||
I wanted that in Crystal, so I started this project. But I did
|
||||
not read much of the Pygments code. Or much of Chroma's.
|
||||
|
||||
Chroma has taken most of the Pygments lexers and turned them into
|
||||
XML descriptions. What I did was take those XML files from Chroma
|
||||
and a pile of test cases from Pygments, and I slapped them together
|
||||
until the tests passed and my code produced the same output as
|
||||
Chroma. Think of it as [*extreme TDD*](https://ralsina.me/weblog/posts/tartrazine-reimplementing-pygments.html)
|
||||
|
||||
Currently the pass rate for tests in the supported languages
|
||||
is `96.8%`, which is *not bad for a couple days hacking*.
|
||||
|
||||
This only covers the RegexLexers, which are the most common ones,
|
||||
but it means the supported languages are a subset of Chroma's, which
|
||||
is a subset of Pygments' and DelegatingLexers (useful for things like template languages)
|
||||
|
||||
Then performance was bad, so I hacked and hacked and made it significantly
|
||||
[faster than chroma](https://ralsina.me/weblog/posts/a-tale-of-optimization.html)
|
||||
which is fun.
|
||||
|
14
TODO.md
14
TODO.md
@ -2,6 +2,14 @@
|
||||
|
||||
## TODO
|
||||
|
||||
* Implement styles
|
||||
* Implement formatters
|
||||
* Implement lexer loader that respects aliases, etc
|
||||
* ✅ Implement styles
|
||||
* ✅ Implement formatters
|
||||
* ✅ Implement CLI
|
||||
* ✅ Implement lexer loader that respects aliases
|
||||
* ✅ Implement lexer loader by file extension
|
||||
* ✅ Add --line-numbers to terminal formatter
|
||||
* ✅ Implement lexer loader by mime type
|
||||
* ✅ Implement Delegating lexers
|
||||
* ✅ Add RstLexer
|
||||
* Add Mako template lexer
|
||||
* ✅ Implement heuristic lexer detection
|
||||
|
@ -7,10 +7,10 @@ docker run --rm --privileged \
|
||||
|
||||
# Build for AMD64
|
||||
docker build . -f Dockerfile.static -t tartrazine-builder
|
||||
docker run -ti --rm -v "$PWD":/app --user="$UID" tartrazine-builder /bin/sh -c "cd /app && rm -rf lib shard.lock && make static"
|
||||
docker run -ti --rm -v "$PWD":/app --user="$UID" tartrazine-builder /bin/sh -c "cd /app && rm -rf lib shard.lock && shards build --static --release && strip bin/tartrazine"
|
||||
mv bin/tartrazine bin/tartrazine-static-linux-amd64
|
||||
|
||||
# Build for ARM64
|
||||
docker build . -f Dockerfile.static --platform linux/arm64 -t tartrazine-builder
|
||||
docker run -ti --rm -v "$PWD":/app --platform linux/arm64 --user="$UID" tartrazine-builder /bin/sh -c "cd /app && rm -rf lib shard.lock && make static"
|
||||
docker run -ti --rm -v "$PWD":/app --platform linux/arm64 --user="$UID" tartrazine-builder /bin/sh -c "cd /app && rm -rf lib shard.lock && shards build --static --release && strip bin/tartrazine"
|
||||
mv bin/tartrazine bin/tartrazine-static-linux-arm64
|
||||
|
79
cliff.toml
Normal file
79
cliff.toml
Normal file
@ -0,0 +1,79 @@
|
||||
# git-cliff ~ default configuration file
|
||||
# https://git-cliff.org/docs/configuration
|
||||
#
|
||||
# Lines starting with "#" are comments.
|
||||
# Configuration options are organized into tables and keys.
|
||||
# See documentation for more information on available options.
|
||||
|
||||
[changelog]
|
||||
# template for the changelog header
|
||||
header = """
|
||||
# Changelog\n
|
||||
All notable changes to this project will be documented in this file.\n
|
||||
"""
|
||||
# template for the changelog body
|
||||
# https://keats.github.io/tera/docs/#introduction
|
||||
body = """
|
||||
{% if version %}\
|
||||
## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }}
|
||||
{% else %}\
|
||||
## [unreleased]
|
||||
{% endif %}\
|
||||
{% for group, commits in commits | group_by(attribute="group") %}
|
||||
### {{ group | striptags | trim | upper_first }}
|
||||
{% for commit in commits %}
|
||||
- {% if commit.scope %}*({{ commit.scope }})* {% endif %}\
|
||||
{% if commit.breaking %}[**breaking**] {% endif %}\
|
||||
{{ commit.message | upper_first }}\
|
||||
{% endfor %}
|
||||
{% endfor %}\n
|
||||
"""
|
||||
# template for the changelog footer
|
||||
footer = """
|
||||
<!-- generated by git-cliff -->
|
||||
"""
|
||||
# remove the leading and trailing s
|
||||
trim = true
|
||||
# postprocessors
|
||||
postprocessors = [
|
||||
# { pattern = '<REPO>', replace = "https://github.com/orhun/git-cliff" }, # replace repository URL
|
||||
]
|
||||
|
||||
[git]
|
||||
# parse the commits based on https://www.conventionalcommits.org
|
||||
conventional_commits = true
|
||||
# filter out the commits that are not conventional
|
||||
filter_unconventional = true
|
||||
# process each line of a commit as an individual commit
|
||||
split_commits = false
|
||||
# regex for preprocessing the commit messages
|
||||
commit_preprocessors = [
|
||||
# Replace issue numbers
|
||||
#{ pattern = '\((\w+\s)?#([0-9]+)\)', replace = "([#${2}](<REPO>/issues/${2}))"},
|
||||
# Check spelling of the commit with https://github.com/crate-ci/typos
|
||||
# If the spelling is incorrect, it will be automatically fixed.
|
||||
#{ pattern = '.*', replace_command = 'typos --write-changes -' },
|
||||
]
|
||||
# regex for parsing and grouping commits
|
||||
commit_parsers = [
|
||||
{ message = "^feat", group = "<!-- 0 -->🚀 Features" },
|
||||
{ message = "^fix", group = "<!-- 1 -->🐛 Bug Fixes" },
|
||||
{ message = "^doc", group = "<!-- 3 -->📚 Documentation" },
|
||||
{ message = "^perf", group = "<!-- 4 -->⚡ Performance" },
|
||||
{ message = "^refactor", group = "<!-- 2 -->🚜 Refactor" },
|
||||
{ message = "^style", group = "<!-- 5 -->🎨 Styling" },
|
||||
{ message = "^test", group = "<!-- 6 -->🧪 Testing" },
|
||||
{ message = "^chore\\(release\\): prepare for", skip = true },
|
||||
{ message = "^chore\\(deps.*\\)", skip = true },
|
||||
{ message = "^chore\\(pr\\)", skip = true },
|
||||
{ message = "^chore\\(pull\\)", skip = true },
|
||||
{ message = "^chore|^ci", group = "<!-- 7 -->⚙️ Miscellaneous Tasks" },
|
||||
{ body = ".*security", group = "<!-- 8 -->🛡️ Security" },
|
||||
{ message = "^revert", group = "<!-- 9 -->◀️ Revert" },
|
||||
]
|
||||
# filter out the commits that are not matched by commit parsers
|
||||
filter_commits = false
|
||||
# sort the tags topologically
|
||||
topo_order = false
|
||||
# sort the commits inside sections by oldest/newest order
|
||||
sort_commits = "oldest"
|
15
do_release.sh
Executable file
15
do_release.sh
Executable file
@ -0,0 +1,15 @@
|
||||
#!/bin/bash
|
||||
set e
|
||||
|
||||
PKGNAME=$(basename "$PWD")
|
||||
VERSION=$(git cliff --bumped-version --unreleased |cut -dv -f2)
|
||||
|
||||
sed "s/^version:.*$/version: $VERSION/g" -i shard.yml
|
||||
git add shard.yml
|
||||
hace lint test
|
||||
git cliff --bump -u -p CHANGELOG.md
|
||||
git commit -a -m "bump: Release v$VERSION"
|
||||
hace static
|
||||
git tag "v$VERSION"
|
||||
git push --tags
|
||||
gh release create "v$VERSION" "bin/$PKGNAME-static-linux-amd64" "bin/$PKGNAME-static-linux-arm64" --title "Release v$VERSION" --notes "$(git cliff -l -s all)"
|
BIN
fonts/courier-bold-oblique.pcf.gz
Normal file
BIN
fonts/courier-bold-oblique.pcf.gz
Normal file
Binary file not shown.
BIN
fonts/courier-bold.pcf.gz
Normal file
BIN
fonts/courier-bold.pcf.gz
Normal file
Binary file not shown.
BIN
fonts/courier-oblique.pcf.gz
Normal file
BIN
fonts/courier-oblique.pcf.gz
Normal file
Binary file not shown.
BIN
fonts/courier-regular.pcf.gz
Normal file
BIN
fonts/courier-regular.pcf.gz
Normal file
Binary file not shown.
22
lexers/LICENSE-heuristics
Normal file
22
lexers/LICENSE-heuristics
Normal file
@ -0,0 +1,22 @@
|
||||
Copyright (c) 2017 GitHub, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person
|
||||
obtaining a copy of this software and associated documentation
|
||||
files (the "Software"), to deal in the Software without
|
||||
restriction, including without limitation the rights to use,
|
||||
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following
|
||||
conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
OTHER DEALINGS IN THE SOFTWARE.
|
129
lexers/LiquidLexer.xml
Normal file
129
lexers/LiquidLexer.xml
Normal file
@ -0,0 +1,129 @@
|
||||
|
||||
<lexer>
|
||||
<config>
|
||||
<name>liquid</name>
|
||||
<alias>liquid</alias>
|
||||
<filename>*.liquid</filename>
|
||||
</config>
|
||||
<rules>
|
||||
<state name="root">
|
||||
<rule pattern="[^{]+"><token type="Text"/></rule>
|
||||
<rule pattern="(\{%)(\s*)"><bygroups><token type="Punctuation"/><token type="TextWhitespace"/></bygroups><push state="tag-or-block"/></rule>
|
||||
<rule pattern="(\{\{)(\s*)([^\s}]+)"><bygroups><token type="Punctuation"/><token type="TextWhitespace"/><usingself state="generic"/></bygroups><push state="output"/></rule>
|
||||
<rule pattern="\{"><token type="Text"/></rule>
|
||||
</state>
|
||||
<state name="tag-or-block">
|
||||
<rule pattern="(if|unless|elsif|case)(?=\s+)"><token type="KeywordReserved"/><push state="condition"/></rule>
|
||||
<rule pattern="(when)(\s+)"><bygroups><token type="KeywordReserved"/><token type="TextWhitespace"/></bygroups><combined state="end-of-block" state="whitespace" state="generic"/></rule>
|
||||
<rule pattern="(else)(\s*)(%\})"><bygroups><token type="KeywordReserved"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups><pop depth="1"/></rule>
|
||||
<rule pattern="(capture)(\s+)([^\s%]+)(\s*)(%\})"><bygroups><token type="NameTag"/><token type="TextWhitespace"/><usingself state="variable"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups><pop depth="1"/></rule>
|
||||
<rule pattern="(comment)(\s*)(%\})"><bygroups><token type="NameTag"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups><push state="comment"/></rule>
|
||||
<rule pattern="(raw)(\s*)(%\})"><bygroups><token type="NameTag"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups><push state="raw"/></rule>
|
||||
<rule pattern="(end(case|unless|if))(\s*)(%\})"><bygroups><token type="KeywordReserved"/>None<token type="TextWhitespace"/><token type="Punctuation"/></bygroups><pop depth="1"/></rule>
|
||||
<rule pattern="(end([^\s%]+))(\s*)(%\})"><bygroups><token type="NameTag"/>None<token type="TextWhitespace"/><token type="Punctuation"/></bygroups><pop depth="1"/></rule>
|
||||
<rule pattern="(cycle)(\s+)(?:([^\s:]*)(:))?(\s*)"><bygroups><token type="NameTag"/><token type="TextWhitespace"/><usingself state="generic"/><token type="Punctuation"/><token type="TextWhitespace"/></bygroups><push state="variable-tag-markup"/></rule>
|
||||
<rule pattern="([^\s%]+)(\s*)"><bygroups><token type="NameTag"/><token type="TextWhitespace"/></bygroups><push state="tag-markup"/></rule>
|
||||
</state>
|
||||
<state name="output">
|
||||
<rule><include state="whitespace"/></rule>
|
||||
<rule pattern="\}\}"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
<rule pattern="\|"><token type="Punctuation"/><push state="filters"/></rule>
|
||||
</state>
|
||||
<state name="filters">
|
||||
<rule><include state="whitespace"/></rule>
|
||||
<rule pattern="\}\}"><token type="Punctuation"/><push state="#pop" state="#pop"/></rule>
|
||||
<rule pattern="([^\s|:]+)(:?)(\s*)"><bygroups><token type="NameFunction"/><token type="Punctuation"/><token type="TextWhitespace"/></bygroups><push state="filter-markup"/></rule>
|
||||
</state>
|
||||
<state name="filter-markup">
|
||||
<rule pattern="\|"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
<rule><include state="end-of-tag"/></rule>
|
||||
<rule><include state="default-param-markup"/></rule>
|
||||
</state>
|
||||
<state name="condition">
|
||||
<rule><include state="end-of-block"/></rule>
|
||||
<rule><include state="whitespace"/></rule>
|
||||
<rule pattern="([^\s=!><]+)(\s*)([=!><]=?)(\s*)(\S+)(\s*)(%\})"><bygroups><usingself state="generic"/><token type="TextWhitespace"/><token type="Operator"/><token type="TextWhitespace"/><usingself state="generic"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups></rule>
|
||||
<rule pattern="\b!"><token type="Operator"/></rule>
|
||||
<rule pattern="\bnot\b"><token type="OperatorWord"/></rule>
|
||||
<rule pattern="([\w.\'"]+)(\s+)(contains)(\s+)([\w.\'"]+)"><bygroups><usingself state="generic"/><token type="TextWhitespace"/><token type="OperatorWord"/><token type="TextWhitespace"/><usingself state="generic"/></bygroups></rule>
|
||||
<rule><include state="generic"/></rule>
|
||||
<rule><include state="whitespace"/></rule>
|
||||
</state>
|
||||
<state name="generic-value">
|
||||
<rule><include state="generic"/></rule>
|
||||
<rule><include state="end-at-whitespace"/></rule>
|
||||
</state>
|
||||
<state name="operator">
|
||||
<rule pattern="(\s*)((=|!|>|<)=?)(\s*)"><bygroups><token type="TextWhitespace"/><token type="Operator"/>None<token type="TextWhitespace"/></bygroups><pop depth="1"/></rule>
|
||||
<rule pattern="(\s*)(\bcontains\b)(\s*)"><bygroups><token type="TextWhitespace"/><token type="OperatorWord"/><token type="TextWhitespace"/></bygroups><pop depth="1"/></rule>
|
||||
</state>
|
||||
<state name="end-of-tag">
|
||||
<rule pattern="\}\}"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
</state>
|
||||
<state name="end-of-block">
|
||||
<rule pattern="%\}"><token type="Punctuation"/><push state="#pop" state="#pop"/></rule>
|
||||
</state>
|
||||
<state name="end-at-whitespace">
|
||||
<rule pattern="\s+"><token type="TextWhitespace"/><pop depth="1"/></rule>
|
||||
</state>
|
||||
<state name="param-markup">
|
||||
<rule><include state="whitespace"/></rule>
|
||||
<rule pattern="([^\s=:]+)(\s*)(=|:)"><bygroups><token type="NameAttribute"/><token type="TextWhitespace"/><token type="Operator"/></bygroups></rule>
|
||||
<rule pattern="(\{\{)(\s*)([^\s}])(\s*)(\}\})"><bygroups><token type="Punctuation"/><token type="TextWhitespace"/><usingself state="variable"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups></rule>
|
||||
<rule><include state="string"/></rule>
|
||||
<rule><include state="number"/></rule>
|
||||
<rule><include state="keyword"/></rule>
|
||||
<rule pattern=","><token type="Punctuation"/></rule>
|
||||
</state>
|
||||
<state name="default-param-markup">
|
||||
<rule><include state="param-markup"/></rule>
|
||||
<rule pattern="."><token type="Text"/></rule>
|
||||
</state>
|
||||
<state name="variable-param-markup">
|
||||
<rule><include state="param-markup"/></rule>
|
||||
<rule><include state="variable"/></rule>
|
||||
<rule pattern="."><token type="Text"/></rule>
|
||||
</state>
|
||||
<state name="tag-markup">
|
||||
<rule pattern="%\}"><token type="Punctuation"/><push state="#pop" state="#pop"/></rule>
|
||||
<rule><include state="default-param-markup"/></rule>
|
||||
</state>
|
||||
<state name="variable-tag-markup">
|
||||
<rule pattern="%\}"><token type="Punctuation"/><push state="#pop" state="#pop"/></rule>
|
||||
<rule><include state="variable-param-markup"/></rule>
|
||||
</state>
|
||||
<state name="keyword">
|
||||
<rule pattern="\b(false|true)\b"><token type="KeywordConstant"/></rule>
|
||||
</state>
|
||||
<state name="variable">
|
||||
<rule pattern="[a-zA-Z_]\w*"><token type="NameVariable"/></rule>
|
||||
<rule pattern="(?<=\w)\.(?=\w)"><token type="Punctuation"/></rule>
|
||||
</state>
|
||||
<state name="string">
|
||||
<rule pattern="'[^']*'"><token type="LiteralStringSingle"/></rule>
|
||||
<rule pattern=""[^"]*""><token type="LiteralStringDouble"/></rule>
|
||||
</state>
|
||||
<state name="number">
|
||||
<rule pattern="\d+\.\d+"><token type="LiteralNumberFloat"/></rule>
|
||||
<rule pattern="\d+"><token type="LiteralNumberInteger"/></rule>
|
||||
</state>
|
||||
<state name="generic">
|
||||
<rule><include state="keyword"/></rule>
|
||||
<rule><include state="string"/></rule>
|
||||
<rule><include state="number"/></rule>
|
||||
<rule><include state="variable"/></rule>
|
||||
</state>
|
||||
<state name="whitespace">
|
||||
<rule pattern="[ \t]+"><token type="TextWhitespace"/></rule>
|
||||
</state>
|
||||
<state name="comment">
|
||||
<rule pattern="(\{%)(\s*)(endcomment)(\s*)(%\})"><bygroups><token type="Punctuation"/><token type="TextWhitespace"/><token type="NameTag"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups><push state="#pop" state="#pop"/></rule>
|
||||
<rule pattern="."><token type="Comment"/></rule>
|
||||
</state>
|
||||
<state name="raw">
|
||||
<rule pattern="[^{]+"><token type="Text"/></rule>
|
||||
<rule pattern="(\{%)(\s*)(endraw)(\s*)(%\})"><bygroups><token type="Punctuation"/><token type="TextWhitespace"/><token type="NameTag"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups><pop depth="1"/></rule>
|
||||
<rule pattern="\{"><token type="Text"/></rule>
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
54
lexers/VelocityLexer.xml
Normal file
54
lexers/VelocityLexer.xml
Normal file
@ -0,0 +1,54 @@
|
||||
|
||||
<lexer>
|
||||
<config>
|
||||
<name>Velocity</name>
|
||||
<alias>velocity</alias>
|
||||
<filename>*.vm</filename>
|
||||
<filename>*.fhtml</filename>
|
||||
<dot_all>true</dot_all>
|
||||
</config>
|
||||
<rules>
|
||||
<state name="root">
|
||||
<rule pattern="[^{#$]+"><token type="Other"/></rule>
|
||||
<rule pattern="(#)(\*.*?\*)(#)"><bygroups><token type="CommentPreproc"/><token type="Comment"/><token type="CommentPreproc"/></bygroups></rule>
|
||||
<rule pattern="(##)(.*?$)"><bygroups><token type="CommentPreproc"/><token type="Comment"/></bygroups></rule>
|
||||
<rule pattern="(#\{?)([a-zA-Z_]\w*)(\}?)(\s?\()"><bygroups><token type="CommentPreproc"/><token type="NameFunction"/><token type="CommentPreproc"/><token type="Punctuation"/></bygroups><push state="directiveparams"/></rule>
|
||||
<rule pattern="(#\{?)([a-zA-Z_]\w*)(\}|\b)"><bygroups><token type="CommentPreproc"/><token type="NameFunction"/><token type="CommentPreproc"/></bygroups></rule>
|
||||
<rule pattern="\$!?\{?"><token type="Punctuation"/><push state="variable"/></rule>
|
||||
</state>
|
||||
<state name="variable">
|
||||
<rule pattern="[a-zA-Z_]\w*"><token type="NameVariable"/></rule>
|
||||
<rule pattern="\("><token type="Punctuation"/><push state="funcparams"/></rule>
|
||||
<rule pattern="(\.)([a-zA-Z_]\w*)"><bygroups><token type="Punctuation"/><token type="NameVariable"/></bygroups><push/></rule>
|
||||
<rule pattern="\}"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
<rule><pop depth="1"/></rule>
|
||||
</state>
|
||||
<state name="directiveparams">
|
||||
<rule pattern="(&&|\|\||==?|!=?|[-<>+*%&|^/])|\b(eq|ne|gt|lt|ge|le|not|in)\b"><token type="Operator"/></rule>
|
||||
<rule pattern="\["><token type="Operator"/><push state="rangeoperator"/></rule>
|
||||
<rule pattern="\b[a-zA-Z_]\w*\b"><token type="NameFunction"/></rule>
|
||||
<rule><include state="funcparams"/></rule>
|
||||
</state>
|
||||
<state name="rangeoperator">
|
||||
<rule pattern="\.\."><token type="Operator"/></rule>
|
||||
<rule><include state="funcparams"/></rule>
|
||||
<rule pattern="\]"><token type="Operator"/><pop depth="1"/></rule>
|
||||
</state>
|
||||
<state name="funcparams">
|
||||
<rule pattern="\$!?\{?"><token type="Punctuation"/><push state="variable"/></rule>
|
||||
<rule pattern="\s+"><token type="Text"/></rule>
|
||||
<rule pattern="[,:]"><token type="Punctuation"/></rule>
|
||||
<rule pattern=""(\\\\|\\[^\\]|[^"\\])*""><token type="LiteralStringDouble"/></rule>
|
||||
<rule pattern="'(\\\\|\\[^\\]|[^'\\])*'"><token type="LiteralStringSingle"/></rule>
|
||||
<rule pattern="0[xX][0-9a-fA-F]+[Ll]?"><token type="LiteralNumber"/></rule>
|
||||
<rule pattern="\b[0-9]+\b"><token type="LiteralNumber"/></rule>
|
||||
<rule pattern="(true|false|null)\b"><token type="KeywordConstant"/></rule>
|
||||
<rule pattern="\("><token type="Punctuation"/><push/></rule>
|
||||
<rule pattern="\)"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
<rule pattern="\{"><token type="Punctuation"/><push/></rule>
|
||||
<rule pattern="\}"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
<rule pattern="\["><token type="Punctuation"/><push/></rule>
|
||||
<rule pattern="\]"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
@ -63,4 +63,3 @@
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
||||
|
||||
|
@ -55,4 +55,3 @@
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
||||
|
||||
|
165
lexers/atl.xml
Normal file
165
lexers/atl.xml
Normal file
@ -0,0 +1,165 @@
|
||||
<lexer>
|
||||
<config>
|
||||
<name>ATL</name>
|
||||
<alias>atl</alias>
|
||||
<filename>*.atl</filename>
|
||||
<mime_type>text/x-atl</mime_type>
|
||||
<dot_all>true</dot_all>
|
||||
</config>
|
||||
<rules>
|
||||
<state name="root">
|
||||
<rule pattern="(--.*?)(\n)">
|
||||
<bygroups>
|
||||
<token type="CommentSingle" />
|
||||
<token type="TextWhitespace" />
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="(and|distinct|endif|else|for|foreach|if|implies|in|let|not|or|self|super|then|thisModule|xor)\b">
|
||||
<token type="Keyword" />
|
||||
</rule>
|
||||
<rule pattern="(OclUndefined|true|false|#\w+)\b">
|
||||
<token type="KeywordConstant" />
|
||||
</rule>
|
||||
<rule pattern="(module|query|library|create|from|to|uses)\b">
|
||||
<token type="KeywordNamespace" />
|
||||
</rule>
|
||||
<rule pattern="(do)(\s*)({)">
|
||||
<bygroups>
|
||||
<token type="KeywordNamespace" />
|
||||
<token type="TextWhitespace" />
|
||||
<token type="Punctuation" />
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="(abstract|endpoint|entrypoint|lazy|unique)(\s+)">
|
||||
<bygroups>
|
||||
<token type="KeywordDeclaration" />
|
||||
<token type="TextWhitespace" />
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="(rule)(\s+)">
|
||||
<bygroups>
|
||||
<token type="KeywordNamespace" />
|
||||
<token type="TextWhitespace" />
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="(helper)(\s+)">
|
||||
<bygroups>
|
||||
<token type="KeywordNamespace" />
|
||||
<token type="TextWhitespace" />
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="(context)(\s+)">
|
||||
<bygroups>
|
||||
<token type="KeywordNamespace" />
|
||||
<token type="TextWhitespace" />
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="(def)(\s*)(:)(\s*)">
|
||||
<bygroups>
|
||||
<token type="KeywordNamespace" />
|
||||
<token type="TextWhitespace" />
|
||||
<token type="Punctuation" />
|
||||
<token type="TextWhitespace" />
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="(Bag|Boolean|Integer|OrderedSet|Real|Sequence|Set|String|Tuple)">
|
||||
<token type="KeywordType" />
|
||||
</rule>
|
||||
<rule pattern="(\w+)(\s*)(<-|<:=)">
|
||||
<bygroups>
|
||||
<token type="NameNamespace" />
|
||||
<token type="TextWhitespace" />
|
||||
<token type="Punctuation" />
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="#"">
|
||||
<token type="KeywordConstant" />
|
||||
<push state="quotedenumliteral" />
|
||||
</rule>
|
||||
<rule pattern=""">
|
||||
<token type="NameNamespace" />
|
||||
<push state="quotedname" />
|
||||
</rule>
|
||||
<rule pattern="[^\S\n]+">
|
||||
<token type="TextWhitespace" />
|
||||
</rule>
|
||||
<rule pattern="'">
|
||||
<token type="LiteralString" />
|
||||
<push state="string" />
|
||||
</rule>
|
||||
<rule
|
||||
pattern="[0-9]*\.[0-9]+">
|
||||
<token type="LiteralNumberFloat" />
|
||||
</rule>
|
||||
<rule pattern="0|[1-9][0-9]*">
|
||||
<token type="LiteralNumberInteger" />
|
||||
</rule>
|
||||
<rule pattern="[*<>+=/-]">
|
||||
<token type="Operator" />
|
||||
</rule>
|
||||
<rule pattern="([{}();:.,!|]|->)">
|
||||
<token type="Punctuation" />
|
||||
</rule>
|
||||
<rule pattern="\n">
|
||||
<token type="TextWhitespace" />
|
||||
</rule>
|
||||
<rule pattern="\w+">
|
||||
<token type="NameNamespace" />
|
||||
</rule>
|
||||
</state>
|
||||
<state name="string">
|
||||
<rule pattern="[^\\']+">
|
||||
<token type="LiteralString" />
|
||||
</rule>
|
||||
<rule pattern="\\\\">
|
||||
<token type="LiteralString" />
|
||||
</rule>
|
||||
<rule pattern="\\'">
|
||||
<token type="LiteralString" />
|
||||
</rule>
|
||||
<rule pattern="\\">
|
||||
<token type="LiteralString" />
|
||||
</rule>
|
||||
<rule pattern="'">
|
||||
<token type="LiteralString" />
|
||||
<pop depth="1" />
|
||||
</rule>
|
||||
</state>
|
||||
<state name="quotedname">
|
||||
<rule pattern="[^\\"]+">
|
||||
<token type="NameNamespace" />
|
||||
</rule>
|
||||
<rule pattern="\\\\">
|
||||
<token type="NameNamespace" />
|
||||
</rule>
|
||||
<rule pattern="\\"">
|
||||
<token type="NameNamespace" />
|
||||
</rule>
|
||||
<rule pattern="\\">
|
||||
<token type="NameNamespace" />
|
||||
</rule>
|
||||
<rule pattern=""">
|
||||
<token type="NameNamespace" />
|
||||
<pop depth="1" />
|
||||
</rule>
|
||||
</state>
|
||||
<state name="quotedenumliteral">
|
||||
<rule pattern="[^\\"]+">
|
||||
<token type="KeywordConstant" />
|
||||
</rule>
|
||||
<rule pattern="\\\\">
|
||||
<token type="KeywordConstant" />
|
||||
</rule>
|
||||
<rule pattern="\\"">
|
||||
<token type="KeywordConstant" />
|
||||
</rule>
|
||||
<rule pattern="\\">
|
||||
<token type="KeywordConstant" />
|
||||
</rule>
|
||||
<rule pattern=""">
|
||||
<token type="KeywordConstant" />
|
||||
<pop depth="1" />
|
||||
</rule>
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
@ -75,4 +75,3 @@
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
||||
|
||||
|
@ -67,4 +67,3 @@
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
||||
|
||||
|
21
lexers/bbcode.xml
Normal file
21
lexers/bbcode.xml
Normal file
@ -0,0 +1,21 @@
|
||||
|
||||
<lexer>
|
||||
<config>
|
||||
<name>BBCode</name>
|
||||
<alias>bbcode</alias>
|
||||
<mime_type>text/x-bbcode</mime_type>
|
||||
</config>
|
||||
<rules>
|
||||
<state name="root">
|
||||
<rule pattern="[^[]+"><token type="Text"/></rule>
|
||||
<rule pattern="\[/?\w+"><token type="Keyword"/><push state="tag"/></rule>
|
||||
<rule pattern="\["><token type="Text"/></rule>
|
||||
</state>
|
||||
<state name="tag">
|
||||
<rule pattern="\s+"><token type="Text"/></rule>
|
||||
<rule pattern="(\w+)(=)("?[^\s"\]]+"?)"><bygroups><token type="NameAttribute"/><token type="Operator"/><token type="LiteralString"/></bygroups></rule>
|
||||
<rule pattern="(=)("?[^\s"\]]+"?)"><bygroups><token type="Operator"/><token type="LiteralString"/></bygroups></rule>
|
||||
<rule pattern="\]"><token type="Keyword"/><pop depth="1"/></rule>
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
120
lexers/beef.xml
Normal file
120
lexers/beef.xml
Normal file
@ -0,0 +1,120 @@
|
||||
<lexer>
|
||||
<config>
|
||||
<name>Beef</name>
|
||||
<alias>beef</alias>
|
||||
<filename>*.bf</filename>
|
||||
<mime_type>text/x-beef</mime_type>
|
||||
<dot_all>true</dot_all>
|
||||
<ensure_nl>true</ensure_nl>
|
||||
</config>
|
||||
<rules>
|
||||
<state name="root">
|
||||
<rule pattern="^\s*\[.*?\]">
|
||||
<token type="NameAttribute"/>
|
||||
</rule>
|
||||
<rule pattern="[^\S\n]+">
|
||||
<token type="Text"/>
|
||||
</rule>
|
||||
<rule pattern="\\\n">
|
||||
<token type="Text"/>
|
||||
</rule>
|
||||
<rule pattern="///[^\n\r]*">
|
||||
<token type="CommentSpecial"/>
|
||||
</rule>
|
||||
<rule pattern="//[^\n\r]*">
|
||||
<token type="CommentSingle"/>
|
||||
</rule>
|
||||
<rule pattern="/[*].*?[*]/">
|
||||
<token type="CommentMultiline"/>
|
||||
</rule>
|
||||
<rule pattern="\n">
|
||||
<token type="Text"/>
|
||||
</rule>
|
||||
<rule pattern="[~!%^&*()+=|\[\]:;,.<>/?-]">
|
||||
<token type="Punctuation"/>
|
||||
</rule>
|
||||
<rule pattern="[{}]">
|
||||
<token type="Punctuation"/>
|
||||
</rule>
|
||||
<rule pattern="@"(""|[^"])*"">
|
||||
<token type="LiteralString"/>
|
||||
</rule>
|
||||
<rule pattern="\$@?"(""|[^"])*"">
|
||||
<token type="LiteralString"/>
|
||||
</rule>
|
||||
<rule pattern=""(\\\\|\\"|[^"\n])*["\n]">
|
||||
<token type="LiteralString"/>
|
||||
</rule>
|
||||
<rule pattern="'\\.'|'[^\\]'">
|
||||
<token type="LiteralStringChar"/>
|
||||
</rule>
|
||||
<rule pattern="0[xX][0-9a-fA-F]+[Ll]?|\d[_\d]*(\.\d*)?([eE][+-]?\d+)?[flFLdD]?">
|
||||
<token type="LiteralNumber"/>
|
||||
</rule>
|
||||
<rule pattern="#[ \t]*(if|endif|else|elif|define|undef|line|error|warning|region|endregion|pragma|nullable)\b">
|
||||
<token type="CommentPreproc"/>
|
||||
</rule>
|
||||
<rule pattern="\b(extern)(\s+)(alias)\b">
|
||||
<bygroups>
|
||||
<token type="Keyword"/>
|
||||
<token type="Text"/>
|
||||
<token type="Keyword"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="(as|await|base|break|by|case|catch|checked|continue|default|delegate|else|event|finally|fixed|for|repeat|goto|if|in|init|is|let|lock|new|scope|on|out|params|readonly|ref|return|sizeof|stackalloc|switch|this|throw|try|typeof|unchecked|virtual|void|while|get|set|new|yield|add|remove|value|alias|ascending|descending|from|group|into|orderby|select|thenby|where|join|equals)\b">
|
||||
<token type="Keyword"/>
|
||||
</rule>
|
||||
<rule pattern="(global)(::)">
|
||||
<bygroups>
|
||||
<token type="Keyword"/>
|
||||
<token type="Punctuation"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="(abstract|async|const|enum|explicit|extern|implicit|internal|operator|override|partial|extension|private|protected|public|static|sealed|unsafe|volatile)\b">
|
||||
<token type="KeywordDeclaration"/>
|
||||
</rule>
|
||||
<rule pattern="(bool|byte|char8|char16|char32|decimal|double|float|int|int8|int16|int32|int64|long|object|sbyte|short|string|uint|uint8|uint16|uint32|uint64|uint|let|var)\b\??">
|
||||
<token type="KeywordType"/>
|
||||
</rule>
|
||||
<rule pattern="(true|false|null)\b">
|
||||
<token type="KeywordConstant"/>
|
||||
</rule>
|
||||
<rule pattern="(class|struct|record|interface)(\s+)">
|
||||
<bygroups>
|
||||
<token type="Keyword"/>
|
||||
<token type="Text"/>
|
||||
</bygroups>
|
||||
<push state="class"/>
|
||||
</rule>
|
||||
<rule pattern="(namespace|using)(\s+)">
|
||||
<bygroups>
|
||||
<token type="Keyword"/>
|
||||
<token type="Text"/>
|
||||
</bygroups>
|
||||
<push state="namespace"/>
|
||||
</rule>
|
||||
<rule pattern="@?[_a-zA-Z]\w*">
|
||||
<token type="Name"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="class">
|
||||
<rule pattern="@?[_a-zA-Z]\w*">
|
||||
<token type="NameClass"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
<rule>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="namespace">
|
||||
<rule pattern="(?=\()">
|
||||
<token type="Text"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
<rule pattern="(@?[_a-zA-Z]\w*|\.)+">
|
||||
<token type="NameNamespace"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
@ -1,762 +0,0 @@
|
||||
<lexer>
|
||||
<config>
|
||||
<name>Crystal</name>
|
||||
<alias>cr</alias>
|
||||
<alias>crystal</alias>
|
||||
<filename>*.cr</filename>
|
||||
<mime_type>text/x-crystal</mime_type>
|
||||
<dot_all>true</dot_all>
|
||||
</config>
|
||||
<rules>
|
||||
<state name="pa-intp-string">
|
||||
<rule pattern="\\[\(]">
|
||||
<token type="LiteralStringOther"/>
|
||||
</rule>
|
||||
<rule pattern="\(">
|
||||
<token type="LiteralStringOther"/>
|
||||
<push/>
|
||||
</rule>
|
||||
<rule pattern="\)">
|
||||
<token type="LiteralStringOther"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
<rule>
|
||||
<include state="string-intp-escaped"/>
|
||||
</rule>
|
||||
<rule pattern="[\\#()]">
|
||||
<token type="LiteralStringOther"/>
|
||||
</rule>
|
||||
<rule pattern="[^\\#()]+">
|
||||
<token type="LiteralStringOther"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="ab-regex">
|
||||
<rule pattern="\\[\\<>]">
|
||||
<token type="LiteralStringRegex"/>
|
||||
</rule>
|
||||
<rule pattern="<">
|
||||
<token type="LiteralStringRegex"/>
|
||||
<push/>
|
||||
</rule>
|
||||
<rule pattern=">[imsx]*">
|
||||
<token type="LiteralStringRegex"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
<rule>
|
||||
<include state="string-intp"/>
|
||||
</rule>
|
||||
<rule pattern="[\\#<>]">
|
||||
<token type="LiteralStringRegex"/>
|
||||
</rule>
|
||||
<rule pattern="[^\\#<>]+">
|
||||
<token type="LiteralStringRegex"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="cb-regex">
|
||||
<rule pattern="\\[\\{}]">
|
||||
<token type="LiteralStringRegex"/>
|
||||
</rule>
|
||||
<rule pattern="\{">
|
||||
<token type="LiteralStringRegex"/>
|
||||
<push/>
|
||||
</rule>
|
||||
<rule pattern="\}[imsx]*">
|
||||
<token type="LiteralStringRegex"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
<rule>
|
||||
<include state="string-intp"/>
|
||||
</rule>
|
||||
<rule pattern="[\\#{}]">
|
||||
<token type="LiteralStringRegex"/>
|
||||
</rule>
|
||||
<rule pattern="[^\\#{}]+">
|
||||
<token type="LiteralStringRegex"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="simple-backtick">
|
||||
<rule>
|
||||
<include state="string-intp-escaped"/>
|
||||
</rule>
|
||||
<rule pattern="[^\\`#]+">
|
||||
<token type="LiteralStringBacktick"/>
|
||||
</rule>
|
||||
<rule pattern="[\\#]">
|
||||
<token type="LiteralStringBacktick"/>
|
||||
</rule>
|
||||
<rule pattern="`">
|
||||
<token type="LiteralStringBacktick"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="string-intp">
|
||||
<rule pattern="#\{">
|
||||
<token type="LiteralStringInterpol"/>
|
||||
<push state="in-intp"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="interpolated-regex">
|
||||
<rule>
|
||||
<include state="string-intp"/>
|
||||
</rule>
|
||||
<rule pattern="[\\#]">
|
||||
<token type="LiteralStringRegex"/>
|
||||
</rule>
|
||||
<rule pattern="[^\\#]+">
|
||||
<token type="LiteralStringRegex"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="cb-string">
|
||||
<rule pattern="\\[\\{}]">
|
||||
<token type="LiteralStringOther"/>
|
||||
</rule>
|
||||
<rule pattern="\{">
|
||||
<token type="LiteralStringOther"/>
|
||||
<push/>
|
||||
</rule>
|
||||
<rule pattern="\}">
|
||||
<token type="LiteralStringOther"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
<rule pattern="[\\#{}]">
|
||||
<token type="LiteralStringOther"/>
|
||||
</rule>
|
||||
<rule pattern="[^\\#{}]+">
|
||||
<token type="LiteralStringOther"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="in-macro-control">
|
||||
<rule pattern="\{%">
|
||||
<token type="LiteralStringInterpol"/>
|
||||
<push/>
|
||||
</rule>
|
||||
<rule pattern="%\}">
|
||||
<token type="LiteralStringInterpol"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
<rule pattern="for\b|in\b">
|
||||
<token type="Keyword"/>
|
||||
</rule>
|
||||
<rule>
|
||||
<include state="root"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="interpolated-string">
|
||||
<rule>
|
||||
<include state="string-intp"/>
|
||||
</rule>
|
||||
<rule pattern="[\\#]">
|
||||
<token type="LiteralStringOther"/>
|
||||
</rule>
|
||||
<rule pattern="[^\\#]+">
|
||||
<token type="LiteralStringOther"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="in-macro-expr">
|
||||
<rule pattern="\{\{">
|
||||
<token type="LiteralStringInterpol"/>
|
||||
<push/>
|
||||
</rule>
|
||||
<rule pattern="\}\}">
|
||||
<token type="LiteralStringInterpol"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
<rule>
|
||||
<include state="root"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="simple-string">
|
||||
<rule>
|
||||
<include state="string-intp-escaped"/>
|
||||
</rule>
|
||||
<rule pattern="[^\\"#]+">
|
||||
<token type="LiteralStringDouble"/>
|
||||
</rule>
|
||||
<rule pattern="[\\#]">
|
||||
<token type="LiteralStringDouble"/>
|
||||
</rule>
|
||||
<rule pattern=""">
|
||||
<token type="LiteralStringDouble"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="cb-intp-string">
|
||||
<rule pattern="\\[\{]">
|
||||
<token type="LiteralStringOther"/>
|
||||
</rule>
|
||||
<rule pattern="\{">
|
||||
<token type="LiteralStringOther"/>
|
||||
<push/>
|
||||
</rule>
|
||||
<rule pattern="\}">
|
||||
<token type="LiteralStringOther"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
<rule>
|
||||
<include state="string-intp-escaped"/>
|
||||
</rule>
|
||||
<rule pattern="[\\#{}]">
|
||||
<token type="LiteralStringOther"/>
|
||||
</rule>
|
||||
<rule pattern="[^\\#{}]+">
|
||||
<token type="LiteralStringOther"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="string-intp-escaped">
|
||||
<rule>
|
||||
<include state="string-intp"/>
|
||||
</rule>
|
||||
<rule>
|
||||
<include state="string-escaped"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="sb-regex">
|
||||
<rule pattern="\\[\\\[\]]">
|
||||
<token type="LiteralStringRegex"/>
|
||||
</rule>
|
||||
<rule pattern="\[">
|
||||
<token type="LiteralStringRegex"/>
|
||||
<push/>
|
||||
</rule>
|
||||
<rule pattern="\][imsx]*">
|
||||
<token type="LiteralStringRegex"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
<rule>
|
||||
<include state="string-intp"/>
|
||||
</rule>
|
||||
<rule pattern="[\\#\[\]]">
|
||||
<token type="LiteralStringRegex"/>
|
||||
</rule>
|
||||
<rule pattern="[^\\#\[\]]+">
|
||||
<token type="LiteralStringRegex"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="classname">
|
||||
<rule pattern="[A-Z_]\w*">
|
||||
<token type="NameClass"/>
|
||||
</rule>
|
||||
<rule pattern="(\()(\s*)([A-Z_]\w*)(\s*)(\))">
|
||||
<bygroups>
|
||||
<token type="Punctuation"/>
|
||||
<token type="Text"/>
|
||||
<token type="NameClass"/>
|
||||
<token type="Text"/>
|
||||
<token type="Punctuation"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="string-escaped">
|
||||
<rule pattern="\\([\\befnstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})">
|
||||
<token type="LiteralStringEscape"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="sb-intp-string">
|
||||
<rule pattern="\\[\[]">
|
||||
<token type="LiteralStringOther"/>
|
||||
</rule>
|
||||
<rule pattern="\[">
|
||||
<token type="LiteralStringOther"/>
|
||||
<push/>
|
||||
</rule>
|
||||
<rule pattern="\]">
|
||||
<token type="LiteralStringOther"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
<rule>
|
||||
<include state="string-intp-escaped"/>
|
||||
</rule>
|
||||
<rule pattern="[\\#\[\]]">
|
||||
<token type="LiteralStringOther"/>
|
||||
</rule>
|
||||
<rule pattern="[^\\#\[\]]+">
|
||||
<token type="LiteralStringOther"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="pa-regex">
|
||||
<rule pattern="\\[\\()]">
|
||||
<token type="LiteralStringRegex"/>
|
||||
</rule>
|
||||
<rule pattern="\(">
|
||||
<token type="LiteralStringRegex"/>
|
||||
<push/>
|
||||
</rule>
|
||||
<rule pattern="\)[imsx]*">
|
||||
<token type="LiteralStringRegex"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
<rule>
|
||||
<include state="string-intp"/>
|
||||
</rule>
|
||||
<rule pattern="[\\#()]">
|
||||
<token type="LiteralStringRegex"/>
|
||||
</rule>
|
||||
<rule pattern="[^\\#()]+">
|
||||
<token type="LiteralStringRegex"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="in-attr">
|
||||
<rule pattern="\[">
|
||||
<token type="Operator"/>
|
||||
<push/>
|
||||
</rule>
|
||||
<rule pattern="\]">
|
||||
<token type="Operator"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
<rule>
|
||||
<include state="root"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="ab-intp-string">
|
||||
<rule pattern="\\[<]">
|
||||
<token type="LiteralStringOther"/>
|
||||
</rule>
|
||||
<rule pattern="<">
|
||||
<token type="LiteralStringOther"/>
|
||||
<push/>
|
||||
</rule>
|
||||
<rule pattern=">">
|
||||
<token type="LiteralStringOther"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
<rule>
|
||||
<include state="string-intp-escaped"/>
|
||||
</rule>
|
||||
<rule pattern="[\\#<>]">
|
||||
<token type="LiteralStringOther"/>
|
||||
</rule>
|
||||
<rule pattern="[^\\#<>]+">
|
||||
<token type="LiteralStringOther"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="in-intp">
|
||||
<rule pattern="\{">
|
||||
<token type="LiteralStringInterpol"/>
|
||||
<push/>
|
||||
</rule>
|
||||
<rule pattern="\}">
|
||||
<token type="LiteralStringInterpol"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
<rule>
|
||||
<include state="root"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="end-part">
|
||||
<rule pattern=".+">
|
||||
<token type="CommentPreproc"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="root">
|
||||
<rule pattern="#.*?$">
|
||||
<token type="CommentSingle"/>
|
||||
</rule>
|
||||
<rule pattern="(instance_sizeof|pointerof|protected|abstract|require|private|include|unless|typeof|sizeof|return|extend|ensure|rescue|ifdef|super|break|begin|until|while|elsif|yield|next|when|else|then|case|with|end|asm|if|do|as|of)\b">
|
||||
<token type="Keyword"/>
|
||||
</rule>
|
||||
<rule pattern="(false|true|nil)\b">
|
||||
<token type="KeywordConstant"/>
|
||||
</rule>
|
||||
<rule pattern="(module|lib)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)">
|
||||
<bygroups>
|
||||
<token type="Keyword"/>
|
||||
<token type="Text"/>
|
||||
<token type="NameNamespace"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="(def|fun|macro)(\s+)((?:[a-zA-Z_]\w*::)*)">
|
||||
<bygroups>
|
||||
<token type="Keyword"/>
|
||||
<token type="Text"/>
|
||||
<token type="NameNamespace"/>
|
||||
</bygroups>
|
||||
<push state="funcname"/>
|
||||
</rule>
|
||||
<rule pattern="def(?=[*%&^`~+-/\[<>=])">
|
||||
<token type="Keyword"/>
|
||||
<push state="funcname"/>
|
||||
</rule>
|
||||
<rule pattern="(class|struct|union|type|alias|enum)(\s+)((?:[a-zA-Z_]\w*::)*)">
|
||||
<bygroups>
|
||||
<token type="Keyword"/>
|
||||
<token type="Text"/>
|
||||
<token type="NameNamespace"/>
|
||||
</bygroups>
|
||||
<push state="classname"/>
|
||||
</rule>
|
||||
<rule pattern="(self|out|uninitialized)\b|(is_a|responds_to)\?">
|
||||
<token type="KeywordPseudo"/>
|
||||
</rule>
|
||||
<rule pattern="(def_equals_and_hash|assert_responds_to|forward_missing_to|def_equals|property|def_hash|parallel|delegate|debugger|getter|record|setter|spawn|pp)\b">
|
||||
<token type="NameBuiltinPseudo"/>
|
||||
</rule>
|
||||
<rule pattern="getter[!?]|property[!?]|__(DIR|FILE|LINE)__\b">
|
||||
<token type="NameBuiltinPseudo"/>
|
||||
</rule>
|
||||
<rule pattern="(?<!\.)(get_stack_top|StaticArray|Concurrent|with_color|Reference|Scheduler|read_line|Exception|at_exit|Pointer|Channel|Float64|sprintf|Float32|Process|Object|Struct|caller|UInt16|UInt32|UInt64|system|future|Number|printf|String|Symbol|Int32|Range|Slice|Regex|Mutex|sleep|Array|Class|raise|Tuple|Deque|delay|Float|Int16|print|abort|Value|UInt8|Int64|puts|Proc|File|Void|exit|fork|Bool|Char|gets|lazy|loop|main|rand|Enum|Int8|Time|Hash|Set|Box|Nil|Dir|Int|p)\b">
|
||||
<token type="NameBuiltin"/>
|
||||
</rule>
|
||||
<rule pattern="(?<!\w)(<<-?)(["`\']?)([a-zA-Z_]\w*)(\2)(.*?\n)">
|
||||
<token type="LiteralStringHeredoc"/>
|
||||
</rule>
|
||||
<rule pattern="(<<-?)("|\')()(\2)(.*?\n)">
|
||||
<token type="LiteralStringHeredoc"/>
|
||||
</rule>
|
||||
<rule pattern="__END__">
|
||||
<token type="CommentPreproc"/>
|
||||
<push state="end-part"/>
|
||||
</rule>
|
||||
<rule pattern="(?:^|(?<=[=<>~!:])|(?<=(?:\s|;)when\s)|(?<=(?:\s|;)or\s)|(?<=(?:\s|;)and\s)|(?<=\.index\s)|(?<=\.scan\s)|(?<=\.sub\s)|(?<=\.sub!\s)|(?<=\.gsub\s)|(?<=\.gsub!\s)|(?<=\.match\s)|(?<=(?:\s|;)if\s)|(?<=(?:\s|;)elsif\s)|(?<=^when\s)|(?<=^index\s)|(?<=^scan\s)|(?<=^sub\s)|(?<=^gsub\s)|(?<=^sub!\s)|(?<=^gsub!\s)|(?<=^match\s)|(?<=^if\s)|(?<=^elsif\s))(\s*)(/)">
|
||||
<bygroups>
|
||||
<token type="Text"/>
|
||||
<token type="LiteralStringRegex"/>
|
||||
</bygroups>
|
||||
<push state="multiline-regex"/>
|
||||
</rule>
|
||||
<rule pattern="(?<=\(|,|\[)/">
|
||||
<token type="LiteralStringRegex"/>
|
||||
<push state="multiline-regex"/>
|
||||
</rule>
|
||||
<rule pattern="(\s+)(/)(?![\s=])">
|
||||
<bygroups>
|
||||
<token type="Text"/>
|
||||
<token type="LiteralStringRegex"/>
|
||||
</bygroups>
|
||||
<push state="multiline-regex"/>
|
||||
</rule>
|
||||
<rule pattern="(0o[0-7]+(?:_[0-7]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?">
|
||||
<bygroups>
|
||||
<token type="LiteralNumberOct"/>
|
||||
<token type="Text"/>
|
||||
<token type="Operator"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="(0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?">
|
||||
<bygroups>
|
||||
<token type="LiteralNumberHex"/>
|
||||
<token type="Text"/>
|
||||
<token type="Operator"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="(0b[01]+(?:_[01]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?">
|
||||
<bygroups>
|
||||
<token type="LiteralNumberBin"/>
|
||||
<token type="Text"/>
|
||||
<token type="Operator"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)(?:e[+-]?[0-9]+)?(?:_?f[0-9]+)?)(\s*)([/?])?">
|
||||
<bygroups>
|
||||
<token type="LiteralNumberFloat"/>
|
||||
<token type="Text"/>
|
||||
<token type="Operator"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)?(?:e[+-]?[0-9]+)(?:_?f[0-9]+)?)(\s*)([/?])?">
|
||||
<bygroups>
|
||||
<token type="LiteralNumberFloat"/>
|
||||
<token type="Text"/>
|
||||
<token type="Operator"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)?(?:e[+-]?[0-9]+)?(?:_?f[0-9]+))(\s*)([/?])?">
|
||||
<bygroups>
|
||||
<token type="LiteralNumberFloat"/>
|
||||
<token type="Text"/>
|
||||
<token type="Operator"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="(0\b|[1-9][\d]*(?:_\d+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?">
|
||||
<bygroups>
|
||||
<token type="LiteralNumberInteger"/>
|
||||
<token type="Text"/>
|
||||
<token type="Operator"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="@@[a-zA-Z_]\w*">
|
||||
<token type="NameVariableClass"/>
|
||||
</rule>
|
||||
<rule pattern="@[a-zA-Z_]\w*">
|
||||
<token type="NameVariableInstance"/>
|
||||
</rule>
|
||||
<rule pattern="\$\w+">
|
||||
<token type="NameVariableGlobal"/>
|
||||
</rule>
|
||||
<rule pattern="\$[!@&`\'+~=/\\,;.<>_*$?:"^-]">
|
||||
<token type="NameVariableGlobal"/>
|
||||
</rule>
|
||||
<rule pattern="\$-[0adFiIlpvw]">
|
||||
<token type="NameVariableGlobal"/>
|
||||
</rule>
|
||||
<rule pattern="::">
|
||||
<token type="Operator"/>
|
||||
</rule>
|
||||
<rule>
|
||||
<include state="strings"/>
|
||||
</rule>
|
||||
<rule pattern="\?(\\[MC]-)*(\\([\\befnrtv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)(?!\w)">
|
||||
<token type="LiteralStringChar"/>
|
||||
</rule>
|
||||
<rule pattern="[A-Z][A-Z_]+\b">
|
||||
<token type="NameConstant"/>
|
||||
</rule>
|
||||
<rule pattern="\{%">
|
||||
<token type="LiteralStringInterpol"/>
|
||||
<push state="in-macro-control"/>
|
||||
</rule>
|
||||
<rule pattern="\{\{">
|
||||
<token type="LiteralStringInterpol"/>
|
||||
<push state="in-macro-expr"/>
|
||||
</rule>
|
||||
<rule pattern="(@\[)(\s*)([A-Z]\w*)">
|
||||
<bygroups>
|
||||
<token type="Operator"/>
|
||||
<token type="Text"/>
|
||||
<token type="NameDecorator"/>
|
||||
</bygroups>
|
||||
<push state="in-attr"/>
|
||||
</rule>
|
||||
<rule pattern="(\.|::)(\[\]\?|<=>|===|\[\]=|>>|&&|\*\*|\[\]|\|\||>=|=~|!~|<<|<=|!=|==|<|/|=|-|\+|>|\*|&|%|\^|!|\||~)">
|
||||
<bygroups>
|
||||
<token type="Operator"/>
|
||||
<token type="NameOperator"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="(\.|::)([a-zA-Z_]\w*[!?]?|[*%&^`~+\-/\[<>=])">
|
||||
<bygroups>
|
||||
<token type="Operator"/>
|
||||
<token type="Name"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="[a-zA-Z_]\w*(?:[!?](?!=))?">
|
||||
<token type="Name"/>
|
||||
</rule>
|
||||
<rule pattern="(\[|\]\??|\*\*|<=>?|>=|<<?|>>?|=~|===|!~|&&?|\|\||\.{1,3})">
|
||||
<token type="Operator"/>
|
||||
</rule>
|
||||
<rule pattern="[-+/*%=<>&!^|~]=?">
|
||||
<token type="Operator"/>
|
||||
</rule>
|
||||
<rule pattern="[(){};,/?:\\]">
|
||||
<token type="Punctuation"/>
|
||||
</rule>
|
||||
<rule pattern="\s+">
|
||||
<token type="Text"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="multiline-regex">
|
||||
<rule>
|
||||
<include state="string-intp"/>
|
||||
</rule>
|
||||
<rule pattern="\\\\">
|
||||
<token type="LiteralStringRegex"/>
|
||||
</rule>
|
||||
<rule pattern="\\/">
|
||||
<token type="LiteralStringRegex"/>
|
||||
</rule>
|
||||
<rule pattern="[\\#]">
|
||||
<token type="LiteralStringRegex"/>
|
||||
</rule>
|
||||
<rule pattern="[^\\/#]+">
|
||||
<token type="LiteralStringRegex"/>
|
||||
</rule>
|
||||
<rule pattern="/[imsx]*">
|
||||
<token type="LiteralStringRegex"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="ab-string">
|
||||
<rule pattern="\\[\\<>]">
|
||||
<token type="LiteralStringOther"/>
|
||||
</rule>
|
||||
<rule pattern="<">
|
||||
<token type="LiteralStringOther"/>
|
||||
<push/>
|
||||
</rule>
|
||||
<rule pattern=">">
|
||||
<token type="LiteralStringOther"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
<rule pattern="[\\#<>]">
|
||||
<token type="LiteralStringOther"/>
|
||||
</rule>
|
||||
<rule pattern="[^\\#<>]+">
|
||||
<token type="LiteralStringOther"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="pa-string">
|
||||
<rule pattern="\\[\\()]">
|
||||
<token type="LiteralStringOther"/>
|
||||
</rule>
|
||||
<rule pattern="\(">
|
||||
<token type="LiteralStringOther"/>
|
||||
<push/>
|
||||
</rule>
|
||||
<rule pattern="\)">
|
||||
<token type="LiteralStringOther"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
<rule pattern="[\\#()]">
|
||||
<token type="LiteralStringOther"/>
|
||||
</rule>
|
||||
<rule pattern="[^\\#()]+">
|
||||
<token type="LiteralStringOther"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="strings">
|
||||
<rule pattern="\:@{0,2}[a-zA-Z_]\w*[!?]?">
|
||||
<token type="LiteralStringSymbol"/>
|
||||
</rule>
|
||||
<rule pattern="\:@{0,2}(\[\]\?|<=>|===|\[\]=|>>|&&|\*\*|\[\]|\|\||>=|=~|!~|<<|<=|!=|==|<|/|=|-|\+|>|\*|&|%|\^|!|\||~)">
|
||||
<token type="LiteralStringSymbol"/>
|
||||
</rule>
|
||||
<rule pattern=":'(\\\\|\\'|[^'])*'">
|
||||
<token type="LiteralStringSymbol"/>
|
||||
</rule>
|
||||
<rule pattern="'(\\\\|\\'|[^']|\\[^'\\]+)'">
|
||||
<token type="LiteralStringChar"/>
|
||||
</rule>
|
||||
<rule pattern=":"">
|
||||
<token type="LiteralStringSymbol"/>
|
||||
<push state="simple-sym"/>
|
||||
</rule>
|
||||
<rule pattern="([a-zA-Z_]\w*)(:)(?!:)">
|
||||
<bygroups>
|
||||
<token type="LiteralStringSymbol"/>
|
||||
<token type="Punctuation"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern=""">
|
||||
<token type="LiteralStringDouble"/>
|
||||
<push state="simple-string"/>
|
||||
</rule>
|
||||
<rule pattern="(?<!\.)`">
|
||||
<token type="LiteralStringBacktick"/>
|
||||
<push state="simple-backtick"/>
|
||||
</rule>
|
||||
<rule pattern="%\{">
|
||||
<token type="LiteralStringOther"/>
|
||||
<push state="cb-intp-string"/>
|
||||
</rule>
|
||||
<rule pattern="%[wi]\{">
|
||||
<token type="LiteralStringOther"/>
|
||||
<push state="cb-string"/>
|
||||
</rule>
|
||||
<rule pattern="%r\{">
|
||||
<token type="LiteralStringRegex"/>
|
||||
<push state="cb-regex"/>
|
||||
</rule>
|
||||
<rule pattern="%\[">
|
||||
<token type="LiteralStringOther"/>
|
||||
<push state="sb-intp-string"/>
|
||||
</rule>
|
||||
<rule pattern="%[wi]\[">
|
||||
<token type="LiteralStringOther"/>
|
||||
<push state="sb-string"/>
|
||||
</rule>
|
||||
<rule pattern="%r\[">
|
||||
<token type="LiteralStringRegex"/>
|
||||
<push state="sb-regex"/>
|
||||
</rule>
|
||||
<rule pattern="%\(">
|
||||
<token type="LiteralStringOther"/>
|
||||
<push state="pa-intp-string"/>
|
||||
</rule>
|
||||
<rule pattern="%[wi]\(">
|
||||
<token type="LiteralStringOther"/>
|
||||
<push state="pa-string"/>
|
||||
</rule>
|
||||
<rule pattern="%r\(">
|
||||
<token type="LiteralStringRegex"/>
|
||||
<push state="pa-regex"/>
|
||||
</rule>
|
||||
<rule pattern="%<">
|
||||
<token type="LiteralStringOther"/>
|
||||
<push state="ab-intp-string"/>
|
||||
</rule>
|
||||
<rule pattern="%[wi]<">
|
||||
<token type="LiteralStringOther"/>
|
||||
<push state="ab-string"/>
|
||||
</rule>
|
||||
<rule pattern="%r<">
|
||||
<token type="LiteralStringRegex"/>
|
||||
<push state="ab-regex"/>
|
||||
</rule>
|
||||
<rule pattern="(%r([\W_]))((?:\\\2|(?!\2).)*)(\2[imsx]*)">
|
||||
<token type="LiteralString"/>
|
||||
</rule>
|
||||
<rule pattern="(%[wi]([\W_]))((?:\\\2|(?!\2).)*)(\2)">
|
||||
<token type="LiteralString"/>
|
||||
</rule>
|
||||
<rule pattern="(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)">
|
||||
<bygroups>
|
||||
<token type="Text"/>
|
||||
<token type="LiteralStringOther"/>
|
||||
<token type="None"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)">
|
||||
<bygroups>
|
||||
<token type="Text"/>
|
||||
<token type="LiteralStringOther"/>
|
||||
<token type="None"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="(%([\[{(<]))((?:\\\2|(?!\2).)*)(\2)">
|
||||
<token type="LiteralString"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="sb-string">
|
||||
<rule pattern="\\[\\\[\]]">
|
||||
<token type="LiteralStringOther"/>
|
||||
</rule>
|
||||
<rule pattern="\[">
|
||||
<token type="LiteralStringOther"/>
|
||||
<push/>
|
||||
</rule>
|
||||
<rule pattern="\]">
|
||||
<token type="LiteralStringOther"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
<rule pattern="[\\#\[\]]">
|
||||
<token type="LiteralStringOther"/>
|
||||
</rule>
|
||||
<rule pattern="[^\\#\[\]]+">
|
||||
<token type="LiteralStringOther"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="funcname">
|
||||
<rule pattern="(?:([a-zA-Z_]\w*)(\.))?([a-zA-Z_]\w*[!?]?|\*\*?|[-+]@?|[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)">
|
||||
<bygroups>
|
||||
<token type="NameClass"/>
|
||||
<token type="Operator"/>
|
||||
<token type="NameFunction"/>
|
||||
</bygroups>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
<rule>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="simple-sym">
|
||||
<rule>
|
||||
<include state="string-escaped"/>
|
||||
</rule>
|
||||
<rule pattern="[^\\"#]+">
|
||||
<token type="LiteralStringSymbol"/>
|
||||
</rule>
|
||||
<rule pattern="[\\#]">
|
||||
<token type="LiteralStringSymbol"/>
|
||||
</rule>
|
||||
<rule pattern=""">
|
||||
<token type="LiteralStringSymbol"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
53
lexers/csv.xml
Normal file
53
lexers/csv.xml
Normal file
@ -0,0 +1,53 @@
|
||||
<!--
|
||||
Lexer for RFC-4180 compliant CSV subject to the following additions:
|
||||
- UTF-8 encoding is accepted (the RFC requires 7-bit ASCII)
|
||||
- The line terminator character can be LF or CRLF (the RFC allows CRLF only)
|
||||
|
||||
Link to the RFC-4180 specification: https://tools.ietf.org/html/rfc4180
|
||||
|
||||
Additions inspired by:
|
||||
https://github.com/frictionlessdata/datapackage/issues/204#issuecomment-193242077
|
||||
|
||||
Future improvements:
|
||||
- Identify non-quoted numbers as LiteralNumber
|
||||
- Identify y as an error in "x"y. Currently it's identified as another string
|
||||
literal.
|
||||
-->
|
||||
|
||||
<lexer>
|
||||
<config>
|
||||
<name>CSV</name>
|
||||
<alias>csv</alias>
|
||||
<filename>*.csv</filename>
|
||||
<mime_type>text/csv</mime_type>
|
||||
</config>
|
||||
<rules>
|
||||
<state name="root">
|
||||
<rule pattern="\r?\n">
|
||||
<token type="Punctuation" />
|
||||
</rule>
|
||||
<rule pattern=",">
|
||||
<token type="Punctuation" />
|
||||
</rule>
|
||||
<rule pattern=""">
|
||||
<token type="LiteralStringDouble" />
|
||||
<push state="escaped" />
|
||||
</rule>
|
||||
<rule pattern="[^\r\n,]+">
|
||||
<token type="LiteralString" />
|
||||
</rule>
|
||||
</state>
|
||||
<state name="escaped">
|
||||
<rule pattern="""">
|
||||
<token type="LiteralStringEscape"/>
|
||||
</rule>
|
||||
<rule pattern=""">
|
||||
<token type="LiteralStringDouble" />
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
<rule pattern="[^"]+">
|
||||
<token type="LiteralStringDouble" />
|
||||
</rule>
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
@ -95,19 +95,22 @@
|
||||
<rule pattern="[:!#$%&*+.\\/<=>?@^|~-]+">
|
||||
<token type="Operator"/>
|
||||
</rule>
|
||||
<rule pattern="\d+[eE][+-]?\d+">
|
||||
<rule pattern="\d+_*[eE][+-]?\d+">
|
||||
<token type="LiteralNumberFloat"/>
|
||||
</rule>
|
||||
<rule pattern="\d+\.\d+([eE][+-]?\d+)?">
|
||||
<rule pattern="\d+(_+[\d]+)*\.\d+(_+[\d]+)*([eE][+-]?\d+)?">
|
||||
<token type="LiteralNumberFloat"/>
|
||||
</rule>
|
||||
<rule pattern="0[oO][0-7]+">
|
||||
<rule pattern="0[oO](_*[0-7])+">
|
||||
<token type="LiteralNumberOct"/>
|
||||
</rule>
|
||||
<rule pattern="0[xX][\da-fA-F]+">
|
||||
<rule pattern="0[xX](_*[\da-fA-F])+">
|
||||
<token type="LiteralNumberHex"/>
|
||||
</rule>
|
||||
<rule pattern="\d+">
|
||||
<rule pattern="0[bB](_*[01])+">
|
||||
<token type="LiteralNumberBin"/>
|
||||
</rule>
|
||||
<rule pattern="\d+(_*[\d])*">
|
||||
<token type="LiteralNumberInteger"/>
|
||||
</rule>
|
||||
<rule pattern="'">
|
||||
|
913
lexers/heuristics.yml
Normal file
913
lexers/heuristics.yml
Normal file
@ -0,0 +1,913 @@
|
||||
# A collection of simple regexp-based rules that can be applied to content
|
||||
# to disambiguate languages with the same file extension.
|
||||
#
|
||||
# There are two top-level keys: disambiguations and named_patterns.
|
||||
#
|
||||
# disambiguations - a list of disambiguation rules, one for each
|
||||
# extension or group of extensions.
|
||||
# extensions - an array of file extensions that this block applies to.
|
||||
# rules - list of rules that are applied in order to the content
|
||||
# of a file with a matching extension. Rules are evaluated
|
||||
# until one of them matches. If none matches, no language
|
||||
# is returned.
|
||||
# language - Language to be returned if the rule matches.
|
||||
# pattern - Ruby-compatible regular expression that makes the rule
|
||||
# match. If no pattern is specified, the rule always matches.
|
||||
# Pattern can be a string with a single regular expression
|
||||
# or an array of strings that will be merged in a single
|
||||
# regular expression (with union).
|
||||
# and - An and block merges multiple rules and checks that all of
|
||||
# them must match.
|
||||
# negative_pattern - Same as pattern, but checks for absence of matches.
|
||||
# named_pattern - A pattern can be reused by specifying it in the
|
||||
# named_patterns section and referencing it here by its
|
||||
# key.
|
||||
# named_patterns - Key-value map of reusable named patterns.
|
||||
#
|
||||
# Please keep this list alphabetized.
|
||||
#
|
||||
---
|
||||
disambiguations:
|
||||
- extensions: ['.1', '.2', '.3', '.4', '.5', '.6', '.7', '.8', '.9']
|
||||
rules:
|
||||
- language: man
|
||||
and:
|
||||
- named_pattern: mdoc-date
|
||||
- named_pattern: mdoc-title
|
||||
- named_pattern: mdoc-heading
|
||||
- language: man
|
||||
and:
|
||||
- named_pattern: man-title
|
||||
- named_pattern: man-heading
|
||||
- language: Roff
|
||||
pattern: '^\.(?:[A-Za-z]{2}(?:\s|$)|\\")'
|
||||
- extensions: ['.1in', '.1m', '.1x', '.3in', '.3m', '.3p', '.3pm', '.3qt', '.3x', '.man', '.mdoc']
|
||||
rules:
|
||||
- language: man
|
||||
and:
|
||||
- named_pattern: mdoc-date
|
||||
- named_pattern: mdoc-title
|
||||
- named_pattern: mdoc-heading
|
||||
- language: man
|
||||
and:
|
||||
- named_pattern: man-title
|
||||
- named_pattern: man-heading
|
||||
- language: Roff
|
||||
- extensions: ['.al']
|
||||
rules:
|
||||
# AL pattern source from https://github.com/microsoft/AL/blob/master/grammar/alsyntax.tmlanguage - keyword.other.applicationobject.al
|
||||
- language: AL
|
||||
and:
|
||||
- pattern: '\b(?i:(CODEUNIT|PAGE|PAGEEXTENSION|PAGECUSTOMIZATION|DOTNET|ENUM|ENUMEXTENSION|VALUE|QUERY|REPORT|TABLE|TABLEEXTENSION|XMLPORT|PROFILE|CONTROLADDIN|REPORTEXTENSION|INTERFACE|PERMISSIONSET|PERMISSIONSETEXTENSION|ENTITLEMENT))\b'
|
||||
# Open-ended fallback to Perl AutoLoader
|
||||
- language: Perl
|
||||
- extensions: ['.app']
|
||||
rules:
|
||||
- language: Erlang
|
||||
pattern: '^\{\s*(?:application|''application'')\s*,\s*(?:[a-z]+[\w@]*|''[^'']+'')\s*,\s*\[(?:.|[\r\n])*\]\s*\}\.[ \t]*$'
|
||||
- extensions: ['.as']
|
||||
rules:
|
||||
- language: ActionScript
|
||||
pattern: '^\s*(?:package(?:\s+[\w.]+)?\s+(?:\{|$)|import\s+[\w.*]+\s*;|(?=.*?(?:intrinsic|extends))(intrinsic\s+)?class\s+[\w<>.]+(?:\s+extends\s+[\w<>.]+)?|(?:(?:public|protected|private|static)\s+)*(?:(?:var|const|local)\s+\w+\s*:\s*[\w<>.]+(?:\s*=.*)?\s*;|function\s+\w+\s*\((?:\s*\w+\s*:\s*[\w<>.]+\s*(,\s*\w+\s*:\s*[\w<>.]+\s*)*)?\)))'
|
||||
- extensions: ['.asc']
|
||||
rules:
|
||||
- language: Public Key
|
||||
pattern: '^(----[- ]BEGIN|ssh-(rsa|dss)) '
|
||||
- language: AsciiDoc
|
||||
pattern: '^[=-]+\s|\{\{[A-Za-z]'
|
||||
- language: AGS Script
|
||||
pattern: '^(\/\/.+|((import|export)\s+)?(function|int|float|char)\s+((room|repeatedly|on|game)_)?([A-Za-z]+[A-Za-z_0-9]+)\s*[;\(])'
|
||||
- extensions: ['.asm']
|
||||
rules:
|
||||
- language: Motorola 68K Assembly
|
||||
named_pattern: m68k
|
||||
- extensions: ['.asy']
|
||||
rules:
|
||||
- language: LTspice Symbol
|
||||
pattern: '^SymbolType[ \t]'
|
||||
- language: Asymptote
|
||||
- extensions: ['.bas']
|
||||
rules:
|
||||
- language: FreeBasic
|
||||
pattern: '^[ \t]*#(?i)(?:define|endif|endmacro|ifn?def|include|lang|macro)(?:$|\s)'
|
||||
- language: BASIC
|
||||
pattern: '\A\s*\d'
|
||||
- language: VBA
|
||||
and:
|
||||
- named_pattern: vb-module
|
||||
- named_pattern: vba
|
||||
- language: Visual Basic 6.0
|
||||
named_pattern: vb-module
|
||||
- extensions: ['.bb']
|
||||
rules:
|
||||
- language: BlitzBasic
|
||||
pattern: '(<^\s*; |End Function)'
|
||||
- language: BitBake
|
||||
pattern: '^(# |include|require|inherit)\b'
|
||||
- language: Clojure
|
||||
pattern: '\((def|defn|defmacro|let)\s'
|
||||
- extensions: ['.bf']
|
||||
rules:
|
||||
- language: Beef
|
||||
pattern: '(?-m)^\s*using\s+(System|Beefy)(\.(.*))?;\s*$'
|
||||
- language: HyPhy
|
||||
pattern:
|
||||
- '(?-m)^\s*#include\s+".*";\s*$'
|
||||
- '\sfprintf\s*\('
|
||||
- language: Brainfuck
|
||||
pattern: '(>\+>|>\+<)'
|
||||
- extensions: ['.bi']
|
||||
rules:
|
||||
- language: FreeBasic
|
||||
pattern: '^[ \t]*#(?i)(?:define|endif|endmacro|ifn?def|if|include|lang|macro)(?:$|\s)'
|
||||
- extensions: ['.bs']
|
||||
rules:
|
||||
- language: Bikeshed
|
||||
pattern: '^(?i:<pre\s+class)\s*=\s*(''|\"|\b)metadata\b\1[^>\r\n]*>'
|
||||
- language: BrighterScript
|
||||
pattern:
|
||||
- (?i:^\s*(?=^sub\s)(?:sub\s*\w+\(.*?\))|(?::\s*sub\(.*?\))$)
|
||||
- (?i:^\s*(end\ssub)$)
|
||||
- (?i:^\s*(?=^function\s)(?:function\s*\w+\(.*?\)\s*as\s*\w*)|(?::\s*function\(.*?\)\s*as\s*\w*)$)
|
||||
- (?i:^\s*(end\sfunction)$)
|
||||
- language: Bluespec BH
|
||||
pattern: '^package\s+[A-Za-z_][A-Za-z0-9_'']*(?:\s*\(|\s+where)'
|
||||
- extensions: ['.builds']
|
||||
rules:
|
||||
- language: XML
|
||||
pattern: '^(\s*)(?i:<Project|<Import|<Property|<?xml|xmlns)'
|
||||
- extensions: ['.ch']
|
||||
rules:
|
||||
- language: xBase
|
||||
pattern: '^\s*#\s*(?i:if|ifdef|ifndef|define|command|xcommand|translate|xtranslate|include|pragma|undef)\b'
|
||||
- extensions: ['.cl']
|
||||
rules:
|
||||
- language: Common Lisp
|
||||
pattern: '^\s*\((?i:defun|in-package|defpackage) '
|
||||
- language: Cool
|
||||
pattern: '^class'
|
||||
- language: OpenCL
|
||||
pattern: '\/\* |\/\/ |^\}'
|
||||
- extensions: ['.cls']
|
||||
rules:
|
||||
- language: Visual Basic 6.0
|
||||
and:
|
||||
- named_pattern: vb-class
|
||||
- pattern: '^\s*BEGIN(?:\r?\n|\r)\s*MultiUse\s*=.*(?:\r?\n|\r)\s*Persistable\s*='
|
||||
- language: VBA
|
||||
named_pattern: vb-class
|
||||
- language: TeX
|
||||
pattern: '^\s*\\(?:NeedsTeXFormat|ProvidesClass)\{'
|
||||
- language: ObjectScript
|
||||
pattern: '^Class\s'
|
||||
- extensions: ['.cmp']
|
||||
rules:
|
||||
- language: Gerber Image
|
||||
pattern: '^[DGMT][0-9]{2}\*(?:\r?\n|\r)'
|
||||
- extensions: ['.cs']
|
||||
rules:
|
||||
- language: Smalltalk
|
||||
pattern: '![\w\s]+methodsFor: '
|
||||
- language: 'C#'
|
||||
pattern: '^\s*(using\s+[A-Z][\s\w.]+;|namespace\s*[\w\.]+\s*(\{|;)|\/\/)'
|
||||
- extensions: ['.csc']
|
||||
rules:
|
||||
- language: GSC
|
||||
named_pattern: gsc
|
||||
- extensions: ['.csl']
|
||||
rules:
|
||||
- language: XML
|
||||
pattern: '(?i:^\s*(<\?xml|xmlns))'
|
||||
- language: Kusto
|
||||
pattern: '(^\|\s*(where|extend|project|limit|summarize))|(^\.\w+)'
|
||||
- extensions: ['.d']
|
||||
rules:
|
||||
- language: D
|
||||
# see http://dlang.org/spec/grammar
|
||||
# ModuleDeclaration | ImportDeclaration | FuncDeclaration | unittest
|
||||
pattern: '^module\s+[\w.]*\s*;|import\s+[\w\s,.:]*;|\w+\s+\w+\s*\(.*\)(?:\(.*\))?\s*\{[^}]*\}|unittest\s*(?:\(.*\))?\s*\{[^}]*\}'
|
||||
- language: DTrace
|
||||
# see http://dtrace.org/guide/chp-prog.html, http://dtrace.org/guide/chp-profile.html, http://dtrace.org/guide/chp-opt.html
|
||||
pattern: '^(\w+:\w*:\w*:\w*|BEGIN|END|provider\s+|(tick|profile)-\w+\s+\{[^}]*\}|#pragma\s+D\s+(option|attributes|depends_on)\s|#pragma\s+ident\s)'
|
||||
- language: Makefile
|
||||
# path/target : dependency \
|
||||
# target : \
|
||||
# : dependency
|
||||
# path/file.ext1 : some/path/../file.ext2
|
||||
pattern: '([\/\\].*:\s+.*\s\\$|: \\$|^[ %]:|^[\w\s\/\\.]+\w+\.\w+\s*:\s+[\w\s\/\\.]+\w+\.\w+)'
|
||||
- extensions: ['.dsp']
|
||||
rules:
|
||||
- language: Microsoft Developer Studio Project
|
||||
pattern: '# Microsoft Developer Studio Generated Build File'
|
||||
- language: Faust
|
||||
pattern: '\bprocess\s*[(=]|\b(library|import)\s*\(\s*"|\bdeclare\s+(name|version|author|copyright|license)\s+"'
|
||||
- extensions: ['.e']
|
||||
rules:
|
||||
- language: E
|
||||
pattern:
|
||||
- '^\s*(def|var)\s+(.+):='
|
||||
- '^\s*(def|to)\s+(\w+)(\(.+\))?\s+\{'
|
||||
- '^\s*(when)\s+(\(.+\))\s+->\s+\{'
|
||||
- language: Eiffel
|
||||
pattern:
|
||||
- '^\s*\w+\s*(?:,\s*\w+)*[:]\s*\w+\s'
|
||||
- '^\s*\w+\s*(?:\(\s*\w+[:][^)]+\))?(?:[:]\s*\w+)?(?:--.+\s+)*\s+(?:do|local)\s'
|
||||
- '^\s*(?:across|deferred|elseif|ensure|feature|from|inherit|inspect|invariant|note|once|require|undefine|variant|when)\s*$'
|
||||
- language: Euphoria
|
||||
named_pattern: euphoria
|
||||
- extensions: ['.ecl']
|
||||
rules:
|
||||
- language: ECLiPSe
|
||||
pattern: '^[^#]+:-'
|
||||
- language: ECL
|
||||
pattern: ':='
|
||||
- extensions: ['.es']
|
||||
rules:
|
||||
- language: Erlang
|
||||
pattern: '^\s*(?:%%|main\s*\(.*?\)\s*->)'
|
||||
- language: JavaScript
|
||||
pattern: '\/\/|("|'')use strict\1|export\s+default\s|\/\*(?:.|[\r\n])*?\*\/'
|
||||
- extensions: ['.ex']
|
||||
rules:
|
||||
- language: Elixir
|
||||
pattern:
|
||||
- '^\s*@moduledoc\s'
|
||||
- '^\s*(?:cond|import|quote|unless)\s'
|
||||
- '^\s*def(?:exception|impl|macro|module|protocol)[(\s]'
|
||||
- language: Euphoria
|
||||
named_pattern: euphoria
|
||||
- extensions: ['.f']
|
||||
rules:
|
||||
- language: Forth
|
||||
pattern: '^: '
|
||||
- language: Filebench WML
|
||||
pattern: 'flowop'
|
||||
- language: Fortran
|
||||
named_pattern: fortran
|
||||
- extensions: ['.for']
|
||||
rules:
|
||||
- language: Forth
|
||||
pattern: '^: '
|
||||
- language: Fortran
|
||||
named_pattern: fortran
|
||||
- extensions: ['.fr']
|
||||
rules:
|
||||
- language: Forth
|
||||
pattern: '^(: |also |new-device|previous )'
|
||||
- language: Frege
|
||||
pattern: '^\s*(import|module|package|data|type) '
|
||||
- language: Text
|
||||
- extensions: ['.frm']
|
||||
rules:
|
||||
- language: VBA
|
||||
and:
|
||||
- named_pattern: vb-form
|
||||
- pattern: '^\s*Begin\s+\{[0-9A-Z\-]*\}\s?'
|
||||
- language: Visual Basic 6.0
|
||||
and:
|
||||
- named_pattern: vb-form
|
||||
- pattern: '^\s*Begin\s+VB\.Form\s+'
|
||||
- extensions: ['.fs']
|
||||
rules:
|
||||
- language: Forth
|
||||
pattern: '^(: |new-device)'
|
||||
- language: 'F#'
|
||||
pattern: '^\s*(#light|import|let|module|namespace|open|type)'
|
||||
- language: GLSL
|
||||
pattern: '^\s*(#version|precision|uniform|varying|vec[234])'
|
||||
- language: Filterscript
|
||||
pattern: '#include|#pragma\s+(rs|version)|__attribute__'
|
||||
- extensions: ['.ftl']
|
||||
rules:
|
||||
- language: FreeMarker
|
||||
pattern: '^(?:<|[a-zA-Z-][a-zA-Z0-9_-]+[ \t]+\w)|\$\{\w+[^\r\n]*?\}|^[ \t]*(?:<#--.*?-->|<#([a-z]+)(?=\s|>)[^>]*>.*?</#\1>|\[#--.*?--\]|\[#([a-z]+)(?=\s|\])[^\]]*\].*?\[#\2\])'
|
||||
- language: Fluent
|
||||
pattern: '^-?[a-zA-Z][a-zA-Z0-9_-]* *=|\{\$-?[a-zA-Z][-\w]*(?:\.[a-zA-Z][-\w]*)?\}'
|
||||
- extensions: ['.g']
|
||||
rules:
|
||||
- language: GAP
|
||||
pattern: '\s*(Declare|BindGlobal|KeyDependentOperation|Install(Method|GlobalFunction)|SetPackageInfo)'
|
||||
- language: G-code
|
||||
pattern: '^[MG][0-9]+(?:\r?\n|\r)'
|
||||
- extensions: ['.gd']
|
||||
rules:
|
||||
- language: GAP
|
||||
pattern: '\s*(Declare|BindGlobal|KeyDependentOperation)'
|
||||
- language: GDScript
|
||||
pattern: '\s*(extends|var|const|enum|func|class|signal|tool|yield|assert|onready)'
|
||||
- extensions: ['.gml']
|
||||
rules:
|
||||
- language: XML
|
||||
pattern: '(?i:^\s*(<\?xml|xmlns))'
|
||||
- language: Graph Modeling Language
|
||||
pattern: '(?i:^\s*(graph|node)\s+\[$)'
|
||||
- language: Gerber Image
|
||||
pattern: '^[DGMT][0-9]{2}\*$'
|
||||
- language: Game Maker Language
|
||||
- extensions: ['.gs']
|
||||
rules:
|
||||
- language: GLSL
|
||||
pattern: '^#version\s+[0-9]+\b'
|
||||
- language: Gosu
|
||||
pattern: '^uses (java|gw)\.'
|
||||
- language: Genie
|
||||
pattern: '^\[indent=[0-9]+\]'
|
||||
- extensions: ['.gsc']
|
||||
rules:
|
||||
- language: GSC
|
||||
named_pattern: gsc
|
||||
- extensions: ['.gsh']
|
||||
rules:
|
||||
- language: GSC
|
||||
named_pattern: gsc
|
||||
- extensions: ['.gts']
|
||||
rules:
|
||||
- language: Gerber Image
|
||||
pattern: '^G0.'
|
||||
- language: Glimmer TS
|
||||
negative_pattern: '^G0.'
|
||||
- extensions: ['.h']
|
||||
rules:
|
||||
- language: Objective-C
|
||||
named_pattern: objectivec
|
||||
- language: C++
|
||||
named_pattern: cpp
|
||||
- language: C
|
||||
- extensions: ['.hh']
|
||||
rules:
|
||||
- language: Hack
|
||||
pattern: '<\?hh'
|
||||
- extensions: ['.html']
|
||||
rules:
|
||||
- language: Ecmarkup
|
||||
pattern: '<emu-(?:alg|annex|biblio|clause|eqn|example|figure|gann|gmod|gprose|grammar|intro|not-ref|note|nt|prodref|production|rhs|table|t|xref)(?:$|\s|>)'
|
||||
- language: HTML
|
||||
- extensions: ['.i']
|
||||
rules:
|
||||
- language: Motorola 68K Assembly
|
||||
named_pattern: m68k
|
||||
- language: SWIG
|
||||
pattern: '^[ \t]*%[a-z_]+\b|^%[{}]$'
|
||||
- extensions: ['.ice']
|
||||
rules:
|
||||
- language: JSON
|
||||
pattern: '\A\s*[{\[]'
|
||||
- language: Slice
|
||||
- extensions: ['.inc']
|
||||
rules:
|
||||
- language: Motorola 68K Assembly
|
||||
named_pattern: m68k
|
||||
- language: PHP
|
||||
pattern: '^<\?(?:php)?'
|
||||
- language: SourcePawn
|
||||
pattern:
|
||||
- '^public\s+(?:SharedPlugin(?:\s+|:)__pl_\w+\s*=(?:\s*\{)?|(?:void\s+)?__pl_\w+_SetNTVOptional\(\)(?:\s*\{)?)'
|
||||
- '^methodmap\s+\w+\s+<\s+\w+'
|
||||
- '^\s*MarkNativeAsOptional\s*\('
|
||||
- language: NASL
|
||||
pattern:
|
||||
- '^\s*include\s*\(\s*(?:"|'')[\\/\w\-\.:\s]+\.(?:nasl|inc)\s*(?:"|'')\s*\)\s*;'
|
||||
- '^\s*(?:global|local)_var\s+(?:\w+(?:\s*=\s*[\w\-"'']+)?\s*)(?:,\s*\w+(?:\s*=\s*[\w\-"'']+)?\s*)*+\s*;'
|
||||
- '^\s*namespace\s+\w+\s*\{'
|
||||
- '^\s*object\s+\w+\s*(?:extends\s+\w+(?:::\w+)?)?\s*\{'
|
||||
- '^\s*(?:public\s+|private\s+|\s*)function\s+\w+\s*\([\w\s,]*\)\s*\{'
|
||||
- language: POV-Ray SDL
|
||||
pattern: '^\s*#(declare|local|macro|while)\s'
|
||||
- language: Pascal
|
||||
pattern:
|
||||
- '(?i:^\s*\{\$(?:mode|ifdef|undef|define)[ ]+[a-z0-9_]+\})'
|
||||
- '^\s*end[.;]\s*$'
|
||||
- language: BitBake
|
||||
pattern: '^inherit(\s+[\w.-]+)+\s*$'
|
||||
- extensions: ['.json']
|
||||
rules:
|
||||
- language: OASv2-json
|
||||
pattern: '"swagger":\s?"2.[0-9.]+"'
|
||||
- language: OASv3-json
|
||||
pattern: '"openapi":\s?"3.[0-9.]+"'
|
||||
- language: JSON
|
||||
- extensions: ['.l']
|
||||
rules:
|
||||
- language: Common Lisp
|
||||
pattern: '\(def(un|macro)\s'
|
||||
- language: Lex
|
||||
pattern: '^(%[%{}]xs|<.*>)'
|
||||
- language: Roff
|
||||
pattern: '^\.[A-Za-z]{2}(\s|$)'
|
||||
- language: PicoLisp
|
||||
pattern: '^\((de|class|rel|code|data|must)\s'
|
||||
- extensions: ['.lean']
|
||||
rules:
|
||||
- language: Lean
|
||||
pattern: '^import [a-z]'
|
||||
- language: Lean 4
|
||||
pattern: '^import [A-Z]'
|
||||
- extensions: ['.ls']
|
||||
rules:
|
||||
- language: LoomScript
|
||||
pattern: '^\s*package\s*[\w\.\/\*\s]*\s*\{'
|
||||
- language: LiveScript
|
||||
- extensions: ['.lsp', '.lisp']
|
||||
rules:
|
||||
- language: Common Lisp
|
||||
pattern: '^\s*\((?i:defun|in-package|defpackage) '
|
||||
- language: NewLisp
|
||||
pattern: '^\s*\(define '
|
||||
- extensions: ['.m']
|
||||
rules:
|
||||
- language: Objective-C
|
||||
named_pattern: objectivec
|
||||
- language: Mercury
|
||||
pattern: ':- module'
|
||||
- language: MUF
|
||||
pattern: '^: '
|
||||
- language: M
|
||||
pattern: '^\s*;'
|
||||
- language: Mathematica
|
||||
and:
|
||||
- pattern: '\(\*'
|
||||
- pattern: '\*\)$'
|
||||
- language: MATLAB
|
||||
pattern: '^\s*%'
|
||||
- language: Limbo
|
||||
pattern: '^\w+\s*:\s*module\s*\{'
|
||||
- extensions: ['.m4']
|
||||
rules:
|
||||
- language: M4Sugar
|
||||
pattern:
|
||||
- 'AC_DEFUN|AC_PREREQ|AC_INIT'
|
||||
- '^_?m4_'
|
||||
- language: 'M4'
|
||||
- extensions: ['.mask']
|
||||
rules:
|
||||
- language: Unity3D Asset
|
||||
pattern: 'tag:unity3d.com'
|
||||
- extensions: ['.mc']
|
||||
rules:
|
||||
- language: Win32 Message File
|
||||
pattern: '(?i)^[ \t]*(?>\/\*\s*)?MessageId=|^\.$'
|
||||
- language: M4
|
||||
pattern: '^dnl|^divert\((?:-?\d+)?\)|^\w+\(`[^\r\n]*?''[),]'
|
||||
- language: Monkey C
|
||||
pattern: '\b(?:using|module|function|class|var)\s+\w'
|
||||
- extensions: ['.md']
|
||||
rules:
|
||||
- language: Markdown
|
||||
pattern:
|
||||
- '(^[-A-Za-z0-9=#!\*\[|>])|<\/'
|
||||
- '\A\z'
|
||||
- language: GCC Machine Description
|
||||
pattern: '^(;;|\(define_)'
|
||||
- language: Markdown
|
||||
- extensions: ['.ml']
|
||||
rules:
|
||||
- language: OCaml
|
||||
pattern: '(^\s*module)|let rec |match\s+(\S+\s)+with'
|
||||
- language: Standard ML
|
||||
pattern: '=> |case\s+(\S+\s)+of'
|
||||
- extensions: ['.mod']
|
||||
rules:
|
||||
- language: XML
|
||||
pattern: '<!ENTITY '
|
||||
- language: NMODL
|
||||
pattern: '\b(NEURON|INITIAL|UNITS)\b'
|
||||
- language: Modula-2
|
||||
pattern: '^\s*(?i:MODULE|END) [\w\.]+;'
|
||||
- language: [Linux Kernel Module, AMPL]
|
||||
- extensions: ['.mojo']
|
||||
rules:
|
||||
- language: Mojo
|
||||
pattern: '^\s*(alias|def|from|fn|import|struct|trait)\s'
|
||||
- language: XML
|
||||
pattern: '^\s*<\?xml'
|
||||
- extensions: ['.ms']
|
||||
rules:
|
||||
- language: Roff
|
||||
pattern: '^[.''][A-Za-z]{2}(\s|$)'
|
||||
- language: Unix Assembly
|
||||
and:
|
||||
- negative_pattern: '/\*'
|
||||
- pattern: '^\s*\.(?:include\s|globa?l\s|[A-Za-z][_A-Za-z0-9]*:)'
|
||||
- language: MAXScript
|
||||
- extensions: ['.n']
|
||||
rules:
|
||||
- language: Roff
|
||||
pattern: '^[.'']'
|
||||
- language: Nemerle
|
||||
pattern: '^(module|namespace|using)\s'
|
||||
- extensions: ['.ncl']
|
||||
rules:
|
||||
- language: XML
|
||||
pattern: '^\s*<\?xml\s+version'
|
||||
- language: Gerber Image
|
||||
pattern: '^[DGMT][0-9]{2}\*(?:\r?\n|\r)'
|
||||
- language: Text
|
||||
pattern: 'THE_TITLE'
|
||||
- extensions: ['.nl']
|
||||
rules:
|
||||
- language: NL
|
||||
pattern: '^(b|g)[0-9]+ '
|
||||
- language: NewLisp
|
||||
- extensions: ['.nu']
|
||||
rules:
|
||||
- language: Nushell
|
||||
pattern: '^\s*(import|export|module|def|let|let-env) '
|
||||
- language: Nu
|
||||
- extensions: ['.odin']
|
||||
rules:
|
||||
- language: Object Data Instance Notation
|
||||
pattern: '(?:^|<)\s*[A-Za-z0-9_]+\s*=\s*<'
|
||||
- language: Odin
|
||||
pattern: 'package\s+\w+|\b(?:im|ex)port\s*"[\w:./]+"|\w+\s*::\s*(?:proc|struct)\s*\(|^\s*//\s'
|
||||
- extensions: ['.p']
|
||||
rules:
|
||||
- language: Gnuplot
|
||||
pattern:
|
||||
- '^s?plot\b'
|
||||
- '^set\s+(term|terminal|out|output|[xy]tics|[xy]label|[xy]range|style)\b'
|
||||
- language: OpenEdge ABL
|
||||
- extensions: ['.php']
|
||||
rules:
|
||||
- language: Hack
|
||||
pattern: '<\?hh'
|
||||
- language: PHP
|
||||
pattern: '<\?[^h]'
|
||||
- extensions: ['.pkl']
|
||||
rules:
|
||||
- language: Pkl
|
||||
pattern:
|
||||
- '^\s*(module|import|amends|extends|local|const|fixed|abstract|open|class|typealias|@\w+)\b'
|
||||
- '^\s*[a-zA-Z0-9_$]+\s*(=|{|:)|^\s*`[^`]+`\s*(=|{|:)|for\s*\(|when\s*\('
|
||||
- language: Pickle
|
||||
- extensions: ['.pl']
|
||||
rules:
|
||||
- language: Prolog
|
||||
pattern: '^[^#]*:-'
|
||||
- language: Perl
|
||||
and:
|
||||
- negative_pattern: '^\s*use\s+v6\b'
|
||||
- named_pattern: perl
|
||||
- language: Raku
|
||||
named_pattern: raku
|
||||
- extensions: ['.plist']
|
||||
rules:
|
||||
- language: XML Property List
|
||||
pattern: '^\s*(?:<\?xml\s|<!DOCTYPE\s+plist|<plist(?:\s+version\s*=\s*(["''])\d+(?:\.\d+)?\1)?\s*>\s*$)'
|
||||
- language: OpenStep Property List
|
||||
- extensions: ['.plt']
|
||||
rules:
|
||||
- language: Prolog
|
||||
pattern: '^\s*:-'
|
||||
- extensions: ['.pm']
|
||||
rules:
|
||||
- language: Perl
|
||||
and:
|
||||
- negative_pattern: '^\s*use\s+v6\b'
|
||||
- named_pattern: perl
|
||||
- language: Raku
|
||||
named_pattern: raku
|
||||
- language: X PixMap
|
||||
pattern: '^\s*\/\* XPM \*\/'
|
||||
- extensions: ['.pod']
|
||||
rules:
|
||||
- language: Pod 6
|
||||
pattern: '^[\s&&[^\r\n]]*=(comment|begin pod|begin para|item\d+)'
|
||||
- language: Pod
|
||||
- extensions: ['.pp']
|
||||
rules:
|
||||
- language: Pascal
|
||||
pattern: '^\s*end[.;]'
|
||||
- language: Puppet
|
||||
pattern: '^\s+\w+\s+=>\s'
|
||||
- extensions: ['.pro']
|
||||
rules:
|
||||
- language: Proguard
|
||||
pattern: '^-(include\b.*\.pro$|keep\b|keepclassmembers\b|keepattributes\b)'
|
||||
- language: Prolog
|
||||
pattern: '^[^\[#]+:-'
|
||||
- language: INI
|
||||
pattern: 'last_client='
|
||||
- language: QMake
|
||||
and:
|
||||
- pattern: HEADERS
|
||||
- pattern: SOURCES
|
||||
- language: IDL
|
||||
pattern: '^\s*(?i:function|pro|compile_opt) \w[ \w,:]*$'
|
||||
- extensions: ['.properties']
|
||||
rules:
|
||||
- language: INI
|
||||
and:
|
||||
- named_pattern: key_equals_value
|
||||
- pattern: '^[;\[]'
|
||||
- language: Java Properties
|
||||
and:
|
||||
- named_pattern: key_equals_value
|
||||
- pattern: '^[#!]'
|
||||
- language: INI
|
||||
named_pattern: key_equals_value
|
||||
- language: Java Properties
|
||||
pattern: '^[^#!][^:]*:'
|
||||
- extensions: ['.q']
|
||||
rules:
|
||||
- language: q
|
||||
pattern: '((?i:[A-Z.][\w.]*:\{)|^\\(cd?|d|l|p|ts?) )'
|
||||
- language: HiveQL
|
||||
pattern: '(?i:SELECT\s+[\w*,]+\s+FROM|(CREATE|ALTER|DROP)\s(DATABASE|SCHEMA|TABLE))'
|
||||
- extensions: ['.qs']
|
||||
rules:
|
||||
- language: Q#
|
||||
pattern: '^((\/{2,3})?\s*(namespace|operation)\b)'
|
||||
- language: Qt Script
|
||||
pattern: '(\w+\.prototype\.\w+|===|\bvar\b)'
|
||||
- extensions: ['.r']
|
||||
rules:
|
||||
- language: Rebol
|
||||
pattern: '(?i:\bRebol\b)'
|
||||
- language: Rez
|
||||
pattern: '(#include\s+["<](Types\.r|Carbon\/Carbon\.r)[">])|((resource|data|type)\s+''[A-Za-z0-9]{4}''\s+((\(.*\)\s+){0,1}){)'
|
||||
- language: R
|
||||
pattern: '<-|^\s*#'
|
||||
- extensions: ['.re']
|
||||
rules:
|
||||
- language: Reason
|
||||
pattern:
|
||||
- '^\s*module\s+type\s'
|
||||
- '^\s*(?:include|open)\s+\w+\s*;\s*$'
|
||||
- '^\s*let\s+(?:module\s\w+\s*=\s*\{|\w+:\s+.*=.*;\s*$)'
|
||||
- language: C++
|
||||
pattern:
|
||||
- '^\s*#(?:(?:if|ifdef|define|pragma)\s+\w|\s*include\s+<[^>]+>)'
|
||||
- '^\s*template\s*<'
|
||||
- extensions: ['.res']
|
||||
rules:
|
||||
- language: ReScript
|
||||
pattern:
|
||||
- '^\s*(let|module|type)\s+\w*\s+=\s+'
|
||||
- '^\s*(?:include|open)\s+\w+\s*$'
|
||||
- extensions: ['.rno']
|
||||
rules:
|
||||
- language: RUNOFF
|
||||
pattern: '(?i:^\.!|^\f|\f$|^\.end lit(?:eral)?\b|^\.[a-zA-Z].*?;\.[a-zA-Z](?:[; \t])|\^\*[^\s*][^*]*\\\*(?=$|\s)|^\.c;[ \t]*\w+)'
|
||||
- language: Roff
|
||||
pattern: '^\.\\" '
|
||||
- extensions: ['.rpy']
|
||||
rules:
|
||||
- language: Python
|
||||
pattern: '^(import|from|class|def)\s'
|
||||
- language: "Ren'Py"
|
||||
- extensions: ['.rs']
|
||||
rules:
|
||||
- language: Rust
|
||||
pattern: '^(use |fn |mod |pub |macro_rules|impl|#!?\[)'
|
||||
- language: RenderScript
|
||||
pattern: '#include|#pragma\s+(rs|version)|__attribute__'
|
||||
- language: XML
|
||||
pattern: '^\s*<\?xml'
|
||||
- extensions: ['.s']
|
||||
rules:
|
||||
- language: Motorola 68K Assembly
|
||||
named_pattern: m68k
|
||||
- extensions: ['.sc']
|
||||
rules:
|
||||
- language: SuperCollider
|
||||
pattern: '(?i:\^(this|super)\.|^\s*~\w+\s*=\.)'
|
||||
- language: Scala
|
||||
pattern: '(^\s*import (scala|java)\.|^\s*class\b)'
|
||||
- extensions: ['.scd']
|
||||
rules:
|
||||
- language: SuperCollider
|
||||
pattern: '(?i:\^(this|super)\.|^\s*(~\w+\s*=\.|SynthDef\b))'
|
||||
- language: Markdown
|
||||
# Markdown syntax for scdoc
|
||||
pattern: '^#+\s+(NAME|SYNOPSIS|DESCRIPTION)'
|
||||
- extensions: ['.sol']
|
||||
rules:
|
||||
- language: Solidity
|
||||
pattern: '\bpragma\s+solidity\b|\b(?:abstract\s+)?contract\s+(?!\d)[a-zA-Z0-9$_]+(?:\s+is\s+(?:[a-zA-Z0-9$_][^\{]*?)?)?\s*\{'
|
||||
- language: Gerber Image
|
||||
pattern: '^[DGMT][0-9]{2}\*(?:\r?\n|\r)'
|
||||
- extensions: ['.sql']
|
||||
rules:
|
||||
# Postgres
|
||||
- language: PLpgSQL
|
||||
pattern: '(?i:^\\i\b|AS\s+\$\$|LANGUAGE\s+''?plpgsql''?|BEGIN(\s+WORK)?\s*;)'
|
||||
# IBM db2
|
||||
- language: SQLPL
|
||||
pattern: '(?i:ALTER\s+MODULE|MODE\s+DB2SQL|\bSYS(CAT|PROC)\.|ASSOCIATE\s+RESULT\s+SET|\bEND!\s*$)'
|
||||
# Oracle
|
||||
- language: PLSQL
|
||||
pattern: '(?i:\$\$PLSQL_|XMLTYPE|systimestamp|\.nextval|CONNECT\s+BY|AUTHID\s+(DEFINER|CURRENT_USER)|constructor\W+function)'
|
||||
# T-SQL
|
||||
- language: TSQL
|
||||
pattern: '(?i:^\s*GO\b|BEGIN(\s+TRY|\s+CATCH)|OUTPUT\s+INSERTED|DECLARE\s+@|\[dbo\])'
|
||||
- language: SQL
|
||||
- extensions: ['.srt']
|
||||
rules:
|
||||
- language: SubRip Text
|
||||
pattern: '^(\d{2}:\d{2}:\d{2},\d{3})\s*(-->)\s*(\d{2}:\d{2}:\d{2},\d{3})$'
|
||||
- extensions: ['.st']
|
||||
rules:
|
||||
- language: StringTemplate
|
||||
pattern: '\$\w+[($]|(.)!\s*.+?\s*!\1|<!\s*.+?\s*!>|\[!\s*.+?\s*!\]|\{!\s*.+?\s*!\}'
|
||||
- language: Smalltalk
|
||||
pattern: '\A\s*[\[{(^"''\w#]|[a-zA-Z_]\w*\s*:=\s*[a-zA-Z_]\w*|class\s*>>\s*[a-zA-Z_]\w*|^[a-zA-Z_]\w*\s+[a-zA-Z_]\w*:|^Class\s*\{|if(?:True|False):\s*\['
|
||||
- extensions: ['.star']
|
||||
rules:
|
||||
- language: STAR
|
||||
pattern: '^loop_\s*$'
|
||||
- language: Starlark
|
||||
- extensions: ['.stl']
|
||||
rules:
|
||||
- language: STL
|
||||
pattern: '\A\s*solid(?:$|\s)[\s\S]*^endsolid(?:$|\s)'
|
||||
- extensions: ['.sw']
|
||||
rules:
|
||||
- language: Sway
|
||||
pattern: '^\s*(?:(?:abi|dep|fn|impl|mod|pub|trait)\s|#\[)'
|
||||
- language: XML
|
||||
pattern: '^\s*<\?xml\s+version'
|
||||
- extensions: ['.t']
|
||||
rules:
|
||||
- language: Perl
|
||||
and:
|
||||
- negative_pattern: '^\s*use\s+v6\b'
|
||||
- named_pattern: perl
|
||||
- language: Raku
|
||||
pattern: '^\s*(?:use\s+v6\b|\bmodule\b|\bmy\s+class\b)'
|
||||
- language: Turing
|
||||
pattern: '^\s*%[ \t]+|^\s*var\s+\w+(\s*:\s*\w+)?\s*:=\s*\w+'
|
||||
- extensions: ['.tag']
|
||||
rules:
|
||||
- language: Java Server Pages
|
||||
pattern: '<%[@!=\s]?\s*(taglib|tag|include|attribute|variable)\s'
|
||||
- extensions: ['.tlv']
|
||||
rules:
|
||||
- language: TL-Verilog
|
||||
pattern: '^\\.{0,10}TLV_version'
|
||||
- extensions: ['.toc']
|
||||
rules:
|
||||
- language: World of Warcraft Addon Data
|
||||
pattern: '^## |@no-lib-strip@'
|
||||
- language: TeX
|
||||
pattern: '^\\(contentsline|defcounter|beamer|boolfalse)'
|
||||
- extensions: ['.ts']
|
||||
rules:
|
||||
- language: XML
|
||||
pattern: '<TS\b'
|
||||
- language: TypeScript
|
||||
- extensions: ['.tst']
|
||||
rules:
|
||||
- language: GAP
|
||||
pattern: 'gap> '
|
||||
# Heads up - we don't usually write heuristics like this (with no regex match)
|
||||
- language: Scilab
|
||||
- extensions: ['.tsx']
|
||||
rules:
|
||||
- language: TSX
|
||||
pattern: '^\s*(import.+(from\s+|require\()[''"]react|\/\/\/\s*<reference\s)'
|
||||
- language: XML
|
||||
pattern: '(?i:^\s*<\?xml\s+version)'
|
||||
- extensions: ['.txt']
|
||||
rules:
|
||||
# The following RegExp is simply a collapsed and simplified form of the
|
||||
# VIM_MODELINE pattern in `./lib/linguist/strategy/modeline.rb`.
|
||||
- language: Vim Help File
|
||||
pattern: '(?:(?:^|[ \t])(?:vi|Vi(?=m))(?:m[<=>]?[0-9]+|m)?|[ \t]ex)(?=:(?=[ \t]*set?[ \t][^\r\n:]+:)|:(?![ \t]*set?[ \t]))(?:(?:[ \t]*:[ \t]*|[ \t])\w*(?:[ \t]*=(?:[^\\\s]|\\.)*)?)*[ \t:](?:filetype|ft|syntax)[ \t]*=(help)(?=$|\s|:)'
|
||||
- language: Adblock Filter List
|
||||
pattern: |-
|
||||
(?x)\A
|
||||
\[
|
||||
(?<version>
|
||||
(?:
|
||||
[Aa]d[Bb]lock
|
||||
(?:[ \t][Pp]lus)?
|
||||
|
|
||||
u[Bb]lock
|
||||
(?:[ \t][Oo]rigin)?
|
||||
|
|
||||
[Aa]d[Gg]uard
|
||||
)
|
||||
(?:[ \t] \d+(?:\.\d+)*+)?
|
||||
)
|
||||
(?:
|
||||
[ \t]?;[ \t]?
|
||||
\g<version>
|
||||
)*+
|
||||
\]
|
||||
# HACK: This is a contrived use of heuristics needed to address
|
||||
# an unusual edge-case. See https://git.io/JULye for discussion.
|
||||
- language: Text
|
||||
- extensions: ['.typ']
|
||||
rules:
|
||||
- language: Typst
|
||||
pattern: '^#(import|show|let|set)'
|
||||
- language: XML
|
||||
- extensions: ['.url']
|
||||
rules:
|
||||
- language: INI
|
||||
pattern: '^\[InternetShortcut\](?:\r?\n|\r)(?>[^\s\[][^\r\n]*(?:\r?\n|\r))*URL='
|
||||
- extensions: ['.v']
|
||||
rules:
|
||||
- language: Coq
|
||||
pattern: '(?:^|\s)(?:Proof|Qed)\.(?:$|\s)|(?:^|\s)Require[ \t]+(Import|Export)\s'
|
||||
- language: Verilog
|
||||
pattern: '^[ \t]*module\s+[^\s()]+\s+\#?\(|^[ \t]*`(?:define|ifdef|ifndef|include|timescale)|^[ \t]*always[ \t]+@|^[ \t]*initial[ \t]+(begin|@)'
|
||||
- language: V
|
||||
pattern: '\$(?:if|else)[ \t]|^[ \t]*fn\s+[^\s()]+\(.*?\).*?\{|^[ \t]*for\s*\{'
|
||||
- extensions: ['.vba']
|
||||
rules:
|
||||
- language: Vim Script
|
||||
pattern: '^UseVimball'
|
||||
- language: VBA
|
||||
- extensions: ['.w']
|
||||
rules:
|
||||
- language: OpenEdge ABL
|
||||
pattern: '&ANALYZE-SUSPEND _UIB-CODE-BLOCK _CUSTOM _DEFINITIONS'
|
||||
- language: CWeb
|
||||
pattern: '^@(<|\w+\.)'
|
||||
- extensions: ['.x']
|
||||
rules:
|
||||
- language: DirectX 3D File
|
||||
pattern: '^xof 030(2|3)(?:txt|bin|tzip|bzip)\b'
|
||||
- language: RPC
|
||||
pattern: '\b(program|version)\s+\w+\s*\{|\bunion\s+\w+\s+switch\s*\('
|
||||
- language: Logos
|
||||
pattern: '^%(end|ctor|hook|group)\b'
|
||||
- language: Linker Script
|
||||
pattern: 'OUTPUT_ARCH\(|OUTPUT_FORMAT\(|SECTIONS'
|
||||
- extensions: ['.yaml', '.yml']
|
||||
rules:
|
||||
- language: MiniYAML
|
||||
pattern: '^\t+.*?[^\s:].*?:'
|
||||
negative_pattern: '---'
|
||||
- language: OASv2-yaml
|
||||
pattern: 'swagger:\s?''?"?2.[0-9.]+''?"?'
|
||||
- language: OASv3-yaml
|
||||
pattern: 'openapi:\s?''?"?3.[0-9.]+''?"?'
|
||||
- language: YAML
|
||||
- extensions: ['.yy']
|
||||
rules:
|
||||
- language: JSON
|
||||
pattern: '\"modelName\"\:\s*\"GM'
|
||||
- language: Yacc
|
||||
named_patterns:
|
||||
cpp:
|
||||
- '^\s*#\s*include <(cstdint|string|vector|map|list|array|bitset|queue|stack|forward_list|unordered_map|unordered_set|(i|o|io)stream)>'
|
||||
- '^\s*template\s*<'
|
||||
- '^[ \t]*(try|constexpr)'
|
||||
- '^[ \t]*catch\s*\('
|
||||
- '^[ \t]*(class|(using[ \t]+)?namespace)\s+\w+'
|
||||
- '^[ \t]*(private|public|protected):$'
|
||||
- '__has_cpp_attribute|__cplusplus >'
|
||||
- 'std::\w+'
|
||||
euphoria:
|
||||
- '^\s*namespace\s'
|
||||
- '^\s*(?:public\s+)?include\s'
|
||||
- '^\s*(?:(?:public|export|global)\s+)?(?:atom|constant|enum|function|integer|object|procedure|sequence|type)\s'
|
||||
fortran: '^(?i:[c*][^abd-z]| (subroutine|program|end|data)\s|\s*!)'
|
||||
gsc:
|
||||
- '^\s*#\s*(?:using|insert|include|define|namespace)[ \t]+\w'
|
||||
- '^\s*(?>(?:autoexec|private)\s+){0,2}function\s+(?>(?:autoexec|private)\s+){0,2}\w+\s*\('
|
||||
- '\b(?:level|self)[ \t]+thread[ \t]+(?:\[\[[ \t]*(?>\w+\.)*\w+[ \t]*\]\]|\w+)[ \t]*\([^\r\n\)]*\)[ \t]*;'
|
||||
- '^[ \t]*#[ \t]*(?:precache|using_animtree)[ \t]*\('
|
||||
key_equals_value: '^[^#!;][^=]*='
|
||||
m68k:
|
||||
- '(?im)\bmoveq(?:\.l)?\s+#(?:\$-?[0-9a-f]{1,3}|%[0-1]{1,8}|-?[0-9]{1,3}),\s*d[0-7]\b'
|
||||
- '(?im)^\s*move(?:\.[bwl])?\s+(?:sr|usp),\s*[^\s]+'
|
||||
- '(?im)^\s*move\.[bwl]\s+.*\b[ad]\d'
|
||||
- '(?im)^\s*movem\.[bwl]\b'
|
||||
- '(?im)^\s*move[mp](?:\.[wl])?\b'
|
||||
- '(?im)^\s*btst\b'
|
||||
- '(?im)^\s*dbra\b'
|
||||
man-heading: '^[.''][ \t]*SH +(?:[^"\s]+|"[^"\s]+)'
|
||||
man-title: '^[.''][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)'
|
||||
mdoc-date: '^[.''][ \t]*Dd +(?:[^"\s]+|"[^"]+")'
|
||||
mdoc-heading: '^[.''][ \t]*Sh +(?:[^"\s]|"[^"]+")'
|
||||
mdoc-title: '^[.''][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)'
|
||||
objectivec: '^\s*(@(interface|class|protocol|property|end|synchronised|selector|implementation)\b|#import\s+.+\.h[">])'
|
||||
perl:
|
||||
- '\buse\s+(?:strict\b|v?5\b)'
|
||||
- '^\s*use\s+(?:constant|overload)\b'
|
||||
- '^\s*(?:\*|(?:our\s*)?@)EXPORT\s*='
|
||||
- '^\s*package\s+[^\W\d]\w*(?:::\w+)*\s*(?:[;{]|\sv?\d)'
|
||||
- '[\s$][^\W\d]\w*(?::\w+)*->[a-zA-Z_\[({]'
|
||||
raku: '^\s*(?:use\s+v6\b|\bmodule\b|\b(?:my\s+)?class\b)'
|
||||
vb-class: '^[ ]*VERSION [0-9]\.[0-9] CLASS'
|
||||
vb-form: '^[ ]*VERSION [0-9]\.[0-9]{2}'
|
||||
vb-module: '^[ ]*Attribute VB_Name = '
|
||||
vba:
|
||||
- '\b(?:VBA|[vV]ba)(?:\b|[0-9A-Z_])'
|
||||
# VBA7 new 64-bit features
|
||||
- '^[ ]*(?:Public|Private)? Declare PtrSafe (?:Sub|Function)\b'
|
||||
- '^[ ]*#If Win64\b'
|
||||
- '^[ ]*(?:Dim|Const) [0-9a-zA-Z_]*[ ]*As Long(?:Ptr|Long)\b'
|
||||
# Top module declarations unique to VBA
|
||||
- '^[ ]*Option (?:Private Module|Compare (?:Database|Text|Binary))\b'
|
||||
# General VBA libraries and objects
|
||||
- '(?: |\()(?:Access|Excel|Outlook|PowerPoint|Visio|Word|VBIDE)\.\w'
|
||||
- '\b(?:(?:Active)?VBProjects?|VBComponents?|Application\.(?:VBE|ScreenUpdating))\b'
|
||||
# AutoCAD, Outlook, PowerPoint and Word objects
|
||||
- '\b(?:ThisDrawing|AcadObject|Active(?:Explorer|Inspector|Window\.Presentation|Presentation|Document)|Selection\.(?:Find|Paragraphs))\b'
|
||||
# Excel objects
|
||||
- '\b(?:(?:This|Active)?Workbooks?|Worksheets?|Active(?:Sheet|Chart|Cell)|WorksheetFunction)\b'
|
||||
- '\b(?:Range\(".*|Cells\([0-9a-zA-Z_]*, (?:[0-9a-zA-Z_]*|"[a-zA-Z]{1,3}"))\)'
|
@ -3,6 +3,7 @@
|
||||
<name>JSON</name>
|
||||
<alias>json</alias>
|
||||
<filename>*.json</filename>
|
||||
<filename>*.jsonc</filename>
|
||||
<filename>*.avsc</filename>
|
||||
<mime_type>application/json</mime_type>
|
||||
<dot_all>true</dot_all>
|
||||
|
137
lexers/jsonnet.xml
Normal file
137
lexers/jsonnet.xml
Normal file
@ -0,0 +1,137 @@
|
||||
|
||||
<lexer>
|
||||
<config>
|
||||
<name>Jsonnet</name>
|
||||
<alias>jsonnet</alias>
|
||||
<filename>*.jsonnet</filename>
|
||||
<filename>*.libsonnet</filename>
|
||||
</config>
|
||||
<rules>
|
||||
<state name="_comments">
|
||||
<rule pattern="(//|#).*\n"><token type="CommentSingle"/></rule>
|
||||
<rule pattern="/\*\*([^/]|/(?!\*))*\*/"><token type="LiteralStringDoc"/></rule>
|
||||
<rule pattern="/\*([^/]|/(?!\*))*\*/"><token type="Comment"/></rule>
|
||||
</state>
|
||||
<state name="root">
|
||||
<rule><include state="_comments"/></rule>
|
||||
<rule pattern="@'.*'"><token type="LiteralString"/></rule>
|
||||
<rule pattern="@".*""><token type="LiteralString"/></rule>
|
||||
<rule pattern="'"><token type="LiteralString"/><push state="singlestring"/></rule>
|
||||
<rule pattern="""><token type="LiteralString"/><push state="doublestring"/></rule>
|
||||
<rule pattern="\|\|\|(.|\n)*\|\|\|"><token type="LiteralString"/></rule>
|
||||
<rule pattern="[+-]?[0-9]+(.[0-9])?"><token type="LiteralNumberFloat"/></rule>
|
||||
<rule pattern="[!$~+\-&|^=<>*/%]"><token type="Operator"/></rule>
|
||||
<rule pattern="\{"><token type="Punctuation"/><push state="object"/></rule>
|
||||
<rule pattern="\["><token type="Punctuation"/><push state="array"/></rule>
|
||||
<rule pattern="local\b"><token type="Keyword"/><push state="local_name"/></rule>
|
||||
<rule pattern="assert\b"><token type="Keyword"/><push state="assert"/></rule>
|
||||
<rule pattern="(assert|else|error|false|for|if|import|importstr|in|null|tailstrict|then|self|super|true)\b"><token type="Keyword"/></rule>
|
||||
<rule pattern="\s+"><token type="TextWhitespace"/></rule>
|
||||
<rule pattern="function(?=\()"><token type="Keyword"/><push state="function_params"/></rule>
|
||||
<rule pattern="std\.[^\W\d]\w*(?=\()"><token type="NameBuiltin"/><push state="function_args"/></rule>
|
||||
<rule pattern="[^\W\d]\w*(?=\()"><token type="NameFunction"/><push state="function_args"/></rule>
|
||||
<rule pattern="[^\W\d]\w*"><token type="NameVariable"/></rule>
|
||||
<rule pattern="[\.()]"><token type="Punctuation"/></rule>
|
||||
</state>
|
||||
<state name="singlestring">
|
||||
<rule pattern="[^'\\]"><token type="LiteralString"/></rule>
|
||||
<rule pattern="\\."><token type="LiteralStringEscape"/></rule>
|
||||
<rule pattern="'"><token type="LiteralString"/><pop depth="1"/></rule>
|
||||
</state>
|
||||
<state name="doublestring">
|
||||
<rule pattern="[^"\\]"><token type="LiteralString"/></rule>
|
||||
<rule pattern="\\."><token type="LiteralStringEscape"/></rule>
|
||||
<rule pattern="""><token type="LiteralString"/><pop depth="1"/></rule>
|
||||
</state>
|
||||
<state name="array">
|
||||
<rule pattern=","><token type="Punctuation"/></rule>
|
||||
<rule pattern="\]"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
<rule><include state="root"/></rule>
|
||||
</state>
|
||||
<state name="local_name">
|
||||
<rule pattern="[^\W\d]\w*(?=\()"><token type="NameFunction"/><push state="function_params"/></rule>
|
||||
<rule pattern="[^\W\d]\w*"><token type="NameVariable"/></rule>
|
||||
<rule pattern="\s+"><token type="TextWhitespace"/></rule>
|
||||
<rule pattern="(?==)"><token type="TextWhitespace"/><push state="#pop" state="local_value"/></rule>
|
||||
</state>
|
||||
<state name="local_value">
|
||||
<rule pattern="="><token type="Operator"/></rule>
|
||||
<rule pattern=";"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
<rule><include state="root"/></rule>
|
||||
</state>
|
||||
<state name="assert">
|
||||
<rule pattern=":"><token type="Punctuation"/></rule>
|
||||
<rule pattern=";"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
<rule><include state="root"/></rule>
|
||||
</state>
|
||||
<state name="function_params">
|
||||
<rule pattern="[^\W\d]\w*"><token type="NameVariable"/></rule>
|
||||
<rule pattern="\("><token type="Punctuation"/></rule>
|
||||
<rule pattern="\)"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
<rule pattern=","><token type="Punctuation"/></rule>
|
||||
<rule pattern="\s+"><token type="TextWhitespace"/></rule>
|
||||
<rule pattern="="><token type="Operator"/><push state="function_param_default"/></rule>
|
||||
</state>
|
||||
<state name="function_args">
|
||||
<rule pattern="\("><token type="Punctuation"/></rule>
|
||||
<rule pattern="\)"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
<rule pattern=","><token type="Punctuation"/></rule>
|
||||
<rule pattern="\s+"><token type="TextWhitespace"/></rule>
|
||||
<rule><include state="root"/></rule>
|
||||
</state>
|
||||
<state name="object">
|
||||
<rule pattern="\s+"><token type="TextWhitespace"/></rule>
|
||||
<rule pattern="local\b"><token type="Keyword"/><push state="object_local_name"/></rule>
|
||||
<rule pattern="assert\b"><token type="Keyword"/><push state="object_assert"/></rule>
|
||||
<rule pattern="\["><token type="Operator"/><push state="field_name_expr"/></rule>
|
||||
<rule pattern="(?=[^\W\d]\w*)"><token type="Text"/><push state="field_name"/></rule>
|
||||
<rule pattern="\}"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
<rule pattern="""><token type="NameVariable"/><push state="double_field_name"/></rule>
|
||||
<rule pattern="'"><token type="NameVariable"/><push state="single_field_name"/></rule>
|
||||
<rule><include state="_comments"/></rule>
|
||||
</state>
|
||||
<state name="field_name">
|
||||
<rule pattern="[^\W\d]\w*(?=\()"><token type="NameFunction"/><push state="field_separator" state="function_params"/></rule>
|
||||
<rule pattern="[^\W\d]\w*"><token type="NameVariable"/><push state="field_separator"/></rule>
|
||||
</state>
|
||||
<state name="double_field_name">
|
||||
<rule pattern="([^"\\]|\\.)*""><token type="NameVariable"/><push state="field_separator"/></rule>
|
||||
</state>
|
||||
<state name="single_field_name">
|
||||
<rule pattern="([^'\\]|\\.)*'"><token type="NameVariable"/><push state="field_separator"/></rule>
|
||||
</state>
|
||||
<state name="field_name_expr">
|
||||
<rule pattern="\]"><token type="Operator"/><push state="field_separator"/></rule>
|
||||
<rule><include state="root"/></rule>
|
||||
</state>
|
||||
<state name="function_param_default">
|
||||
<rule pattern="(?=[,\)])"><token type="TextWhitespace"/><pop depth="1"/></rule>
|
||||
<rule><include state="root"/></rule>
|
||||
</state>
|
||||
<state name="field_separator">
|
||||
<rule pattern="\s+"><token type="TextWhitespace"/></rule>
|
||||
<rule pattern="\+?::?:?"><token type="Punctuation"/><push state="#pop" state="#pop" state="field_value"/></rule>
|
||||
<rule><include state="_comments"/></rule>
|
||||
</state>
|
||||
<state name="field_value">
|
||||
<rule pattern=","><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
<rule pattern="\}"><token type="Punctuation"/><pop depth="2"/></rule>
|
||||
<rule><include state="root"/></rule>
|
||||
</state>
|
||||
<state name="object_assert">
|
||||
<rule pattern=":"><token type="Punctuation"/></rule>
|
||||
<rule pattern=","><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
<rule><include state="root"/></rule>
|
||||
</state>
|
||||
<state name="object_local_name">
|
||||
<rule pattern="[^\W\d]\w*"><token type="NameVariable"/><push state="#pop" state="object_local_value"/></rule>
|
||||
<rule pattern="\s+"><token type="TextWhitespace"/></rule>
|
||||
</state>
|
||||
<state name="object_local_value">
|
||||
<rule pattern="="><token type="Operator"/></rule>
|
||||
<rule pattern=","><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
<rule pattern="\}"><token type="Punctuation"/><pop depth="2"/></rule>
|
||||
<rule><include state="root"/></rule>
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
55
lexers/markdown.xml
Normal file
55
lexers/markdown.xml
Normal file
@ -0,0 +1,55 @@
|
||||
|
||||
<lexer>
|
||||
<config>
|
||||
<name>Markdown</name>
|
||||
<alias>markdown</alias>
|
||||
<alias>md</alias>
|
||||
<filename>*.md</filename>
|
||||
<filename>*.markdown</filename>
|
||||
<mime_type>text/x-markdown</mime_type>
|
||||
</config>
|
||||
<rules>
|
||||
<state name="root">
|
||||
<rule pattern="(^#[^#].+)(\n)"><bygroups><token type="GenericHeading"/><token type="Text"/></bygroups></rule>
|
||||
<rule pattern="(^#{2,6}[^#].+)(\n)"><bygroups><token type="GenericSubheading"/><token type="Text"/></bygroups></rule>
|
||||
<rule pattern="^(.+)(\n)(=+)(\n)"><bygroups><token type="GenericHeading"/><token type="Text"/><token type="GenericHeading"/><token type="Text"/></bygroups></rule>
|
||||
<rule pattern="^(.+)(\n)(-+)(\n)"><bygroups><token type="GenericSubheading"/><token type="Text"/><token type="GenericSubheading"/><token type="Text"/></bygroups></rule>
|
||||
<rule pattern="^(\s*)([*-] )(\[[ xX]\])( .+\n)"><bygroups><token type="TextWhitespace"/><token type="Keyword"/><token type="Keyword"/><usingself state="inline"/></bygroups></rule>
|
||||
<rule pattern="^(\s*)([*-])(\s)(.+\n)"><bygroups><token type="TextWhitespace"/><token type="Keyword"/><token type="TextWhitespace"/><usingself state="inline"/></bygroups></rule>
|
||||
<rule pattern="^(\s*)([0-9]+\.)( .+\n)"><bygroups><token type="TextWhitespace"/><token type="Keyword"/><usingself state="inline"/></bygroups></rule>
|
||||
<rule pattern="^(\s*>\s)(.+\n)"><bygroups><token type="Keyword"/><token type="GenericEmph"/></bygroups></rule>
|
||||
<rule pattern="^(```\n)([\w\W]*?)(^```$)">
|
||||
<bygroups>
|
||||
<token type="LiteralStringBacktick"/>
|
||||
<token type="Text"/>
|
||||
<token type="LiteralStringBacktick"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="^(```)(\w+)(\n)([\w\W]*?)(^```$)">
|
||||
<bygroups>
|
||||
<token type="LiteralStringBacktick"/>
|
||||
<token type="NameLabel"/>
|
||||
<token type="TextWhitespace"/>
|
||||
<UsingByGroup lexer="2" content="4"/>
|
||||
<token type="LiteralStringBacktick"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule><include state="inline"/></rule>
|
||||
</state>
|
||||
<state name="inline">
|
||||
<rule pattern="\\."><token type="Text"/></rule>
|
||||
<rule pattern="([^`]?)(`[^`\n]+`)"><bygroups><token type="Text"/><token type="LiteralStringBacktick"/></bygroups></rule>
|
||||
<rule pattern="([^\*]?)(\*\*[^* \n][^*\n]*\*\*)"><bygroups><token type="Text"/><token type="GenericStrong"/></bygroups></rule>
|
||||
<rule pattern="([^_]?)(__[^_ \n][^_\n]*__)"><bygroups><token type="Text"/><token type="GenericStrong"/></bygroups></rule>
|
||||
<rule pattern="([^\*]?)(\*[^* \n][^*\n]*\*)"><bygroups><token type="Text"/><token type="GenericEmph"/></bygroups></rule>
|
||||
<rule pattern="([^_]?)(_[^_ \n][^_\n]*_)"><bygroups><token type="Text"/><token type="GenericEmph"/></bygroups></rule>
|
||||
<rule pattern="([^~]?)(~~[^~ \n][^~\n]*~~)"><bygroups><token type="Text"/><token type="GenericDeleted"/></bygroups></rule>
|
||||
<rule pattern="[@#][\w/:]+"><token type="NameEntity"/></rule>
|
||||
<rule pattern="(!?\[)([^]]+)(\])(\()([^)]+)(\))"><bygroups><token type="Text"/><token type="NameTag"/><token type="Text"/><token type="Text"/><token type="NameAttribute"/><token type="Text"/></bygroups></rule>
|
||||
<rule pattern="(\[)([^]]+)(\])(\[)([^]]*)(\])"><bygroups><token type="Text"/><token type="NameTag"/><token type="Text"/><token type="Text"/><token type="NameLabel"/><token type="Text"/></bygroups></rule>
|
||||
<rule pattern="^(\s*\[)([^]]*)(\]:\s*)(.+)"><bygroups><token type="Text"/><token type="NameLabel"/><token type="Text"/><token type="NameAttribute"/></bygroups></rule>
|
||||
<rule pattern="[^\\\s]+"><token type="Text"/></rule>
|
||||
<rule pattern="."><token type="Text"/></rule>
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
@ -45,7 +45,7 @@
|
||||
</emitters>
|
||||
</usingbygroup>
|
||||
</rule>
|
||||
<rule pattern="(ACCESS|ADD|ADDRESSES|AGGREGATE|ALIGNED|ALL|ALTER|ANALYSIS|AND|ANY|ARITY|ARN|ARRANGEMENT|ARRAY|AS|ASC|ASSERT|ASSUME|AT|AUCTION|AUTHORITY|AVAILABILITY|AVRO|AWS|BATCH|BEGIN|BETWEEN|BIGINT|BILLED|BODY|BOOLEAN|BOTH|BPCHAR|BROKEN|BROKER|BROKERS|BY|BYTES|CARDINALITY|CASCADE|CASE|CAST|CERTIFICATE|CHAIN|CHAINS|CHAR|CHARACTER|CHARACTERISTICS|CHECK|CLIENT|CLOSE|CLUSTER|CLUSTERS|COALESCE|COLLATE|COLUMN|COLUMNS|COMMENT|COMMIT|COMMITTED|COMPACTION|COMPATIBILITY|COMPRESSION|COMPUTE|COMPUTECTL|CONFIG|CONFLUENT|CONNECTION|CONNECTIONS|CONSTRAINT|COPY|COUNT|COUNTER|CREATE|CREATECLUSTER|CREATEDB|CREATEROLE|CREATION|CROSS|CSV|CURRENT|CURSOR|DATABASE|DATABASES|DATUMS|DAY|DAYS|DEALLOCATE|DEBEZIUM|DEBUG|DEBUGGING|DEC|DECIMAL|DECLARE|DECODING|DECORRELATED|DEFAULT|DEFAULTS|DELETE|DELIMITED|DELIMITER|DELTA|DESC|DETAILS|DISCARD|DISK|DISTINCT|DOC|DOT|DOUBLE|DROP|EAGER|ELEMENT|ELSE|ENABLE|END|ENDPOINT|ENFORCED|ENVELOPE|ERROR|ERRORS|ESCAPE|ESTIMATE|EVERY|EXCEPT|EXECUTE|EXISTS|EXPECTED|EXPLAIN|EXPOSE|EXPRESSIONS|EXTERNAL|EXTRACT|FACTOR|FALSE|FAST|FEATURES|FETCH|FIELDS|FILE|FILTER|FIRST|FIXPOINT|FLOAT|FOLLOWING|FOR|FOREIGN|FORMAT|FORWARD|FROM|FULL|FULLNAME|FUNCTION|GENERATOR|GRANT|GREATEST|GROUP|GROUPS|HAVING|HEADER|HEADERS|HISTORY|HOLD|HOST|HOUR|HOURS|HUMANIZED|ID|IDENTIFIERS|IDS|IF|IGNORE|ILIKE|IMPLEMENTATIONS|IMPORTED|IN|INCLUDE|INDEX|INDEXES|INFO|INHERIT|INLINE|INNER|INPUT|INSERT|INSIGHTS|INSPECT|INT|INTEGER|INTERNAL|INTERSECT|INTERVAL|INTO|INTROSPECTION|IS|ISNULL|ISOLATION|JOIN|JOINS|JSON|KAFKA|KEY|KEYS|LAST|LATERAL|LATEST|LEADING|LEAST|LEFT|LEGACY|LETREC|LEVEL|LIKE|LIMIT|LINEAR|LIST|LOAD|LOCAL|LOCALLY|LOG|LOGICAL|LOGIN|LOWERING|MANAGED|MANUAL|MAP|MARKETING|MATERIALIZE|MATERIALIZED|MAX|MECHANISMS|MEMBERSHIP|MESSAGE|METADATA|MINUTE|MINUTES|MODE|MONTH|MONTHS|MUTUALLY|MYSQL|NAME|NAMES|NATURAL|NEGATIVE|NEW|NEXT|NO|NOCREATECLUSTER|NOCREATEDB|NOCREATEROLE|NODE|NOINHERIT|NOLOGIN|NON|NONE|NOSUPERUSER|NOT|NOTICE|NOTICES|NULL|NULLIF|NULLS|OBJECTS|OF|OFFSET|ON|ONLY|OPERATOR|OPTIMIZED|OPTI
MIZER|OPTIONS|OR|ORDER|ORDINALITY|OUTER|OVER|OWNED|OWNER|PARTITION|PARTITIONS|PASSWORD|PATH|PHYSICAL|PLAN|PLANS|PORT|POSITION|POSTGRES|PRECEDING|PRECISION|PREFIX|PREPARE|PRIMARY|PRIVATELINK|PRIVILEGES|PROGRESS|PROTOBUF|PROTOCOL|PUBLICATION|PUSHDOWN|QUERY|QUOTE|RAISE|RANGE|RATE|RAW|READ|REAL|REASSIGN|RECURSION|RECURSIVE|REDACTED|REFERENCE|REFERENCES|REFRESH|REGEX|REGION|REGISTRY|REHYDRATION|RENAME|REOPTIMIZE|REPEATABLE|REPLACE|REPLAN|REPLICA|REPLICAS|REPLICATION|RESET|RESPECT|RESTRICT|RETAIN|RETURN|RETURNING|REVOKE|RIGHT|ROLE|ROLES|ROLLBACK|ROTATE|ROUNDS|ROW|ROWS|SASL|SCALE|SCHEDULE|SCHEMA|SCHEMAS|SECOND|SECONDS|SECRET|SECRETS|SECURITY|SEED|SELECT|SEQUENCES|SERIALIZABLE|SERVICE|SESSION|SET|SHARD|SHOW|SINK|SINKS|SIZE|SMALLINT|SNAPSHOT|SOME|SOURCE|SOURCES|SSH|SSL|START|STDIN|STDOUT|STORAGE|STORAGECTL|STRATEGY|STRICT|STRING|STRONG|SUBSCRIBE|SUBSOURCE|SUBSOURCES|SUBSTRING|SUBTREE|SUPERUSER|SWAP|SYNTAX|SYSTEM|TABLE|TABLES|TAIL|TEMP|TEMPORARY|TEXT|THEN|TICK|TIES|TIME|TIMELINE|TIMEOUT|TIMESTAMP|TIMESTAMPTZ|TIMING|TO|TOKEN|TOPIC|TPCH|TRACE|TRAILING|TRANSACTION|TRANSACTIONAL|TRIM|TRUE|TUNNEL|TYPE|TYPES|UNBOUNDED|UNCOMMITTED|UNION|UNIQUE|UNKNOWN|UP|UPDATE|UPSERT|URL|USAGE|USER|USERNAME|USERS|USING|VALIDATE|VALUE|VALUES|VARCHAR|VARIADIC|VARYING|VERSION|VIEW|VIEWS|WARNING|WEBHOOK|WHEN|WHERE|WINDOW|WIRE|WITH|WITHIN|WITHOUT|WORK|WORKERS|WRITE|YEAR|YEARS|ZONE|ZONES)\b">
|
||||
<rule pattern="(ACCESS|ADD|ADDRESSES|AGGREGATE|ALIGNED|ALL|ALTER|ANALYSIS|AND|ANY|ARITY|ARN|ARRANGEMENT|ARRAY|AS|ASC|ASSERT|ASSUME|AT|AUCTION|AUTHORITY|AVAILABILITY|AVRO|AWS|BATCH|BEGIN|BETWEEN|BIGINT|BILLED|BODY|BOOLEAN|BOTH|BPCHAR|BROKEN|BROKER|BROKERS|BY|BYTES|CARDINALITY|CASCADE|CASE|CAST|CERTIFICATE|CHAIN|CHAINS|CHAR|CHARACTER|CHARACTERISTICS|CHECK|CLASS|CLIENT|CLOCK|CLOSE|CLUSTER|CLUSTERS|COALESCE|COLLATE|COLUMN|COLUMNS|COMMENT|COMMIT|COMMITTED|COMPACTION|COMPATIBILITY|COMPRESSION|COMPUTE|COMPUTECTL|CONFIG|CONFLUENT|CONNECTION|CONNECTIONS|CONSTRAINT|CONTINUAL|COPY|COUNT|COUNTER|CREATE|CREATECLUSTER|CREATEDB|CREATEROLE|CREATION|CROSS|CSV|CURRENT|CURSOR|DATABASE|DATABASES|DATUMS|DAY|DAYS|DEALLOCATE|DEBEZIUM|DEBUG|DEBUGGING|DEC|DECIMAL|DECLARE|DECODING|DECORRELATED|DEFAULT|DEFAULTS|DELETE|DELIMITED|DELIMITER|DELTA|DESC|DETAILS|DISCARD|DISK|DISTINCT|DOC|DOT|DOUBLE|DROP|EAGER|ELEMENT|ELSE|ENABLE|END|ENDPOINT|ENFORCED|ENVELOPE|ERROR|ERRORS|ESCAPE|ESTIMATE|EVERY|EXCEPT|EXCLUDE|EXECUTE|EXISTS|EXPECTED|EXPLAIN|EXPOSE|EXPRESSIONS|EXTERNAL|EXTRACT|FACTOR|FALSE|FAST|FEATURES|FETCH|FIELDS|FILE|FILTER|FIRST|FIXPOINT|FLOAT|FOLLOWING|FOR|FOREIGN|FORMAT|FORWARD|FROM|FULL|FULLNAME|FUNCTION|FUSION|GENERATOR|GRANT|GREATEST|GROUP|GROUPS|HAVING|HEADER|HEADERS|HISTORY|HOLD|HOST|HOUR|HOURS|HUMANIZED|HYDRATION|ID|IDENTIFIERS|IDS|IF|IGNORE|ILIKE|IMPLEMENTATIONS|IMPORTED|IN|INCLUDE|INDEX|INDEXES|INFO|INHERIT|INLINE|INNER|INPUT|INSERT|INSIGHTS|INSPECT|INT|INTEGER|INTERNAL|INTERSECT|INTERVAL|INTO|INTROSPECTION|IS|ISNULL|ISOLATION|JOIN|JOINS|JSON|KAFKA|KEY|KEYS|LAST|LATERAL|LATEST|LEADING|LEAST|LEFT|LEGACY|LETREC|LEVEL|LIKE|LIMIT|LINEAR|LIST|LOAD|LOCAL|LOCALLY|LOG|LOGICAL|LOGIN|LOWERING|MANAGED|MANUAL|MAP|MARKETING|MATERIALIZE|MATERIALIZED|MAX|MECHANISMS|MEMBERSHIP|MESSAGE|METADATA|MINUTE|MINUTES|MODE|MONTH|MONTHS|MUTUALLY|MYSQL|NAME|NAMES|NATURAL|NEGATIVE|NEW|NEXT|NO|NOCREATECLUSTER|NOCREATEDB|NOCREATEROLE|NODE|NOINHERIT|NOLOGIN|NON|NONE|NOSUPERUSER|NOT|NOTICE|NOTICES|NULL|NULLIF|NULLS|OB
JECTS|OF|OFFSET|ON|ONLY|OPERATOR|OPTIMIZED|OPTIMIZER|OPTIONS|OR|ORDER|ORDINALITY|OUTER|OVER|OWNED|OWNER|PARTITION|PARTITIONS|PASSWORD|PATH|PHYSICAL|PLAN|PLANS|PORT|POSITION|POSTGRES|PRECEDING|PRECISION|PREFIX|PREPARE|PRIMARY|PRIVATELINK|PRIVILEGES|PROGRESS|PROTOBUF|PROTOCOL|PUBLIC|PUBLICATION|PUSHDOWN|QUERY|QUOTE|RAISE|RANGE|RATE|RAW|READ|READY|REAL|REASSIGN|RECURSION|RECURSIVE|REDACTED|REDUCE|REFERENCE|REFERENCES|REFRESH|REGEX|REGION|REGISTRY|RENAME|REOPTIMIZE|REPEATABLE|REPLACE|REPLAN|REPLICA|REPLICAS|REPLICATION|RESET|RESPECT|RESTRICT|RETAIN|RETURN|RETURNING|REVOKE|RIGHT|ROLE|ROLES|ROLLBACK|ROTATE|ROUNDS|ROW|ROWS|SASL|SCALE|SCHEDULE|SCHEMA|SCHEMAS|SECOND|SECONDS|SECRET|SECRETS|SECURITY|SEED|SELECT|SEQUENCES|SERIALIZABLE|SERVICE|SESSION|SET|SHARD|SHOW|SINK|SINKS|SIZE|SMALLINT|SNAPSHOT|SOME|SOURCE|SOURCES|SSH|SSL|START|STDIN|STDOUT|STORAGE|STORAGECTL|STRATEGY|STRICT|STRING|STRONG|SUBSCRIBE|SUBSOURCE|SUBSOURCES|SUBSTRING|SUBTREE|SUPERUSER|SWAP|SYNTAX|SYSTEM|TABLE|TABLES|TAIL|TASK|TEMP|TEMPORARY|TEXT|THEN|TICK|TIES|TIME|TIMELINE|TIMEOUT|TIMESTAMP|TIMESTAMPTZ|TIMING|TO|TOKEN|TOPIC|TPCH|TRACE|TRAILING|TRANSACTION|TRANSACTIONAL|TRIM|TRUE|TUNNEL|TYPE|TYPES|UNBOUNDED|UNCOMMITTED|UNION|UNIQUE|UNKNOWN|UNNEST|UNTIL|UP|UPDATE|UPSERT|URL|USAGE|USER|USERNAME|USERS|USING|VALIDATE|VALUE|VALUES|VARCHAR|VARIADIC|VARYING|VERSION|VIEW|VIEWS|WAIT|WARNING|WEBHOOK|WHEN|WHERE|WINDOW|WIRE|WITH|WITHIN|WITHOUT|WORK|WORKERS|WORKLOAD|WRITE|YEAR|YEARS|YUGABYTE|ZONE|ZONES)\b">
|
||||
<token type="Keyword" />
|
||||
</rule>
|
||||
<rule pattern="[+*/<>=~!@#%^&|`?-]+">
|
||||
|
@ -1,182 +1,137 @@
|
||||
|
||||
<lexer>
|
||||
<config>
|
||||
<name>mcfunction</name>
|
||||
<name>MCFunction</name>
|
||||
<alias>mcfunction</alias>
|
||||
<alias>mcf</alias>
|
||||
<filename>*.mcfunction</filename>
|
||||
<dot_all>true</dot_all>
|
||||
<not_multiline>true</not_multiline>
|
||||
<mime_type>text/mcfunction</mime_type>
|
||||
</config>
|
||||
<rules>
|
||||
<state name="nbtobjectvalue">
|
||||
<rule pattern="("(\\\\|\\"|[^"])*"|[a-zA-Z0-9_]+)">
|
||||
<token type="NameTag"/>
|
||||
<push state="nbtobjectattribute"/>
|
||||
</rule>
|
||||
<rule pattern="\}">
|
||||
<token type="Punctuation"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="nbtarrayvalue">
|
||||
<rule>
|
||||
<include state="nbtvalue"/>
|
||||
</rule>
|
||||
<rule pattern=",">
|
||||
<token type="Punctuation"/>
|
||||
</rule>
|
||||
<rule pattern="\]">
|
||||
<token type="Punctuation"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="nbtvalue">
|
||||
<rule>
|
||||
<include state="simplevalue"/>
|
||||
</rule>
|
||||
<rule pattern="\{">
|
||||
<token type="Punctuation"/>
|
||||
<push state="nbtobjectvalue"/>
|
||||
</rule>
|
||||
<rule pattern="\[">
|
||||
<token type="Punctuation"/>
|
||||
<push state="nbtarrayvalue"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="argumentvalue">
|
||||
<rule>
|
||||
<include state="simplevalue"/>
|
||||
</rule>
|
||||
<rule pattern=",">
|
||||
<token type="Punctuation"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
<rule pattern="[}\]]">
|
||||
<token type="Punctuation"/>
|
||||
<pop depth="2"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="argumentlist">
|
||||
<rule pattern="(nbt)(={)">
|
||||
<bygroups>
|
||||
<token type="NameAttribute"/>
|
||||
<token type="Punctuation"/>
|
||||
</bygroups>
|
||||
<push state="nbtobjectvalue"/>
|
||||
</rule>
|
||||
<rule pattern="([A-Za-z0-9/_!]+)(={)">
|
||||
<bygroups>
|
||||
<token type="NameAttribute"/>
|
||||
<token type="Punctuation"/>
|
||||
</bygroups>
|
||||
<push state="argumentlist"/>
|
||||
</rule>
|
||||
<rule pattern="([A-Za-z0-9/_!]+)(=)">
|
||||
<bygroups>
|
||||
<token type="NameAttribute"/>
|
||||
<token type="Punctuation"/>
|
||||
</bygroups>
|
||||
<push state="argumentvalue"/>
|
||||
</rule>
|
||||
<rule>
|
||||
<include state="simplevalue"/>
|
||||
</rule>
|
||||
<rule pattern=",">
|
||||
<token type="Punctuation"/>
|
||||
</rule>
|
||||
<rule pattern="[}\]]">
|
||||
<token type="Punctuation"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="root">
|
||||
<rule pattern="#.*?\n">
|
||||
<token type="CommentSingle"/>
|
||||
</rule>
|
||||
<rule pattern="/?(geteduclientinfo|clearspawnpoint|defaultgamemode|transferserver|toggledownfall|immutableworld|detectredstone|setidletimeout|playanimation|classroommode|spreadplayers|testforblocks|setmaxplayers|setworldspawn|testforblock|worldbuilder|createagent|worldborder|camerashake|advancement|raytracefog|locatebiome|tickingarea|replaceitem|attributes|spawnpoint|difficulty|experience|scoreboard|whitelist|structure|playsound|stopsound|forceload|spectate|gamerule|function|schedule|wsserver|teleport|position|save-off|particle|setblock|datapack|mobevent|transfer|gamemode|save-all|bossbar|enchant|trigger|collect|execute|weather|teammsg|tpagent|banlist|dropall|publish|tellraw|testfor|save-on|destroy|ability|locate|summon|remove|effect|reload|ban-ip|recipe|pardon|detect|music|clear|clone|event|mixer|debug|title|ride|stop|list|turn|data|team|kick|loot|tell|help|give|flog|fill|move|time|seed|kill|save|item|deop|code|tag|ban|msg|say|tp|me|op|xp|w|place)\b">
|
||||
<token type="KeywordReserved"/>
|
||||
</rule>
|
||||
<rule pattern="(@p|@r|@a|@e|@s|@c|@v)">
|
||||
<token type="KeywordConstant"/>
|
||||
</rule>
|
||||
<rule pattern="\[">
|
||||
<token type="Punctuation"/>
|
||||
<push state="argumentlist"/>
|
||||
</rule>
|
||||
<rule pattern="{">
|
||||
<token type="Punctuation"/>
|
||||
<push state="nbtobjectvalue"/>
|
||||
</rule>
|
||||
<rule pattern="~">
|
||||
<token type="NameBuiltin"/>
|
||||
</rule>
|
||||
<rule pattern="([a-zA-Z_]+:)?[a-zA-Z_]+\b">
|
||||
<token type="Text"/>
|
||||
</rule>
|
||||
<rule pattern="([a-z]+)(\.)([0-9]+)\b">
|
||||
<bygroups>
|
||||
<token type="Text"/>
|
||||
<token type="Punctuation"/>
|
||||
<token type="LiteralNumber"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="([<>=]|<=|>=)">
|
||||
<token type="Punctuation"/>
|
||||
</rule>
|
||||
<rule>
|
||||
<include state="simplevalue"/>
|
||||
</rule>
|
||||
<rule pattern="\s+">
|
||||
<token type="TextWhitespace"/>
|
||||
</rule>
|
||||
<rule><include state="names"/></rule>
|
||||
<rule><include state="comments"/></rule>
|
||||
<rule><include state="literals"/></rule>
|
||||
<rule><include state="whitespace"/></rule>
|
||||
<rule><include state="property"/></rule>
|
||||
<rule><include state="operators"/></rule>
|
||||
<rule><include state="selectors"/></rule>
|
||||
</state>
|
||||
<state name="simplevalue">
|
||||
<rule pattern="(true|false)">
|
||||
<token type="KeywordConstant"/>
|
||||
</rule>
|
||||
<rule pattern="[01]b">
|
||||
<token type="LiteralNumber"/>
|
||||
</rule>
|
||||
<rule pattern="-?(0|[1-9]\d*)(\.\d+[eE](\+|-)?\d+|[eE](\+|-)?\d+|\.\d+)">
|
||||
<token type="LiteralNumberFloat"/>
|
||||
</rule>
|
||||
<rule pattern="(-?\d+)(\.\.)(-?\d+)">
|
||||
<bygroups>
|
||||
<token type="LiteralNumberInteger"/>
|
||||
<token type="Punctuation"/>
|
||||
<token type="LiteralNumberInteger"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="-?(0|[1-9]\d*)">
|
||||
<token type="LiteralNumberInteger"/>
|
||||
</rule>
|
||||
<rule pattern=""(\\\\|\\"|[^"])*"">
|
||||
<token type="LiteralStringDouble"/>
|
||||
</rule>
|
||||
<rule pattern="'[^']+'">
|
||||
<token type="LiteralStringSingle"/>
|
||||
</rule>
|
||||
<rule pattern="([!#]?)(\w+)">
|
||||
<bygroups>
|
||||
<token type="Punctuation"/>
|
||||
<token type="Text"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<state name="names">
|
||||
<rule pattern="^(\s*)([a-z_]+)"><bygroups><token type="TextWhitespace"/><token type="NameBuiltin"/></bygroups></rule>
|
||||
<rule pattern="(?<=run)\s+[a-z_]+"><token type="NameBuiltin"/></rule>
|
||||
<rule pattern="\b[0-9a-fA-F]+(?:-[0-9a-fA-F]+){4}\b"><token type="NameVariable"/></rule>
|
||||
<rule><include state="resource-name"/></rule>
|
||||
<rule pattern="[A-Za-z_][\w.#%$]+"><token type="KeywordConstant"/></rule>
|
||||
<rule pattern="[#%$][\w.#%$]+"><token type="NameVariableMagic"/></rule>
|
||||
</state>
|
||||
<state name="nbtobjectattribute">
|
||||
<rule>
|
||||
<include state="nbtvalue"/>
|
||||
</rule>
|
||||
<rule pattern=":">
|
||||
<token type="Punctuation"/>
|
||||
</rule>
|
||||
<rule pattern=",">
|
||||
<token type="Punctuation"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
<rule pattern="\}">
|
||||
<token type="Punctuation"/>
|
||||
<pop depth="2"/>
|
||||
</rule>
|
||||
<state name="resource-name">
|
||||
<rule pattern="#?[a-z_][a-z_.-]*:[a-z0-9_./-]+"><token type="NameFunction"/></rule>
|
||||
<rule pattern="#?[a-z0-9_\.\-]+\/[a-z0-9_\.\-\/]+"><token type="NameFunction"/></rule>
|
||||
</state>
|
||||
<state name="whitespace">
|
||||
<rule pattern="\s+"><token type="TextWhitespace"/></rule>
|
||||
</state>
|
||||
<state name="comments">
|
||||
<rule pattern="^\s*(#[>!])"><token type="CommentMultiline"/><push state="comments.block" state="comments.block.emphasized"/></rule>
|
||||
<rule pattern="#.*$"><token type="CommentSingle"/></rule>
|
||||
</state>
|
||||
<state name="comments.block">
|
||||
<rule pattern="^\s*#[>!]"><token type="CommentMultiline"/><push state="comments.block.emphasized"/></rule>
|
||||
<rule pattern="^\s*#"><token type="CommentMultiline"/><push state="comments.block.normal"/></rule>
|
||||
<rule><pop depth="1"/></rule>
|
||||
</state>
|
||||
<state name="comments.block.normal">
|
||||
<rule><include state="comments.block.special"/></rule>
|
||||
<rule pattern="\S+"><token type="CommentMultiline"/></rule>
|
||||
<rule pattern="\n"><token type="Text"/><pop depth="1"/></rule>
|
||||
<rule><include state="whitespace"/></rule>
|
||||
</state>
|
||||
<state name="comments.block.emphasized">
|
||||
<rule><include state="comments.block.special"/></rule>
|
||||
<rule pattern="\S+"><token type="LiteralStringDoc"/></rule>
|
||||
<rule pattern="\n"><token type="Text"/><pop depth="1"/></rule>
|
||||
<rule><include state="whitespace"/></rule>
|
||||
</state>
|
||||
<state name="comments.block.special">
|
||||
<rule pattern="@\S+"><token type="NameDecorator"/></rule>
|
||||
<rule><include state="resource-name"/></rule>
|
||||
<rule pattern="[#%$][\w.#%$]+"><token type="NameVariableMagic"/></rule>
|
||||
</state>
|
||||
<state name="operators">
|
||||
<rule pattern="[\-~%^?!+*<>\\/|&=.]"><token type="Operator"/></rule>
|
||||
</state>
|
||||
<state name="literals">
|
||||
<rule pattern="\.\."><token type="Literal"/></rule>
|
||||
<rule pattern="(true|false)"><token type="KeywordPseudo"/></rule>
|
||||
<rule pattern="[A-Za-z_]+"><token type="NameVariableClass"/></rule>
|
||||
<rule pattern="[0-7]b"><token type="LiteralNumberByte"/></rule>
|
||||
<rule pattern="[+-]?\d*\.?\d+([eE]?[+-]?\d+)?[df]?\b"><token type="LiteralNumberFloat"/></rule>
|
||||
<rule pattern="[+-]?\d+\b"><token type="LiteralNumberInteger"/></rule>
|
||||
<rule pattern="""><token type="LiteralStringDouble"/><push state="literals.string-double"/></rule>
|
||||
<rule pattern="'"><token type="LiteralStringSingle"/><push state="literals.string-single"/></rule>
|
||||
</state>
|
||||
<state name="literals.string-double">
|
||||
<rule pattern="\\."><token type="LiteralStringEscape"/></rule>
|
||||
<rule pattern="[^\\"\n]+"><token type="LiteralStringDouble"/></rule>
|
||||
<rule pattern="""><token type="LiteralStringDouble"/><pop depth="1"/></rule>
|
||||
</state>
|
||||
<state name="literals.string-single">
|
||||
<rule pattern="\\."><token type="LiteralStringEscape"/></rule>
|
||||
<rule pattern="[^\\'\n]+"><token type="LiteralStringSingle"/></rule>
|
||||
<rule pattern="'"><token type="LiteralStringSingle"/><pop depth="1"/></rule>
|
||||
</state>
|
||||
<state name="selectors">
|
||||
<rule pattern="@[a-z]"><token type="NameVariable"/></rule>
|
||||
</state>
|
||||
<state name="property">
|
||||
<rule pattern="\{"><token type="Punctuation"/><push state="property.curly" state="property.key"/></rule>
|
||||
<rule pattern="\["><token type="Punctuation"/><push state="property.square" state="property.key"/></rule>
|
||||
</state>
|
||||
<state name="property.curly">
|
||||
<rule><include state="whitespace"/></rule>
|
||||
<rule><include state="property"/></rule>
|
||||
<rule pattern="\}"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
</state>
|
||||
<state name="property.square">
|
||||
<rule><include state="whitespace"/></rule>
|
||||
<rule><include state="property"/></rule>
|
||||
<rule pattern="\]"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
<rule pattern=","><token type="Punctuation"/></rule>
|
||||
</state>
|
||||
<state name="property.key">
|
||||
<rule><include state="whitespace"/></rule>
|
||||
<rule pattern="#?[a-z_][a-z_\.\-]*\:[a-z0-9_\.\-/]+(?=\s*\=)"><token type="NameAttribute"/><push state="property.delimiter"/></rule>
|
||||
<rule pattern="#?[a-z_][a-z0-9_\.\-/]+"><token type="NameAttribute"/><push state="property.delimiter"/></rule>
|
||||
<rule pattern="[A-Za-z_\-\+]+"><token type="NameAttribute"/><push state="property.delimiter"/></rule>
|
||||
<rule pattern="""><token type="NameAttribute"/><push state="property.delimiter"/></rule>
|
||||
<rule pattern="'"><token type="NameAttribute"/><push state="property.delimiter"/></rule>
|
||||
<rule pattern="-?\d+"><token type="LiteralNumberInteger"/><push state="property.delimiter"/></rule>
|
||||
<rule><pop depth="1"/></rule>
|
||||
</state>
|
||||
<state name="property.key.string-double">
|
||||
<rule pattern="\\."><token type="LiteralStringEscape"/></rule>
|
||||
<rule pattern="[^\\"\n]+"><token type="NameAttribute"/></rule>
|
||||
<rule pattern="""><token type="NameAttribute"/><pop depth="1"/></rule>
|
||||
</state>
|
||||
<state name="property.key.string-single">
|
||||
<rule pattern="\\."><token type="LiteralStringEscape"/></rule>
|
||||
<rule pattern="[^\\'\n]+"><token type="NameAttribute"/></rule>
|
||||
<rule pattern="'"><token type="NameAttribute"/><pop depth="1"/></rule>
|
||||
</state>
|
||||
<state name="property.delimiter">
|
||||
<rule><include state="whitespace"/></rule>
|
||||
<rule pattern="[:=]!?"><token type="Punctuation"/><push state="property.value"/></rule>
|
||||
<rule pattern=","><token type="Punctuation"/></rule>
|
||||
<rule><pop depth="1"/></rule>
|
||||
</state>
|
||||
<state name="property.value">
|
||||
<rule><include state="whitespace"/></rule>
|
||||
<rule pattern="#?[a-z_][a-z_\.\-]*\:[a-z0-9_\.\-/]+"><token type="NameTag"/></rule>
|
||||
<rule pattern="#?[a-z_][a-z0-9_\.\-/]+"><token type="NameTag"/></rule>
|
||||
<rule><include state="literals"/></rule>
|
||||
<rule><include state="property"/></rule>
|
||||
<rule><pop depth="1"/></rule>
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
||||
|
33
lexers/moinwiki.xml
Normal file
33
lexers/moinwiki.xml
Normal file
@ -0,0 +1,33 @@
|
||||
|
||||
<lexer>
|
||||
<config>
|
||||
<name>MoinMoin/Trac Wiki markup</name>
|
||||
<alias>trac-wiki</alias>
|
||||
<alias>moin</alias>
|
||||
<mime_type>text/x-trac-wiki</mime_type>
|
||||
<case_insensitive>true</case_insensitive>
|
||||
</config>
|
||||
<rules>
|
||||
<state name="root">
|
||||
<rule pattern="^#.*$"><token type="Comment"/></rule>
|
||||
<rule pattern="(!)(\S+)"><bygroups><token type="Keyword"/><token type="Text"/></bygroups></rule>
|
||||
<rule pattern="^(=+)([^=]+)(=+)(\s*#.+)?$"><bygroups><token type="GenericHeading"/><usingself state="root"/><token type="GenericHeading"/><token type="LiteralString"/></bygroups></rule>
|
||||
<rule pattern="(\{\{\{)(\n#!.+)?"><bygroups><token type="NameBuiltin"/><token type="NameNamespace"/></bygroups><push state="codeblock"/></rule>
|
||||
<rule pattern="(\'\'\'?|\|\||`|__|~~|\^|,,|::)"><token type="Comment"/></rule>
|
||||
<rule pattern="^( +)([.*-])( )"><bygroups><token type="Text"/><token type="NameBuiltin"/><token type="Text"/></bygroups></rule>
|
||||
<rule pattern="^( +)([a-z]{1,5}\.)( )"><bygroups><token type="Text"/><token type="NameBuiltin"/><token type="Text"/></bygroups></rule>
|
||||
<rule pattern="\[\[\w+.*?\]\]"><token type="Keyword"/></rule>
|
||||
<rule pattern="(\[[^\s\]]+)(\s+[^\]]+?)?(\])"><bygroups><token type="Keyword"/><token type="LiteralString"/><token type="Keyword"/></bygroups></rule>
|
||||
<rule pattern="^----+$"><token type="Keyword"/></rule>
|
||||
<rule pattern="[^\n\'\[{!_~^,|]+"><token type="Text"/></rule>
|
||||
<rule pattern="\n"><token type="Text"/></rule>
|
||||
<rule pattern="."><token type="Text"/></rule>
|
||||
</state>
|
||||
<state name="codeblock">
|
||||
<rule pattern="\}\}\}"><token type="NameBuiltin"/><pop depth="1"/></rule>
|
||||
<rule pattern="\{\{\{"><token type="Text"/><push/></rule>
|
||||
<rule pattern="[^{}]+"><token type="CommentPreproc"/></rule>
|
||||
<rule pattern="."><token type="CommentPreproc"/></rule>
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
@ -106,7 +106,7 @@
|
||||
</bygroups>
|
||||
<push state="interpol"/>
|
||||
</rule>
|
||||
<rule pattern="(&&|>=|<=|\+\+|->|!=|\|\||//|==|@|!|\+|\?|<|\.|>|\*)">
|
||||
<rule pattern="(&&|>=|<=|\+\+|->|!=|=|\|\||//|==|@|!|\+|\?|<|\.|>|\*)">
|
||||
<token type="Operator"/>
|
||||
</rule>
|
||||
<rule pattern="[;:]">
|
||||
|
59
lexers/nsis.xml
Normal file
59
lexers/nsis.xml
Normal file
@ -0,0 +1,59 @@
|
||||
<lexer>
|
||||
<config>
|
||||
<name>NSIS</name>
|
||||
<alias>nsis</alias>
|
||||
<alias>nsi</alias>
|
||||
<alias>nsh</alias>
|
||||
<filename>*.nsi</filename>
|
||||
<filename>*.nsh</filename>
|
||||
<mime_type>text/x-nsis</mime_type>
|
||||
<case_insensitive>true</case_insensitive>
|
||||
<not_multiline>true</not_multiline>
|
||||
</config>
|
||||
<rules>
|
||||
<state name="root">
|
||||
<rule pattern="([;#].*)(\n)"><bygroups><token type="Comment"/><token type="TextWhitespace"/></bygroups></rule>
|
||||
<rule pattern="'.*?'"><token type="LiteralStringSingle"/></rule>
|
||||
<rule pattern="""><token type="LiteralStringDouble"/><push state="str_double"/></rule>
|
||||
<rule pattern="`"><token type="LiteralStringBacktick"/><push state="str_backtick"/></rule>
|
||||
<rule><include state="macro"/></rule>
|
||||
<rule><include state="interpol"/></rule>
|
||||
<rule><include state="basic"/></rule>
|
||||
<rule pattern="\$\{[a-z_|][\w|]*\}"><token type="KeywordPseudo"/></rule>
|
||||
<rule pattern="/[a-z_]\w*"><token type="NameAttribute"/></rule>
|
||||
<rule pattern="\s+"><token type="TextWhitespace"/></rule>
|
||||
<rule pattern="[\w.]+"><token type="Text"/></rule>
|
||||
</state>
|
||||
<state name="basic">
|
||||
<rule pattern="(\n)(Function)(\s+)([._a-z][.\w]*)\b"><bygroups><token type="TextWhitespace"/><token type="Keyword"/><token type="TextWhitespace"/><token type="NameFunction"/></bygroups></rule>
|
||||
<rule pattern="\b([_a-z]\w*)(::)([a-z][a-z0-9]*)\b"><bygroups><token type="KeywordNamespace"/><token type="Punctuation"/><token type="NameFunction"/></bygroups></rule>
|
||||
<rule pattern="\b([_a-z]\w*)(:)"><bygroups><token type="NameLabel"/><token type="Punctuation"/></bygroups></rule>
|
||||
<rule pattern="(\b[ULS]|\B)([!<>=]?=|\<\>?|\>)\B"><token type="Operator"/></rule>
|
||||
<rule pattern="[|+-]"><token type="Operator"/></rule>
|
||||
<rule pattern="\\"><token type="Punctuation"/></rule>
|
||||
<rule pattern="\b(Abort|Add(?:BrandingImage|Size)|Allow(?:RootDirInstall|SkipFiles)|AutoCloseWindow|BG(?:Font|Gradient)|BrandingText|BringToFront|Call(?:InstDLL)?|(?:Sub)?Caption|ChangeUI|CheckBitmap|ClearErrors|CompletedText|ComponentText|CopyFiles|CRCCheck|Create(?:Directory|Font|Shortcut)|Delete(?:INI(?:Sec|Str)|Reg(?:Key|Value))?|DetailPrint|DetailsButtonText|Dir(?:Show|Text|Var|Verify)|(?:Disabled|Enabled)Bitmap|EnableWindow|EnumReg(?:Key|Value)|Exch|Exec(?:Shell|Wait)?|ExpandEnvStrings|File(?:BufSize|Close|ErrorText|Open|Read(?:Byte)?|Seek|Write(?:Byte)?)?|Find(?:Close|First|Next|Window)|FlushINI|Function(?:End)?|Get(?:CurInstType|CurrentAddress|DlgItem|DLLVersion(?:Local)?|ErrorLevel|FileTime(?:Local)?|FullPathName|FunctionAddress|InstDirError|LabelAddress|TempFileName)|Goto|HideWindow|Icon|If(?:Abort|Errors|FileExists|RebootFlag|Silent)|InitPluginsDir|Install(?:ButtonText|Colors|Dir(?:RegKey)?)|Inst(?:ProgressFlags|Type(?:[GS]etText)?)|Int(?:CmpU?|Fmt|Op)|IsWindow|LangString(?:UP)?|License(?:BkColor|Data|ForceSelection|LangString|Text)|LoadLanguageFile|LockWindow|Log(?:Set|Text)|MessageBox|MiscButtonText|Name|Nop|OutFile|(?:Uninst)?Page(?:Ex(?:End)?)?|PluginDir|Pop|Push|Quit|Read(?:(?:Env|INI|Reg)Str|RegDWORD)|Reboot|(?:Un)?RegDLL|Rename|RequestExecutionLevel|ReserveFile|Return|RMDir|SearchPath|Section(?:Divider|End|(?:(?:Get|Set)(?:Flags|InstTypes|Size|Text))|Group(?:End)?|In)?|SendMessage|Set(?:AutoClose|BrandingImage|Compress(?:ionLevel|or(?:DictSize)?)?|CtlColors|CurInstType|DatablockOptimize|DateSave|Details(?:Print|View)|Error(?:s|Level)|FileAttributes|Font|OutPath|Overwrite|PluginUnload|RebootFlag|ShellVarContext|Silent|StaticBkColor)|Show(?:(?:I|Uni)nstDetails|Window)|Silent(?:Un)?Install|Sleep|SpaceTexts|Str(?:CmpS?|Cpy|Len)|SubSection(?:End)?|Uninstall(?:ButtonText|(?:Sub)?Caption|EXEName|Icon|Text)|UninstPage|Var|VI(?:AddVersionKey|ProductVersion)|WindowIcon|Write(?:INIStr|Reg(:?Bin|DWORD|(?:Expand)?Str)|Uninstaller)|XPStyle)\b"><token 
type="Keyword"/></rule>
|
||||
<rule pattern="\b(CUR|END|(?:FILE_ATTRIBUTE_)?(?:ARCHIVE|HIDDEN|NORMAL|OFFLINE|READONLY|SYSTEM|TEMPORARY)|HK(CC|CR|CU|DD|LM|PD|U)|HKEY_(?:CLASSES_ROOT|CURRENT_(?:CONFIG|USER)|DYN_DATA|LOCAL_MACHINE|PERFORMANCE_DATA|USERS)|ID(?:ABORT|CANCEL|IGNORE|NO|OK|RETRY|YES)|MB_(?:ABORTRETRYIGNORE|DEFBUTTON[1-4]|ICON(?:EXCLAMATION|INFORMATION|QUESTION|STOP)|OK(?:CANCEL)?|RETRYCANCEL|RIGHT|SETFOREGROUND|TOPMOST|USERICON|YESNO(?:CANCEL)?)|SET|SHCTX|SW_(?:HIDE|SHOW(?:MAXIMIZED|MINIMIZED|NORMAL))|admin|all|auto|both|bottom|bzip2|checkbox|colored|current|false|force|hide|highest|if(?:diff|newer)|lastused|leave|left|listonly|lzma|nevershow|none|normal|off|on|pop|push|radiobuttons|right|show|silent|silentlog|smooth|textonly|top|true|try|user|zlib)\b"><token type="NameConstant"/></rule>
|
||||
</state>
|
||||
<state name="macro">
|
||||
<rule pattern="\!(addincludedir(?:dir)?|addplugindir|appendfile|cd|define|delfilefile|echo(?:message)?|else|endif|error|execute|if(?:macro)?n?(?:def)?|include|insertmacro|macro(?:end)?|packhdr|search(?:parse|replace)|system|tempfilesymbol|undef|verbose|warning)\b"><token type="CommentPreproc"/></rule>
|
||||
</state>
|
||||
<state name="interpol">
|
||||
<rule pattern="\$(R?[0-9])"><token type="NameBuiltinPseudo"/></rule>
|
||||
<rule pattern="\$(ADMINTOOLS|APPDATA|CDBURN_AREA|COOKIES|COMMONFILES(?:32|64)|DESKTOP|DOCUMENTS|EXE(?:DIR|FILE|PATH)|FAVORITES|FONTS|HISTORY|HWNDPARENT|INTERNET_CACHE|LOCALAPPDATA|MUSIC|NETHOOD|PICTURES|PLUGINSDIR|PRINTHOOD|PROFILE|PROGRAMFILES(?:32|64)|QUICKLAUNCH|RECENT|RESOURCES(?:_LOCALIZED)?|SENDTO|SM(?:PROGRAMS|STARTUP)|STARTMENU|SYSDIR|TEMP(?:LATES)?|VIDEOS|WINDIR|\{NSISDIR\})"><token type="NameBuiltin"/></rule>
|
||||
<rule pattern="\$(CMDLINE|INSTDIR|OUTDIR|LANGUAGE)"><token type="NameVariableGlobal"/></rule>
|
||||
<rule pattern="\$[a-z_]\w*"><token type="NameVariable"/></rule>
|
||||
</state>
|
||||
<state name="str_double">
|
||||
<rule pattern="""><token type="LiteralStringDouble"/><pop depth="1"/></rule>
|
||||
<rule pattern="\$(\\[nrt"]|\$)"><token type="LiteralStringEscape"/></rule>
|
||||
<rule><include state="interpol"/></rule>
|
||||
<rule pattern="[^"]+"><token type="LiteralStringDouble"/></rule>
|
||||
</state>
|
||||
<state name="str_backtick">
|
||||
<rule pattern="`"><token type="LiteralStringDouble"/><pop depth="1"/></rule>
|
||||
<rule pattern="\$(\\[nrt"]|\$)"><token type="LiteralStringEscape"/></rule>
|
||||
<rule><include state="interpol"/></rule>
|
||||
<rule pattern="[^`]+"><token type="LiteralStringDouble"/></rule>
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
@ -41,6 +41,14 @@
|
||||
<rule pattern="\b(as|assert|begin|class|constraint|do|done|downto|else|end|exception|external|false|for|fun|function|functor|if|in|include|inherit|initializer|lazy|let|match|method|module|mutable|new|object|of|open|private|raise|rec|sig|struct|then|to|true|try|type|value|val|virtual|when|while|with)\b">
|
||||
<token type="Keyword"/>
|
||||
</rule>
|
||||
<rule pattern="({([a-z_]*)\|)([\s\S]+?)(?=\|\2})(\|\2})">
|
||||
<bygroups>
|
||||
<token type="LiteralStringAffix"/>
|
||||
<token type="Ignore"/>
|
||||
<token type="LiteralString"/>
|
||||
<token type="LiteralStringAffix"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="(~|\}|\|]|\||\{<|\{|`|_|]|\[\||\[>|\[<|\[|\?\?|\?|>\}|>]|>|=|<-|<|;;|;|:>|:=|::|:|\.\.|\.|->|-\.|-|,|\+|\*|\)|\(|&&|&|#|!=)">
|
||||
<token type="Operator"/>
|
||||
</rule>
|
||||
|
@ -51,6 +51,20 @@
|
||||
<rule pattern = "\#[a-zA-Z_]+\b">
|
||||
<token type = "NameDecorator"/>
|
||||
</rule>
|
||||
<rule pattern = "^\#\+\w+\s*$">
|
||||
<token type = "NameAttribute"/>
|
||||
</rule>
|
||||
<rule pattern = "^(\#\+\w+)(\s+)(\!)?([A-Za-z0-9-_!]+)(?:(,)(\!)?([A-Za-z0-9-_!]+))*\s*$">
|
||||
<bygroups>
|
||||
<token type = "NameAttribute"/>
|
||||
<token type = "TextWhitespace"/>
|
||||
<token type = "Operator"/>
|
||||
<token type = "Name"/>
|
||||
<token type = "Punctuation"/>
|
||||
<token type = "Operator"/>
|
||||
<token type = "Name"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern = "\@(\([a-zA-Z_]+\b\s*.*\)|\(?[a-zA-Z_]+\)?)">
|
||||
<token type = "NameAttribute"/>
|
||||
</rule>
|
||||
|
@ -55,4 +55,3 @@
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
||||
|
||||
|
75
lexers/rst.xml
Normal file
75
lexers/rst.xml
Normal file
@ -0,0 +1,75 @@
|
||||
|
||||
<lexer>
|
||||
<config>
|
||||
<name>reStructuredText</name>
|
||||
<alias>restructuredtext</alias>
|
||||
<alias>rst</alias>
|
||||
<alias>rest</alias>
|
||||
<filename>*.rst</filename>
|
||||
<filename>*.rest</filename>
|
||||
<mime_type>text/x-rst</mime_type>
|
||||
<mime_type>text/prs.fallenstein.rst</mime_type>
|
||||
</config>
|
||||
<rules>
|
||||
<state name="root">
|
||||
<rule pattern="^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)(.+)(\n)(\1)(\n)"><bygroups><token type="GenericHeading"/><token type="Text"/><token type="GenericHeading"/><token type="Text"/><token type="GenericHeading"/><token type="Text"/></bygroups></rule>
|
||||
<rule pattern="^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)"><bygroups><token type="GenericHeading"/><token type="Text"/><token type="GenericHeading"/><token type="Text"/></bygroups></rule>
|
||||
<rule pattern="^(\s*)([-*+])( .+\n(?:\1 .+\n)*)"><bygroups><token type="Text"/><token type="LiteralNumber"/><usingself state="inline"/></bygroups></rule>
|
||||
<rule pattern="^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1 .+\n)*)"><bygroups><token type="Text"/><token type="LiteralNumber"/><usingself state="inline"/></bygroups></rule>
|
||||
<rule pattern="^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1 .+\n)*)"><bygroups><token type="Text"/><token type="LiteralNumber"/><usingself state="inline"/></bygroups></rule>
|
||||
<rule pattern="^(\s*)([A-Z]+\.)( .+\n(?:\1 .+\n)+)"><bygroups><token type="Text"/><token type="LiteralNumber"/><usingself state="inline"/></bygroups></rule>
|
||||
<rule pattern="^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)"><bygroups><token type="Text"/><token type="LiteralNumber"/><usingself state="inline"/></bygroups></rule>
|
||||
<rule pattern="^(\s*)(\|)( .+\n(?:\| .+\n)*)"><bygroups><token type="Text"/><token type="Operator"/><usingself state="inline"/></bygroups></rule>
|
||||
<rule pattern="^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*)?\n)+)">
|
||||
<bygroups>
|
||||
<token type="Punctuation"/>
|
||||
<token type="Text"/>
|
||||
<token type="OperatorWord"/>
|
||||
<token type="Punctuation"/>
|
||||
<token type="Text"/>
|
||||
<token type="Keyword"/>
|
||||
<token type="Text"/>
|
||||
<token type="Text"/>
|
||||
<UsingByGroup lexer="6" content="9,10,11"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))">
|
||||
<bygroups>
|
||||
<token type="Punctuation"/>
|
||||
<token type="Text"/>
|
||||
<token type="OperatorWord"/>
|
||||
<token type="Punctuation"/>
|
||||
<token type="Text"/>
|
||||
<usingself state="inline"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$"><bygroups><token type="Punctuation"/><token type="Text"/><token type="NameTag"/><usingself state="inline"/></bygroups></rule>
|
||||
<rule pattern="^( *\.\.)(\s*)(\[.+\])(.*?)$"><bygroups><token type="Punctuation"/><token type="Text"/><token type="NameTag"/><usingself state="inline"/></bygroups></rule>
|
||||
<rule pattern="^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))"><bygroups><token type="Punctuation"/><token type="Text"/><token type="NameTag"/><token type="Text"/><token type="OperatorWord"/><token type="Punctuation"/><token type="Text"/><usingself state="inline"/></bygroups></rule>
|
||||
<rule pattern="^ *\.\..*(\n( +.*\n|\n)+)?"><token type="Comment"/></rule>
|
||||
<rule pattern="^( *)(:(?:\\\\|\\:|[^:\n])+:(?=\s))([ \t]*)"><bygroups><token type="Text"/><token type="NameClass"/><token type="Text"/></bygroups></rule>
|
||||
<rule pattern="^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)"><bygroups><usingself state="inline"/><usingself state="inline"/></bygroups></rule>
|
||||
<rule pattern="(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*)?\n)+)"><bygroups><token type="LiteralStringEscape"/><token type="Text"/><token type="LiteralString"/><token type="LiteralString"/><token type="Text"/><token type="LiteralString"/></bygroups></rule>
|
||||
<rule><include state="inline"/></rule>
|
||||
</state>
|
||||
<state name="inline">
|
||||
<rule pattern="\\."><token type="Text"/></rule>
|
||||
<rule pattern="``"><token type="LiteralString"/><push state="literal"/></rule>
|
||||
<rule pattern="(`.+?)(<.+?>)(`__?)"><bygroups><token type="LiteralString"/><token type="LiteralStringInterpol"/><token type="LiteralString"/></bygroups></rule>
|
||||
<rule pattern="`.+?`__?"><token type="LiteralString"/></rule>
|
||||
<rule pattern="(`.+?`)(:[a-zA-Z0-9:-]+?:)?"><bygroups><token type="NameVariable"/><token type="NameAttribute"/></bygroups></rule>
|
||||
<rule pattern="(:[a-zA-Z0-9:-]+?:)(`.+?`)"><bygroups><token type="NameAttribute"/><token type="NameVariable"/></bygroups></rule>
|
||||
<rule pattern="\*\*.+?\*\*"><token type="GenericStrong"/></rule>
|
||||
<rule pattern="\*.+?\*"><token type="GenericEmph"/></rule>
|
||||
<rule pattern="\[.*?\]_"><token type="LiteralString"/></rule>
|
||||
<rule pattern="<.+?>"><token type="NameTag"/></rule>
|
||||
<rule pattern="[^\\\n\[*`:]+"><token type="Text"/></rule>
|
||||
<rule pattern="."><token type="Text"/></rule>
|
||||
</state>
|
||||
<state name="literal">
|
||||
<rule pattern="[^`]+"><token type="LiteralString"/></rule>
|
||||
<rule pattern="``((?=$)|(?=[-/:.,; \n\x00‐‑‒–— '"\)\]\}>’”»!\?]))"><token type="LiteralString"/><pop depth="1"/></rule>
|
||||
<rule pattern="`"><token type="LiteralString"/></rule>
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
@ -70,4 +70,3 @@
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
||||
|
||||
|
57
lexers/snbt.xml
Normal file
57
lexers/snbt.xml
Normal file
@ -0,0 +1,57 @@
|
||||
|
||||
<lexer>
|
||||
<config>
|
||||
<name>SNBT</name>
|
||||
<alias>snbt</alias>
|
||||
<filename>*.snbt</filename>
|
||||
<mime_type>text/snbt</mime_type>
|
||||
</config>
|
||||
<rules>
|
||||
<state name="root">
|
||||
<rule pattern="\{"><token type="Punctuation"/><push state="compound"/></rule>
|
||||
<rule pattern="[^\{]+"><token type="Text"/></rule>
|
||||
</state>
|
||||
<state name="whitespace">
|
||||
<rule pattern="\s+"><token type="TextWhitespace"/></rule>
|
||||
</state>
|
||||
<state name="operators">
|
||||
<rule pattern="[,:;]"><token type="Punctuation"/></rule>
|
||||
</state>
|
||||
<state name="literals">
|
||||
<rule pattern="(true|false)"><token type="KeywordConstant"/></rule>
|
||||
<rule pattern="-?\d+[eE]-?\d+"><token type="LiteralNumberFloat"/></rule>
|
||||
<rule pattern="-?\d*\.\d+[fFdD]?"><token type="LiteralNumberFloat"/></rule>
|
||||
<rule pattern="-?\d+[bBsSlLfFdD]?"><token type="LiteralNumberInteger"/></rule>
|
||||
<rule pattern="""><token type="LiteralStringDouble"/><push state="literals.string_double"/></rule>
|
||||
<rule pattern="'"><token type="LiteralStringSingle"/><push state="literals.string_single"/></rule>
|
||||
</state>
|
||||
<state name="literals.string_double">
|
||||
<rule pattern="\\."><token type="LiteralStringEscape"/></rule>
|
||||
<rule pattern="[^\\"\n]+"><token type="LiteralStringDouble"/></rule>
|
||||
<rule pattern="""><token type="LiteralStringDouble"/><pop depth="1"/></rule>
|
||||
</state>
|
||||
<state name="literals.string_single">
|
||||
<rule pattern="\\."><token type="LiteralStringEscape"/></rule>
|
||||
<rule pattern="[^\\'\n]+"><token type="LiteralStringSingle"/></rule>
|
||||
<rule pattern="'"><token type="LiteralStringSingle"/><pop depth="1"/></rule>
|
||||
</state>
|
||||
<state name="compound">
|
||||
<rule pattern="[A-Z_a-z]+"><token type="NameAttribute"/></rule>
|
||||
<rule><include state="operators"/></rule>
|
||||
<rule><include state="whitespace"/></rule>
|
||||
<rule><include state="literals"/></rule>
|
||||
<rule pattern="\{"><token type="Punctuation"/><push/></rule>
|
||||
<rule pattern="\["><token type="Punctuation"/><push state="list"/></rule>
|
||||
<rule pattern="\}"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
</state>
|
||||
<state name="list">
|
||||
<rule pattern="[A-Z_a-z]+"><token type="NameAttribute"/></rule>
|
||||
<rule><include state="literals"/></rule>
|
||||
<rule><include state="operators"/></rule>
|
||||
<rule><include state="whitespace"/></rule>
|
||||
<rule pattern="\["><token type="Punctuation"/><push/></rule>
|
||||
<rule pattern="\{"><token type="Punctuation"/><push state="compound"/></rule>
|
||||
<rule pattern="\]"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
@ -157,8 +157,20 @@
|
||||
<rule pattern="(continue|returns|storage|memory|delete|return|throw|break|catch|while|else|from|new|try|for|if|is|as|do|in|_)\b">
|
||||
<token type="Keyword"/>
|
||||
</rule>
|
||||
<rule pattern="assembly\b">
|
||||
<rule pattern="(assembly)(\s+\()(.+)(\)\s+{)">
|
||||
<bygroups>
|
||||
<token type="Keyword"/>
|
||||
<token type="Text"/>
|
||||
<token type="LiteralString"/>
|
||||
<token type="Text"/>
|
||||
</bygroups>
|
||||
<push state="assembly"/>
|
||||
</rule>
|
||||
<rule pattern="(assembly)(\s+{)">
|
||||
<bygroups>
|
||||
<token type="Keyword"/>
|
||||
<token type="Text"/>
|
||||
</bygroups>
|
||||
<push state="assembly"/>
|
||||
</rule>
|
||||
<rule pattern="(contract|interface|enum|event|struct)(\s+)([a-zA-Z_]\w*)">
|
||||
@ -235,7 +247,7 @@
|
||||
<token type="Punctuation"/>
|
||||
<pop depth="1"/>
|
||||
</rule>
|
||||
<rule pattern="[(),]">
|
||||
<rule pattern="[(),.]">
|
||||
<token type="Punctuation"/>
|
||||
</rule>
|
||||
<rule pattern=":=|=:">
|
||||
|
@ -40,4 +40,3 @@
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
||||
|
||||
|
@ -51,6 +51,22 @@
|
||||
</rule>
|
||||
</state>
|
||||
<state name="tag">
|
||||
<rule>
|
||||
<include state="jsx"/>
|
||||
</rule>
|
||||
<rule pattern=",">
|
||||
<token type="Punctuation"/>
|
||||
</rule>
|
||||
<rule pattern=""(\\\\|\\"|[^"])*"">
|
||||
<token type="LiteralStringDouble"/>
|
||||
</rule>
|
||||
<rule pattern="'(\\\\|\\'|[^'])*'">
|
||||
<token type="LiteralStringSingle"/>
|
||||
</rule>
|
||||
<rule pattern="`">
|
||||
<token type="LiteralStringBacktick"/>
|
||||
<push state="interp"/>
|
||||
</rule>
|
||||
<rule>
|
||||
<include state="commentsandwhitespace"/>
|
||||
</rule>
|
||||
@ -171,7 +187,7 @@
|
||||
</rule>
|
||||
<rule pattern="(?=/)">
|
||||
<token type="Text"/>
|
||||
<push state="#pop" state="badregex"/>
|
||||
<push state="badregex"/>
|
||||
</rule>
|
||||
<rule>
|
||||
<pop depth="1"/>
|
||||
|
107
lexers/typst.xml
Normal file
107
lexers/typst.xml
Normal file
@ -0,0 +1,107 @@
|
||||
|
||||
<lexer>
|
||||
<config>
|
||||
<name>Typst</name>
|
||||
<alias>typst</alias>
|
||||
<filename>*.typ</filename>
|
||||
<mime_type>text/x-typst</mime_type>
|
||||
</config>
|
||||
<rules>
|
||||
<state name="root">
|
||||
<rule><include state="markup"/></rule>
|
||||
</state>
|
||||
<state name="into_code">
|
||||
<rule pattern="(\#let|\#set|\#show)\b"><token type="KeywordDeclaration"/><push state="inline_code"/></rule>
|
||||
<rule pattern="(\#import|\#include)\b"><token type="KeywordNamespace"/><push state="inline_code"/></rule>
|
||||
<rule pattern="(\#if|\#for|\#while|\#export)\b"><token type="KeywordReserved"/><push state="inline_code"/></rule>
|
||||
<rule pattern="#\{"><token type="Punctuation"/><push state="code"/></rule>
|
||||
<rule pattern="#\("><token type="Punctuation"/><push state="code"/></rule>
|
||||
<rule pattern="(#[a-zA-Z_][a-zA-Z0-9_-]*)(\[)"><bygroups><token type="NameFunction"/><token type="Punctuation"/></bygroups><push state="markup"/></rule>
|
||||
<rule pattern="(#[a-zA-Z_][a-zA-Z0-9_-]*)(\()"><bygroups><token type="NameFunction"/><token type="Punctuation"/></bygroups><push state="code"/></rule>
|
||||
<rule pattern="(\#true|\#false|\#none|\#auto)\b"><token type="KeywordConstant"/></rule>
|
||||
<rule pattern="#[a-zA-Z_][a-zA-Z0-9_]*"><token type="NameVariable"/></rule>
|
||||
<rule pattern="#0x[0-9a-fA-F]+"><token type="LiteralNumberHex"/></rule>
|
||||
<rule pattern="#0b[01]+"><token type="LiteralNumberBin"/></rule>
|
||||
<rule pattern="#0o[0-7]+"><token type="LiteralNumberOct"/></rule>
|
||||
<rule pattern="#[0-9]+[\.e][0-9]+"><token type="LiteralNumberFloat"/></rule>
|
||||
<rule pattern="#[0-9]+"><token type="LiteralNumberInteger"/></rule>
|
||||
</state>
|
||||
<state name="markup">
|
||||
<rule><include state="comment"/></rule>
|
||||
<rule pattern="^\s*=+.*$"><token type="GenericHeading"/></rule>
|
||||
<rule pattern="[*][^*]*[*]"><token type="GenericStrong"/></rule>
|
||||
<rule pattern="_[^_]*_"><token type="GenericEmph"/></rule>
|
||||
<rule pattern="\$"><token type="Punctuation"/><push state="math"/></rule>
|
||||
<rule pattern="`[^`]*`"><token type="LiteralStringBacktick"/></rule>
|
||||
<rule pattern="^(\s*)(-)(\s+)"><bygroups><token type="TextWhitespace"/><token type="Punctuation"/><token type="TextWhitespace"/></bygroups></rule>
|
||||
<rule pattern="^(\s*)(\+)(\s+)"><bygroups><token type="TextWhitespace"/><token type="Punctuation"/><token type="TextWhitespace"/></bygroups></rule>
|
||||
<rule pattern="^(\s*)([0-9]+\.)"><bygroups><token type="TextWhitespace"/><token type="Punctuation"/></bygroups></rule>
|
||||
<rule pattern="^(\s*)(/)(\s+)([^:]+)(:)"><bygroups><token type="TextWhitespace"/><token type="Punctuation"/><token type="TextWhitespace"/><token type="NameVariable"/><token type="Punctuation"/></bygroups></rule>
|
||||
<rule pattern="<[a-zA-Z_][a-zA-Z0-9_-]*>"><token type="NameLabel"/></rule>
|
||||
<rule pattern="@[a-zA-Z_][a-zA-Z0-9_-]*"><token type="NameLabel"/></rule>
|
||||
<rule pattern="\\#"><token type="Text"/></rule>
|
||||
<rule><include state="into_code"/></rule>
|
||||
<rule pattern="```(?:.|\n)*?```"><token type="LiteralStringBacktick"/></rule>
|
||||
<rule pattern="https?://[0-9a-zA-Z~/%#&=\',;.+?]*"><token type="GenericEmph"/></rule>
|
||||
<rule pattern="(\-\-\-|\\|\~|\-\-|\.\.\.)\B"><token type="Punctuation"/></rule>
|
||||
<rule pattern="\\\["><token type="Punctuation"/></rule>
|
||||
<rule pattern="\\\]"><token type="Punctuation"/></rule>
|
||||
<rule pattern="\["><token type="Punctuation"/><push/></rule>
|
||||
<rule pattern="\]"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
<rule pattern="[ \t]+\n?|\n"><token type="TextWhitespace"/></rule>
|
||||
<rule pattern="((?![*_$`<@\\#\] ]|https?://).)+"><token type="Text"/></rule>
|
||||
</state>
|
||||
<state name="math">
|
||||
<rule><include state="comment"/></rule>
|
||||
<rule pattern="(\\_|\\\^|\\\&)"><token type="Text"/></rule>
|
||||
<rule pattern="(_|\^|\&|;)"><token type="Punctuation"/></rule>
|
||||
<rule pattern="(\+|/|=|\[\||\|\]|\|\||\*|:=|::=|\.\.\.|'|\-|=:|!=|>>|>=|>>>|<<|<=|<<<|\->|\|\->|=>|\|=>|==>|\-\->|\~\~>|\~>|>\->|\->>|<\-|<==|<\-\-|<\~\~|<\~|<\-<|<<\-|<\->|<=>|<==>|<\-\->|>|<|\~|:|\|)"><token type="Operator"/></rule>
|
||||
<rule pattern="\\"><token type="Punctuation"/></rule>
|
||||
<rule pattern="\\\$"><token type="Punctuation"/></rule>
|
||||
<rule pattern="\$"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
<rule><include state="into_code"/></rule>
|
||||
<rule pattern="([a-zA-Z][a-zA-Z0-9-]*)(\s*)(\()"><bygroups><token type="NameFunction"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups></rule>
|
||||
<rule pattern="([a-zA-Z][a-zA-Z0-9-]*)(:)"><bygroups><token type="NameVariable"/><token type="Punctuation"/></bygroups></rule>
|
||||
<rule pattern="([a-zA-Z][a-zA-Z0-9-]*)"><token type="NameVariable"/></rule>
|
||||
<rule pattern="[0-9]+(\.[0-9]+)?"><token type="LiteralNumber"/></rule>
|
||||
<rule pattern="\.{1,3}|\(|\)|,|\{|\}"><token type="Punctuation"/></rule>
|
||||
<rule pattern=""[^"]*""><token type="LiteralStringDouble"/></rule>
|
||||
<rule pattern="[ \t\n]+"><token type="TextWhitespace"/></rule>
|
||||
</state>
|
||||
<state name="comment">
|
||||
<rule pattern="//.*$"><token type="CommentSingle"/></rule>
|
||||
<rule pattern="/[*](.|\n)*?[*]/"><token type="CommentMultiline"/></rule>
|
||||
</state>
|
||||
<state name="code">
|
||||
<rule><include state="comment"/></rule>
|
||||
<rule pattern="\["><token type="Punctuation"/><push state="markup"/></rule>
|
||||
<rule pattern="\(|\{"><token type="Punctuation"/><push state="code"/></rule>
|
||||
<rule pattern="\)|\}"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
<rule pattern=""[^"]*""><token type="LiteralStringDouble"/></rule>
|
||||
<rule pattern=",|\.{1,2}"><token type="Punctuation"/></rule>
|
||||
<rule pattern="="><token type="Operator"/></rule>
|
||||
<rule pattern="(and|or|not)\b"><token type="OperatorWord"/></rule>
|
||||
<rule pattern="=>|<=|==|!=|>|<|-=|\+=|\*=|/=|\+|-|\\|\*"><token type="Operator"/></rule>
|
||||
<rule pattern="([a-zA-Z_][a-zA-Z0-9_-]*)(:)"><bygroups><token type="NameVariable"/><token type="Punctuation"/></bygroups></rule>
|
||||
<rule pattern="([a-zA-Z_][a-zA-Z0-9_-]*)(\()"><bygroups><token type="NameFunction"/><token type="Punctuation"/></bygroups><push state="code"/></rule>
|
||||
<rule pattern="(as|break|export|continue|else|for|if|in|return|while)\b"><token type="KeywordReserved"/></rule>
|
||||
<rule pattern="(import|include)\b"><token type="KeywordNamespace"/></rule>
|
||||
<rule pattern="(auto|none|true|false)\b"><token type="KeywordConstant"/></rule>
|
||||
<rule pattern="([0-9.]+)(mm|pt|cm|in|em|fr|%)"><bygroups><token type="LiteralNumber"/><token type="KeywordReserved"/></bygroups></rule>
|
||||
<rule pattern="0x[0-9a-fA-F]+"><token type="LiteralNumberHex"/></rule>
|
||||
<rule pattern="0b[01]+"><token type="LiteralNumberBin"/></rule>
|
||||
<rule pattern="0o[0-7]+"><token type="LiteralNumberOct"/></rule>
|
||||
<rule pattern="[0-9]+[\.e][0-9]+"><token type="LiteralNumberFloat"/></rule>
|
||||
<rule pattern="[0-9]+"><token type="LiteralNumberInteger"/></rule>
|
||||
<rule pattern="(let|set|show)\b"><token type="KeywordDeclaration"/></rule>
|
||||
<rule pattern="([a-zA-Z_][a-zA-Z0-9_-]*)"><token type="NameVariable"/></rule>
|
||||
<rule pattern="[ \t\n]+"><token type="TextWhitespace"/></rule>
|
||||
<rule pattern=":"><token type="Punctuation"/></rule>
|
||||
</state>
|
||||
<state name="inline_code">
|
||||
<rule pattern=";\b"><token type="Punctuation"/><pop depth="1"/></rule>
|
||||
<rule pattern="\n"><token type="TextWhitespace"/><pop depth="1"/></rule>
|
||||
<rule><include state="code"/></rule>
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
283
lexers/webvtt.xml
Normal file
283
lexers/webvtt.xml
Normal file
@ -0,0 +1,283 @@
|
||||
<lexer>
|
||||
<config>
|
||||
<name>WebVTT</name>
|
||||
<alias>vtt</alias>
|
||||
<filename>*.vtt</filename>
|
||||
<mime_type>text/vtt</mime_type>
|
||||
</config>
|
||||
<!--
|
||||
The WebVTT spec refers to a WebVTT line terminator as either CRLF, CR or LF.
|
||||
(https://www.w3.org/TR/webvtt1/#webvtt-line-terminator) However, with this
|
||||
definition it is unclear whether CRLF is one line terminator (CRLF) or two
|
||||
line terminators (CR and LF).
|
||||
|
||||
To work around this ambiguity, only CRLF and LF are considered as line terminators.
|
||||
To my knowledge only classic Mac OS uses CR as line terminators, so the lexer should
|
||||
still work for most files.
|
||||
-->
|
||||
<rules>
|
||||
<!-- https://www.w3.org/TR/webvtt1/#webvtt-file-body -->
|
||||
<state name="root">
|
||||
<rule pattern="(\AWEBVTT)((?:[ \t][^\r\n]*)?(?:\r?\n){2,})">
|
||||
<bygroups>
|
||||
<token type="Keyword" />
|
||||
<token type="Text" />
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="(^REGION)([ \t]*$)">
|
||||
<bygroups>
|
||||
<token type="Keyword" />
|
||||
<token type="Text" />
|
||||
</bygroups>
|
||||
<push state="region-settings-list" />
|
||||
</rule>
|
||||
<rule
|
||||
pattern="(^STYLE)([ \t]*$)((?:(?!-->)[\s\S])*?)((?:\r?\n){2})">
|
||||
<bygroups>
|
||||
<token type="Keyword" />
|
||||
<token type="Text" />
|
||||
<using lexer="CSS" />
|
||||
<token type="Text" />
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule>
|
||||
<include state="comment" />
|
||||
</rule>
|
||||
<rule
|
||||
pattern="(?=((?![^\r\n]*-->)[^\r\n]*\r?\n)?(\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3}[ \t]+-->[ \t]+(\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3})"
|
||||
>
|
||||
<push state="cues" />
|
||||
</rule>
|
||||
</state>
|
||||
|
||||
<!-- https://www.w3.org/TR/webvtt1/#webvtt-region-settings-list -->
|
||||
<state name="region-settings-list">
|
||||
<rule pattern="(?: |\t|\r?\n(?!\r?\n))+">
|
||||
<token type="Text" />
|
||||
</rule>
|
||||
<rule pattern="(?:\r?\n){2}">
|
||||
<token type="Text" />
|
||||
<pop depth="1" />
|
||||
</rule>
|
||||
<rule pattern="(id)(:)(?!-->)(\S+)">
|
||||
<bygroups>
|
||||
<token type="Keyword" />
|
||||
<token type="Punctuation" />
|
||||
<token type="Literal" />
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="(width)(:)((?:[1-9]?\d|100)(?:\.\d+)?)(%)">
|
||||
<bygroups>
|
||||
<token type="Keyword" />
|
||||
<token type="Punctuation" />
|
||||
<token type="Literal" />
|
||||
<token type="KeywordType" />
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="(lines)(:)(\d+)">
|
||||
<bygroups>
|
||||
<token type="Keyword" />
|
||||
<token type="Punctuation" />
|
||||
<token type="Literal" />
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule
|
||||
pattern="(regionanchor|viewportanchor)(:)((?:[1-9]?\d|100)(?:\.\d+)?)(%)(,)((?:[1-9]?\d|100)(?:\.\d+)?)(%)">
|
||||
<bygroups>
|
||||
<token type="Keyword" />
|
||||
<token type="Punctuation" />
|
||||
<token type="Literal" />
|
||||
<token type="KeywordType" />
|
||||
<token type="Punctuation" />
|
||||
<token type="Literal" />
|
||||
<token type="KeywordType" />
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="(scroll)(:)(up)">
|
||||
<bygroups>
|
||||
<token type="Keyword" />
|
||||
<token type="Punctuation" />
|
||||
<token type="KeywordConstant" />
|
||||
</bygroups>
|
||||
</rule>
|
||||
</state>
|
||||
|
||||
<!-- https://www.w3.org/TR/webvtt1/#webvtt-comment-block -->
|
||||
<state name="comment">
|
||||
<rule
|
||||
pattern="^NOTE( |\t|\r?\n)((?!-->)[\s\S])*?(?:(\r?\n){2}|\Z)">
|
||||
<token type="Comment" />
|
||||
</rule>
|
||||
</state>
|
||||
|
||||
<!--
|
||||
"Zero or more WebVTT cue blocks and WebVTT comment blocks separated from each other by one or more
|
||||
WebVTT line terminators." (https://www.w3.org/TR/webvtt1/#file-structure)
|
||||
-->
|
||||
<state name="cues">
|
||||
<rule
|
||||
pattern="(?:((?!-->)[^\r\n]+)?(\r?\n))?((?:\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3})([ \t]+)(-->)([ \t]+)((?:\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3})([ \t]*)">
|
||||
<bygroups>
|
||||
<token type="Name" />
|
||||
<token type="Text" />
|
||||
<token type="LiteralDate" />
|
||||
<token type="Text" />
|
||||
<token type="Operator" />
|
||||
<token type="Text" />
|
||||
<token type="LiteralDate" />
|
||||
<token type="Text" />
|
||||
</bygroups>
|
||||
<push state="cue-settings-list" />
|
||||
</rule>
|
||||
<rule>
|
||||
<include state="comment" />
|
||||
</rule>
|
||||
</state>
|
||||
|
||||
<!-- https://www.w3.org/TR/webvtt1/#webvtt-cue-settings-list -->
|
||||
<state name="cue-settings-list">
|
||||
<rule pattern="[ \t]+">
|
||||
<token type="Text" />
|
||||
</rule>
|
||||
<rule pattern="(vertical)(:)?(rl|lr)?">
|
||||
<bygroups>
|
||||
<token type="Keyword" />
|
||||
<token type="Punctuation" />
|
||||
<token type="KeywordConstant" />
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule
|
||||
pattern="(line)(:)?(?:(?:((?:[1-9]?\d|100)(?:\.\d+)?)(%)|(-?\d+))(?:(,)(start|center|end))?)?">
|
||||
<bygroups>
|
||||
<token type="Keyword" />
|
||||
<token type="Punctuation" />
|
||||
<token type="Literal" />
|
||||
<token type="KeywordType" />
|
||||
<token type="Literal" />
|
||||
<token type="Punctuation" />
|
||||
<token type="KeywordConstant" />
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule
|
||||
pattern="(position)(:)?(?:(?:((?:[1-9]?\d|100)(?:\.\d+)?)(%)|(-?\d+))(?:(,)(line-left|center|line-right))?)?">
|
||||
<bygroups>
|
||||
<token type="Keyword" />
|
||||
<token type="Punctuation" />
|
||||
<token type="Literal" />
|
||||
<token type="KeywordType" />
|
||||
<token type="Literal" />
|
||||
<token type="Punctuation" />
|
||||
<token type="KeywordConstant" />
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="(size)(:)?(?:((?:[1-9]?\d|100)(?:\.\d+)?)(%))?">
|
||||
<bygroups>
|
||||
<token type="Keyword" />
|
||||
<token type="Punctuation" />
|
||||
<token type="Literal" />
|
||||
<token type="KeywordType" />
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="(align)(:)?(start|center|end|left|right)?">
|
||||
<bygroups>
|
||||
<token type="Keyword" />
|
||||
<token type="Punctuation" />
|
||||
<token type="KeywordConstant" />
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="(region)(:)?((?![^\r\n]*-->(?=[ \t]+?))[^ \t\r\n]+)?">
|
||||
<bygroups>
|
||||
<token type="Keyword" />
|
||||
<token type="Punctuation" />
|
||||
<token type="Literal" />
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule
|
||||
pattern="(?=\r?\n)">
|
||||
<push state="cue-payload" />
|
||||
</rule>
|
||||
</state>
|
||||
|
||||
<!-- https://www.w3.org/TR/webvtt1/#cue-payload -->
|
||||
<state name="cue-payload">
|
||||
<rule pattern="(\r?\n){2,}">
|
||||
<token type="Text" />
|
||||
<pop depth="2" />
|
||||
</rule>
|
||||
<rule pattern="[^<&]+?">
|
||||
<token type="Text" />
|
||||
</rule>
|
||||
<rule pattern="&(#\d+|#x[0-9A-Fa-f]+|[a-zA-Z0-9]+);">
|
||||
<token type="Text" />
|
||||
</rule>
|
||||
<rule pattern="(?=<)">
|
||||
<token type="Text" />
|
||||
<push state="cue-span-tag" />
|
||||
</rule>
|
||||
</state>
|
||||
<state name="cue-span-tag">
|
||||
<rule
|
||||
pattern="<(?=c|i|b|u|ruby|rt|v|lang|(?:\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3})">
|
||||
<token type="Punctuation" />
|
||||
<push state="cue-span-start-tag-name" />
|
||||
</rule>
|
||||
<rule pattern="(</)(c|i|b|u|ruby|rt|v|lang)">
|
||||
<bygroups>
|
||||
<token type="Punctuation" />
|
||||
<token type="NameTag" />
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern=">">
|
||||
<token type="Punctuation" />
|
||||
<pop depth="1" />
|
||||
</rule>
|
||||
</state>
|
||||
<state name="cue-span-start-tag-name">
|
||||
<rule pattern="(c|i|b|u|ruby|rt)|((?:\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3})">
|
||||
<bygroups>
|
||||
<token type="NameTag" />
|
||||
<token type="LiteralDate" />
|
||||
</bygroups>
|
||||
<push state="cue-span-classes-without-annotations" />
|
||||
</rule>
|
||||
<rule pattern="v|lang">
|
||||
<token type="NameTag" />
|
||||
<push state="cue-span-classes-with-annotations" />
|
||||
</rule>
|
||||
</state>
|
||||
<state name="cue-span-classes-without-annotations">
|
||||
<rule>
|
||||
<include state="cue-span-classes" />
|
||||
</rule>
|
||||
<rule pattern="(?=>)">
|
||||
<pop depth="2" />
|
||||
</rule>
|
||||
</state>
|
||||
<state name="cue-span-classes-with-annotations">
|
||||
<rule>
|
||||
<include state="cue-span-classes" />
|
||||
</rule>
|
||||
<rule pattern="(?=[ \t])">
|
||||
<push state="cue-span-start-tag-annotations" />
|
||||
</rule>
|
||||
</state>
|
||||
<state name="cue-span-classes">
|
||||
<rule pattern="(\.)([^ \t\n\r&<>\.]+)">
|
||||
<bygroups>
|
||||
<token type="Punctuation" />
|
||||
<token type="NameTag" />
|
||||
</bygroups>
|
||||
</rule>
|
||||
</state>
|
||||
<state name="cue-span-start-tag-annotations">
|
||||
<rule
|
||||
pattern="[ \t](?:[^\n\r&>]|&(?:#\d+|#x[0-9A-Fa-f]+|[a-zA-Z0-9]+);)+">
|
||||
<token type="Text" />
|
||||
</rule>
|
||||
<rule pattern="(?=>)">
|
||||
<token type="Text" />
|
||||
<pop depth="3" />
|
||||
</rule>
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
@ -53,7 +53,7 @@
|
||||
<bygroups>
|
||||
<token type="Punctuation"/>
|
||||
<token type="LiteralStringDoc"/>
|
||||
<token type="TextWhitespace"/>
|
||||
<token type="Ignore"/>
|
||||
</bygroups>
|
||||
</rule>
|
||||
<rule pattern="(false|False|FALSE|true|True|TRUE|null|Off|off|yes|Yes|YES|OFF|On|ON|no|No|on|NO|n|N|Y|y)\b">
|
||||
|
63
scripts/lexer_metadata.py
Normal file
63
scripts/lexer_metadata.py
Normal file
@ -0,0 +1,63 @@
|
||||
# This script parses the metadata of all the lexers and generates
|
||||
# a datafile with all the information so we don't have to instantiate
|
||||
# all the lexers to get the information.
|
||||
|
||||
import glob
|
||||
from collections import defaultdict
|
||||
|
||||
lexer_by_name = {}
|
||||
lexer_by_mimetype = defaultdict(set)
|
||||
lexer_by_filename = defaultdict(set)
|
||||
|
||||
|
||||
for fname in glob.glob("lexers/*.xml"):
|
||||
aliases = set([])
|
||||
mimetypes = set([])
|
||||
filenames = set([])
|
||||
print(fname)
|
||||
with open(fname) as f:
|
||||
lexer_name = fname.split("/")[-1].split(".")[0]
|
||||
for line in f:
|
||||
if "</config" in line:
|
||||
break
|
||||
if "<filename>" in line:
|
||||
filenames.add(line.split(">")[1].split("<")[0].lower())
|
||||
if "<mime_type>" in line:
|
||||
mimetypes.add(line.split(">")[1].split("<")[0].lower())
|
||||
if "<alias>" in line:
|
||||
aliases.add(line.split(">")[1].split("<")[0].lower())
|
||||
if "<name>" in line:
|
||||
aliases.add(line.split(">")[1].split("<")[0].lower())
|
||||
for alias in aliases:
|
||||
if alias in lexer_by_name and alias != lexer_by_name[alias]:
|
||||
raise Exception(f"Alias {alias} already in use by {lexer_by_name[alias]}")
|
||||
lexer_by_name[alias] = lexer_name
|
||||
for mimetype in mimetypes:
|
||||
lexer_by_mimetype[mimetype] = lexer_name
|
||||
for filename in filenames:
|
||||
lexer_by_filename[filename].add(lexer_name)
|
||||
|
||||
with open("src/constants/lexers.cr", "w") as f:
|
||||
# Crystal doesn't come from a xml file
|
||||
lexer_by_name["crystal"] = "crystal"
|
||||
lexer_by_name["cr"] = "crystal"
|
||||
lexer_by_filename["*.cr"] = ["crystal"]
|
||||
lexer_by_mimetype["text/x-crystal"] = "crystal"
|
||||
|
||||
f.write("module Tartrazine\n")
|
||||
f.write(" LEXERS_BY_NAME = {\n")
|
||||
for k in sorted(lexer_by_name.keys()):
|
||||
v = lexer_by_name[k]
|
||||
f.write(f'"{k}" => "{v}", \n')
|
||||
f.write("}\n")
|
||||
f.write(" LEXERS_BY_MIMETYPE = {\n")
|
||||
for k in sorted(lexer_by_mimetype.keys()):
|
||||
v = lexer_by_mimetype[k]
|
||||
f.write(f'"{k}" => "{v}", \n')
|
||||
f.write("}\n")
|
||||
f.write(" LEXERS_BY_FILENAME = {\n")
|
||||
for k in sorted(lexer_by_filename.keys()):
|
||||
v = lexer_by_filename[k]
|
||||
f.write(f'"{k}" => {str(sorted(list(v))).replace("'", "\"")}, \n')
|
||||
f.write("}\n")
|
||||
f.write("end\n")
|
@ -1,15 +1,30 @@
|
||||
# Script to generate abbreviations for tokens. Parses all lexers
|
||||
# and styles files to find all token names and generate a unique
|
||||
# abbreviation for each one. The abbreviations are generated by
|
||||
# taking the uppercase letters of the token name and converting
|
||||
# them to lowercase. If the abbreviation is not unique, the script
|
||||
# will print a warning and exit.
|
||||
|
||||
import sys
|
||||
import string
|
||||
import glob
|
||||
|
||||
# Run it as grep token lexers/* | python scripts/token_abbrevs.py
|
||||
|
||||
tokens = {"Highlight"}
|
||||
abbrevs = {"Highlight": "hl"}
|
||||
|
||||
def abbr(line):
|
||||
return "".join(c for c in line if c in string.ascii_uppercase).lower()
|
||||
|
||||
abbrevs = {}
|
||||
tokens = set([])
|
||||
for line in sys.stdin:
|
||||
def check_abbrevs():
|
||||
if len(abbrevs) != len(tokens):
|
||||
print("Warning: Abbreviations are not unique")
|
||||
print(len(abbrevs), len(tokens))
|
||||
sys.exit(1)
|
||||
|
||||
# Processes all files in lexers looking for token names
|
||||
for fname in glob.glob("lexers/*.xml"):
|
||||
with open(fname) as f:
|
||||
for line in f:
|
||||
if "<token" not in line:
|
||||
continue
|
||||
line = line.strip()
|
||||
@ -17,8 +32,24 @@ for line in sys.stdin:
|
||||
line = line.split('"')[1]
|
||||
abbrevs[line] = abbr(line)
|
||||
tokens.add(line)
|
||||
check_abbrevs()
|
||||
|
||||
print("Abbreviations: {")
|
||||
for k, v in abbrevs.items():
|
||||
print(f' "{k}" => "{v}",')
|
||||
print("}")
|
||||
# Processes all files in styles looking for token names too
|
||||
for fname in glob.glob("styles/*.xml"):
|
||||
with open(fname) as f:
|
||||
for line in f:
|
||||
if "<entry" not in line:
|
||||
continue
|
||||
line = line.strip()
|
||||
line = line.split('type=',1)[-1]
|
||||
line = line.split('"')[1]
|
||||
abbrevs[line] = abbr(line)
|
||||
tokens.add(line)
|
||||
check_abbrevs()
|
||||
|
||||
with open ("src/constants/token_abbrevs.cr", "w") as outf:
|
||||
outf.write("module Tartrazine\n")
|
||||
outf.write(" Abbreviations = {\n")
|
||||
for k in sorted(abbrevs.keys()):
|
||||
outf.write(f' "{k}" => "{abbrevs[k]}",\n')
|
||||
outf.write(" }\nend\n")
|
||||
|
11
shard.yml
11
shard.yml
@ -1,5 +1,5 @@
|
||||
name: tartrazine
|
||||
version: 0.1.1
|
||||
version: 0.12.0
|
||||
|
||||
authors:
|
||||
- Roberto Alsina <roberto.alsina@gmail.com>
|
||||
@ -10,11 +10,18 @@ targets:
|
||||
|
||||
dependencies:
|
||||
baked_file_system:
|
||||
github: schovi/baked_file_system
|
||||
github: ralsina/baked_file_system
|
||||
branch: master
|
||||
base58:
|
||||
github: crystal-china/base58.cr
|
||||
sixteen:
|
||||
github: ralsina/sixteen
|
||||
docopt:
|
||||
github: chenkovsky/docopt.cr
|
||||
stumpy_utils:
|
||||
github: stumpycr/stumpy_utils
|
||||
stumpy_png:
|
||||
github: stumpycr/stumpy_png
|
||||
|
||||
crystal: ">= 1.13.0"
|
||||
|
||||
|
1
spec/css/manni.css
Normal file
1
spec/css/manni.css
Normal file
@ -0,0 +1 @@
|
||||
.e {color: #aa0000;background-color: #ffaaaa;}.b {background-color: #f0f3f3;tab-size: 8;}.k {color: #006699;font-weight: 600;}.kp {}.kt {color: #007788;}.na {color: #330099;}.nb {color: #336666;}.nc {color: #00aa88;font-weight: 600;}.nc {color: #336600;}.nd {color: #9999ff;}.ne {color: #999999;font-weight: 600;}.ne {color: #cc0000;font-weight: 600;}.nf {color: #cc00ff;}.nl {color: #9999ff;}.nn {color: #00ccff;font-weight: 600;}.nt {color: #330099;font-weight: 600;}.nv {color: #003333;}.ls {color: #cc3300;}.lsd {font-style: italic;}.lse {color: #cc3300;font-weight: 600;}.lsi {color: #aa0000;}.lso {color: #cc3300;}.lsr {color: #33aaaa;}.lss {color: #ffcc33;}.ln {color: #ff6600;}.o {color: #555555;}.ow {color: #000000;font-weight: 600;}.c {color: #0099ff;font-style: italic;}.cs {font-weight: 600;}.cp {color: #009999;font-style: normal;}.gd {background-color: #ffcccc;border: 1px solid #cc0000;}.ge {font-style: italic;}.ge {color: #ff0000;}.gh {color: #003300;font-weight: 600;}.gi {background-color: #ccffcc;border: 1px solid #00cc00;}.go {color: #aaaaaa;}.gp {color: #000099;font-weight: 600;}.gs {font-weight: 600;}.gs {color: #003300;font-weight: 600;}.gt {color: #99cc66;}.gu {text-decoration: underline;}.tw {color: #bbbbbb;}.lh {}
|
1
spec/css/vesper.css
Normal file
1
spec/css/vesper.css
Normal file
@ -0,0 +1 @@
|
||||
.b {color: #b7b7b7;background-color: #101010;font-weight: 600;tab-size: 8;}.lh {color: #8eaaaa;background-color: #232323;}.t {color: #b7b7b7;}.e {color: #de6e6e;}.c {color: #333333;}.cp {color: #876c4f;}.cpf {color: #5f8787;}.k {color: #d69094;}.kt {color: #de6e6e;}.na {color: #8eaaaa;}.nb {color: #de6e6e;}.nbp {color: #de6e6e;}.nc {color: #8eaaaa;}.nc {color: #dab083;}.nd {color: #dab083;}.nf {color: #8eaaaa;}.nn {color: #8eaaaa;}.nt {color: #d69094;}.nv {color: #8eaaaa;}.nvi {color: #de6e6e;}.ln {color: #dab083;}.o {color: #60a592;}.ow {color: #d69094;}.l {color: #5f8787;}.ls {color: #5f8787;}.lsi {color: #876c4f;}.lsr {color: #60a592;}.lss {color: #dab083;}
|
1
spec/examples/crystal/hello.cr
Normal file
1
spec/examples/crystal/hello.cr
Normal file
@ -0,0 +1 @@
|
||||
puts "Hello Crystal!"
|
1
spec/examples/crystal/hello.cr.json
Normal file
1
spec/examples/crystal/hello.cr.json
Normal file
@ -0,0 +1 @@
|
||||
[{"type":"Text","value":"puts "},{"type":"LiteralString","value":"\"Hello Crystal!\""},{"type":"Text","value":"\n"}]
|
11
spec/examples/jinja+python/funko.py.j2
Normal file
11
spec/examples/jinja+python/funko.py.j2
Normal file
@ -0,0 +1,11 @@
|
||||
from flask import Flask, request
|
||||
|
||||
app = Flask("{{name}}")
|
||||
|
||||
@app.route('/')
|
||||
def handle():
|
||||
return "Hello World from Flask!"
|
||||
|
||||
@app.route('/ping')
|
||||
def ping():
|
||||
return "OK"
|
1
spec/examples/jinja+python/funko.py.j2.json
Normal file
1
spec/examples/jinja+python/funko.py.j2.json
Normal file
@ -0,0 +1 @@
|
||||
[{"type":"KeywordNamespace","value":"from"},{"type":"Text","value":" "},{"type":"NameNamespace","value":"flask"},{"type":"Text","value":" "},{"type":"KeywordNamespace","value":"import"},{"type":"Text","value":" "},{"type":"Name","value":"Flask"},{"type":"Punctuation","value":","},{"type":"Text","value":" "},{"type":"Name","value":"request"},{"type":"Text","value":"\n\n"},{"type":"Name","value":"app"},{"type":"Text","value":" "},{"type":"Operator","value":"="},{"type":"Text","value":" "},{"type":"Name","value":"Flask"},{"type":"Punctuation","value":"("},{"type":"LiteralStringDouble","value":"\""},{"type":"CommentPreproc","value":"{{"},{"type":"NameVariable","value":"name"},{"type":"CommentPreproc","value":"}}"},{"type":"LiteralStringDouble","value":"\")"},{"type":"Text","value":"\n\n"},{"type":"NameDecorator","value":"@app.route"},{"type":"Punctuation","value":"("},{"type":"LiteralStringSingle","value":"'/'"},{"type":"Punctuation","value":")"},{"type":"Text","value":"\n"},{"type":"Keyword","value":"def"},{"type":"Text","value":" "},{"type":"NameFunction","value":"handle"},{"type":"Punctuation","value":"():"},{"type":"Text","value":"\n "},{"type":"Keyword","value":"return"},{"type":"Text","value":" "},{"type":"LiteralStringDouble","value":"\"Hello World from Flask!\""},{"type":"Text","value":"\n\n"},{"type":"NameDecorator","value":"@app.route"},{"type":"Punctuation","value":"("},{"type":"LiteralStringSingle","value":"'/ping'"},{"type":"Punctuation","value":")"},{"type":"Text","value":"\n"},{"type":"Keyword","value":"def"},{"type":"Text","value":" "},{"type":"NameFunction","value":"ping"},{"type":"Punctuation","value":"():"},{"type":"Text","value":"\n "},{"type":"Keyword","value":"return"},{"type":"Text","value":" "},{"type":"LiteralStringDouble","value":"\"OK\""},{"type":"Text","value":"\n"}]
|
@ -1,8 +1,15 @@
|
||||
require "./spec_helper"
|
||||
require "digest/sha1"
|
||||
|
||||
# These are the testcases from Pygments
|
||||
testcases = Dir.glob("#{__DIR__}/tests/**/*txt").sort
|
||||
|
||||
# These are custom testcases
|
||||
examples = Dir.glob("#{__DIR__}/examples/**/*.*").reject(&.ends_with? ".json").sort!
|
||||
|
||||
# CSS Stylesheets
|
||||
css_files = Dir.glob("#{__DIR__}/css/*.css")
|
||||
|
||||
# These lexers don't load because of parsing issues
|
||||
failing_lexers = {
|
||||
"webgpu_shading_language",
|
||||
@ -14,35 +21,39 @@ unicode_problems = {
|
||||
"#{__DIR__}/tests/java/test_string_literals.txt",
|
||||
"#{__DIR__}/tests/json/test_strings.txt",
|
||||
"#{__DIR__}/tests/systemd/example1.txt",
|
||||
"#{__DIR__}/tests/c++/test_unicode_identifiers.txt",
|
||||
}
|
||||
|
||||
# These testcases fail because of differences in the way chroma and tartrazine tokenize
|
||||
# but tartrazine is correct
|
||||
bad_in_chroma = {
|
||||
"#{__DIR__}/tests/bash_session/test_comment_after_prompt.txt",
|
||||
"#{__DIR__}/tests/html/javascript_backtracking.txt",
|
||||
"#{__DIR__}/tests/java/test_default.txt",
|
||||
"#{__DIR__}/tests/java/test_multiline_string.txt",
|
||||
"#{__DIR__}/tests/java/test_numeric_literals.txt",
|
||||
"#{__DIR__}/tests/octave/test_multilinecomment.txt",
|
||||
"#{__DIR__}/tests/php/test_string_escaping_run.txt",
|
||||
"#{__DIR__}/tests/python_2/test_cls_builtin.txt",
|
||||
"#{__DIR__}/tests/bqn/test_syntax_roles.txt", # This one only fails in CI
|
||||
}
|
||||
|
||||
known_bad = {
|
||||
"#{__DIR__}/tests/bash_session/fake_ps2_prompt.txt",
|
||||
"#{__DIR__}/tests/bash_session/prompt_in_output.txt",
|
||||
"#{__DIR__}/tests/bash_session/test_newline_in_echo_no_ps2.txt",
|
||||
"#{__DIR__}/tests/bash_session/test_newline_in_ls_ps2.txt",
|
||||
"#{__DIR__}/tests/bash_session/ps2_prompt.txt",
|
||||
"#{__DIR__}/tests/bash_session/test_newline_in_ls_no_ps2.txt",
|
||||
"#{__DIR__}/tests/bash_session/test_virtualenv.txt",
|
||||
"#{__DIR__}/tests/bash_session/test_newline_in_echo_no_ps2.txt",
|
||||
"#{__DIR__}/tests/bash_session/test_newline_in_echo_ps2.txt",
|
||||
"#{__DIR__}/tests/c/test_string_resembling_decl_end.txt",
|
||||
"#{__DIR__}/tests/html/css_backtracking.txt",
|
||||
"#{__DIR__}/tests/bash_session/test_newline_in_ls_no_ps2.txt",
|
||||
"#{__DIR__}/tests/bash_session/test_newline_in_ls_ps2.txt",
|
||||
"#{__DIR__}/tests/bash_session/test_virtualenv.txt",
|
||||
"#{__DIR__}/tests/mcfunction/commenting.txt",
|
||||
"#{__DIR__}/tests/mcfunction/coordinates.txt",
|
||||
"#{__DIR__}/tests/mcfunction/data.txt",
|
||||
"#{__DIR__}/tests/mcfunction/difficult_1.txt",
|
||||
"#{__DIR__}/tests/mcfunction/multiline.txt",
|
||||
"#{__DIR__}/tests/mcfunction/selectors.txt",
|
||||
"#{__DIR__}/tests/php/anonymous_class.txt",
|
||||
"#{__DIR__}/tests/html/javascript_unclosed.txt",
|
||||
|
||||
"#{__DIR__}/tests/mcfunction/simple.txt",
|
||||
}
|
||||
|
||||
# Tests that fail because of a limitation in PCRE2
|
||||
@ -52,6 +63,14 @@ not_my_fault = {
|
||||
|
||||
describe Tartrazine do
|
||||
describe "Lexer" do
|
||||
examples.each do |example|
|
||||
it "parses #{example}".split("/")[-2...].join("/") do
|
||||
lexer = Tartrazine.lexer(name: File.basename(File.dirname(example)).downcase)
|
||||
text = File.read(example)
|
||||
expected = Array(Tartrazine::Token).from_json(File.read("#{example}.json"))
|
||||
Tartrazine::RegexLexer.collapse_tokens(lexer.tokenizer(text).to_a).should eq expected
|
||||
end
|
||||
end
|
||||
testcases.each do |testcase|
|
||||
if known_bad.includes?(testcase)
|
||||
pending "parses #{testcase}".split("/")[-2...].join("/") do
|
||||
@ -70,12 +89,67 @@ describe Tartrazine do
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe "formatter" do
|
||||
css_files.each do |css_file|
|
||||
it "generates #{css_file}" do
|
||||
css = File.read(css_file)
|
||||
theme = Tartrazine.theme(File.basename(css_file, ".css"))
|
||||
formatter = Tartrazine::Html.new(theme: theme)
|
||||
formatter.style_defs.strip.should eq css.strip
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe "to_html" do
|
||||
it "should do basic highlighting" do
|
||||
html = Tartrazine.to_html("puts 'Hello, World!'", "ruby", standalone: false)
|
||||
html.should eq(
|
||||
"<pre class=\"b\" ><code class=\"b\"><span class=\"nb\">puts</span><span class=\"t\"> </span><span class=\"lss\">'Hello, World!'</span></code></pre>"
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
describe "to_ansi" do
|
||||
it "should do basic highlighting" do
|
||||
ansi = Tartrazine.to_ansi("puts 'Hello, World!'", "ruby")
|
||||
if ENV.fetch("CI", nil)
|
||||
# In Github Actions there is no terminal so these don't
|
||||
# really work
|
||||
ansi.should eq(
|
||||
"puts 'Hello, World!'"
|
||||
)
|
||||
else
|
||||
ansi.should eq(
|
||||
"\e[38;2;171;70;66mputs\e[0m\e[38;2;216;216;216m \e[0m\e[38;2;161;181;108m'Hello, World!'\e[0m"
|
||||
)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe "to_svg" do
|
||||
it "should do basic highlighting" do
|
||||
svg = Tartrazine.to_svg("puts 'Hello, World!'", "ruby", standalone: false)
|
||||
svg.should eq(
|
||||
"<text x=\"0\" y=\"19\" xml:space=\"preserve\"><tspan fill=\"#ab4642\">puts</tspan><tspan fill=\"#d8d8d8\"> </tspan><tspan fill=\"#a1b56c\">'Hello, World!'</tspan></text>"
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
describe "to_png" do
|
||||
it "should do basic highlighting" do
|
||||
png = Digest::SHA1.hexdigest(Tartrazine.to_png("puts 'Hello, World!'", "ruby"))
|
||||
png.should eq(
|
||||
"62d419dcd263fffffc265a0f04c156dc2530c362"
|
||||
)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# Helper that creates lexer and tokenizes
|
||||
def tokenize(lexer_name, text)
|
||||
lexer = Tartrazine.lexer(lexer_name)
|
||||
lexer.tokenize(text)
|
||||
tokenizer = Tartrazine.lexer(lexer_name).tokenizer(text)
|
||||
Tartrazine::RegexLexer.collapse_tokens(tokenizer.to_a)
|
||||
end
|
||||
|
||||
# Helper that tokenizes using chroma to validate the lexer
|
||||
@ -87,5 +161,5 @@ def chroma_tokenize(lexer_name, text)
|
||||
["-f", "json", "-l", lexer_name],
|
||||
input: input, output: output
|
||||
)
|
||||
Tartrazine::Lexer.collapse_tokens(Array(Tartrazine::Token).from_json(output.to_s))
|
||||
Tartrazine::RegexLexer.collapse_tokens(Array(Tartrazine::Token).from_json(output.to_s))
|
||||
end
|
||||
|
@ -1,16 +0,0 @@
|
||||
---input---
|
||||
@[FOO::Bar::Baz(opt: "xx")]
|
||||
|
||||
---tokens---
|
||||
'@[' Operator
|
||||
'FOO::Bar::Baz' Name.Decorator
|
||||
'(' Punctuation
|
||||
'opt' Literal.String.Symbol
|
||||
':' Punctuation
|
||||
' ' Text.Whitespace
|
||||
'"' Literal.String.Double
|
||||
'xx' Literal.String.Double
|
||||
'"' Literal.String.Double
|
||||
')' Punctuation
|
||||
']' Operator
|
||||
'\n' Text.Whitespace
|
@ -1,11 +0,0 @@
|
||||
---input---
|
||||
[5][5]?
|
||||
|
||||
---tokens---
|
||||
'[' Operator
|
||||
'5' Literal.Number.Integer
|
||||
']' Operator
|
||||
'[' Operator
|
||||
'5' Literal.Number.Integer
|
||||
']?' Operator
|
||||
'\n' Text.Whitespace
|
@ -1,25 +0,0 @@
|
||||
---input---
|
||||
'a'
|
||||
'я'
|
||||
'\u{1234}'
|
||||
'
|
||||
'
|
||||
'abc'
|
||||
|
||||
---tokens---
|
||||
"'a'" Literal.String.Char
|
||||
'\n' Text.Whitespace
|
||||
|
||||
"'я'" Literal.String.Char
|
||||
'\n' Text.Whitespace
|
||||
|
||||
"'\\u{1234}'" Literal.String.Char
|
||||
'\n' Text.Whitespace
|
||||
|
||||
"'\n'" Literal.String.Char
|
||||
'\n' Text.Whitespace
|
||||
|
||||
"'" Error
|
||||
'abc' Name
|
||||
"'" Error
|
||||
'\n' Text.Whitespace
|
@ -1,14 +0,0 @@
|
||||
---input---
|
||||
HTTP
|
||||
HTTP::Server.new
|
||||
|
||||
---tokens---
|
||||
'HTTP' Name.Constant
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'HTTP' Name
|
||||
'::' Operator
|
||||
'Server' Name
|
||||
'.' Operator
|
||||
'new' Name
|
||||
'\n' Text.Whitespace
|
@ -1,27 +0,0 @@
|
||||
---input---
|
||||
%()
|
||||
%[]
|
||||
%{}
|
||||
%<>
|
||||
%||
|
||||
|
||||
---tokens---
|
||||
'%(' Literal.String.Other
|
||||
')' Literal.String.Other
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'%[' Literal.String.Other
|
||||
']' Literal.String.Other
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'%{' Literal.String.Other
|
||||
'}' Literal.String.Other
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'%<' Literal.String.Other
|
||||
'>' Literal.String.Other
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'%|' Literal.String.Other
|
||||
'|' Literal.String.Other
|
||||
'\n' Text.Whitespace
|
@ -1,19 +0,0 @@
|
||||
---input---
|
||||
str.gsub(%r{\\\\}, "/")
|
||||
|
||||
---tokens---
|
||||
'str' Name
|
||||
'.' Operator
|
||||
'gsub' Name
|
||||
'(' Punctuation
|
||||
'%r{' Literal.String.Regex
|
||||
'\\\\' Literal.String.Regex
|
||||
'\\\\' Literal.String.Regex
|
||||
'}' Literal.String.Regex
|
||||
',' Punctuation
|
||||
' ' Text.Whitespace
|
||||
'"' Literal.String.Double
|
||||
'/' Literal.String.Double
|
||||
'"' Literal.String.Double
|
||||
')' Punctuation
|
||||
'\n' Text.Whitespace
|
@ -1,9 +0,0 @@
|
||||
---input---
|
||||
"\#{a + b}"
|
||||
|
||||
---tokens---
|
||||
'"' Literal.String.Double
|
||||
'\\#' Literal.String.Escape
|
||||
'{a + b}' Literal.String.Double
|
||||
'"' Literal.String.Double
|
||||
'\n' Text.Whitespace
|
@ -1,56 +0,0 @@
|
||||
---input---
|
||||
"A#{ (3..5).group_by { |x| x/2}.map do |k,v| "#{k}" end.join }" + "Z"
|
||||
|
||||
---tokens---
|
||||
'"' Literal.String.Double
|
||||
'A' Literal.String.Double
|
||||
'#{' Literal.String.Interpol
|
||||
' ' Text.Whitespace
|
||||
'(' Punctuation
|
||||
'3' Literal.Number.Integer
|
||||
'..' Operator
|
||||
'5' Literal.Number.Integer
|
||||
')' Punctuation
|
||||
'.' Operator
|
||||
'group_by' Name
|
||||
' ' Text.Whitespace
|
||||
'{' Literal.String.Interpol
|
||||
' ' Text.Whitespace
|
||||
'|' Operator
|
||||
'x' Name
|
||||
'|' Operator
|
||||
' ' Text.Whitespace
|
||||
'x' Name
|
||||
'/' Operator
|
||||
'2' Literal.Number.Integer
|
||||
'}' Literal.String.Interpol
|
||||
'.' Operator
|
||||
'map' Name
|
||||
' ' Text.Whitespace
|
||||
'do' Keyword
|
||||
' ' Text.Whitespace
|
||||
'|' Operator
|
||||
'k' Name
|
||||
',' Punctuation
|
||||
'v' Name
|
||||
'|' Operator
|
||||
' ' Text.Whitespace
|
||||
'"' Literal.String.Double
|
||||
'#{' Literal.String.Interpol
|
||||
'k' Name
|
||||
'}' Literal.String.Interpol
|
||||
'"' Literal.String.Double
|
||||
' ' Text.Whitespace
|
||||
'end' Keyword
|
||||
'.' Operator
|
||||
'join' Name
|
||||
' ' Text.Whitespace
|
||||
'}' Literal.String.Interpol
|
||||
'"' Literal.String.Double
|
||||
' ' Text.Whitespace
|
||||
'+' Operator
|
||||
' ' Text.Whitespace
|
||||
'"' Literal.String.Double
|
||||
'Z' Literal.String.Double
|
||||
'"' Literal.String.Double
|
||||
'\n' Text.Whitespace
|
@ -1,58 +0,0 @@
|
||||
---input---
|
||||
@[Link("some")]
|
||||
lib LibSome
|
||||
@[CallConvention("X86_StdCall")]
|
||||
fun foo="some.foo"(thing : Void*) : LibC::Int
|
||||
end
|
||||
|
||||
---tokens---
|
||||
'@[' Operator
|
||||
'Link' Name.Decorator
|
||||
'(' Punctuation
|
||||
'"' Literal.String.Double
|
||||
'some' Literal.String.Double
|
||||
'"' Literal.String.Double
|
||||
')' Punctuation
|
||||
']' Operator
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'lib' Keyword
|
||||
' ' Text.Whitespace
|
||||
'LibSome' Name.Namespace
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'@[' Operator
|
||||
'CallConvention' Name.Decorator
|
||||
'(' Punctuation
|
||||
'"' Literal.String.Double
|
||||
'X86_StdCall' Literal.String.Double
|
||||
'"' Literal.String.Double
|
||||
')' Punctuation
|
||||
']' Operator
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'fun' Keyword
|
||||
' ' Text.Whitespace
|
||||
'foo' Name.Function
|
||||
'=' Operator
|
||||
'"' Literal.String.Double
|
||||
'some.foo' Literal.String.Double
|
||||
'"' Literal.String.Double
|
||||
'(' Punctuation
|
||||
'thing' Name
|
||||
' ' Text.Whitespace
|
||||
':' Punctuation
|
||||
' ' Text.Whitespace
|
||||
'Void' Name
|
||||
'*' Operator
|
||||
')' Punctuation
|
||||
' ' Text.Whitespace
|
||||
':' Punctuation
|
||||
' ' Text.Whitespace
|
||||
'LibC' Name
|
||||
'::' Operator
|
||||
'Int' Name
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'end' Keyword
|
||||
'\n' Text.Whitespace
|
@ -1,76 +0,0 @@
|
||||
---input---
|
||||
def<=>(other : self) : Int
|
||||
{%for field in %w(first_name middle_name last_name)%}
|
||||
cmp={{field.id}}<=>other.{{field.id}}
|
||||
return cmp if cmp!=0
|
||||
{%end%}
|
||||
0
|
||||
end
|
||||
|
||||
---tokens---
|
||||
'def' Keyword
|
||||
'<=>' Name.Function
|
||||
'(' Punctuation
|
||||
'other' Name
|
||||
' ' Text.Whitespace
|
||||
':' Punctuation
|
||||
' ' Text.Whitespace
|
||||
'self' Keyword
|
||||
')' Punctuation
|
||||
' ' Text.Whitespace
|
||||
':' Punctuation
|
||||
' ' Text.Whitespace
|
||||
'Int' Name
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'{%' Literal.String.Interpol
|
||||
'for' Keyword
|
||||
' ' Text.Whitespace
|
||||
'field' Name
|
||||
' ' Text.Whitespace
|
||||
'in' Keyword
|
||||
' ' Text.Whitespace
|
||||
'%w(' Literal.String.Other
|
||||
'first_name middle_name last_name' Literal.String.Other
|
||||
')' Literal.String.Other
|
||||
'%}' Literal.String.Interpol
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'cmp' Name
|
||||
'=' Operator
|
||||
'{{' Literal.String.Interpol
|
||||
'field' Name
|
||||
'.' Operator
|
||||
'id' Name
|
||||
'}}' Literal.String.Interpol
|
||||
'<=>' Operator
|
||||
'other' Name
|
||||
'.' Operator
|
||||
'{{' Literal.String.Interpol
|
||||
'field' Name
|
||||
'.' Operator
|
||||
'id' Name
|
||||
'}}' Literal.String.Interpol
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'return' Keyword
|
||||
' ' Text.Whitespace
|
||||
'cmp' Name
|
||||
' ' Text.Whitespace
|
||||
'if' Keyword
|
||||
' ' Text.Whitespace
|
||||
'cmp' Name
|
||||
'!=' Operator
|
||||
'0' Literal.Number.Integer
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'{%' Literal.String.Interpol
|
||||
'end' Keyword
|
||||
'%}' Literal.String.Interpol
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'0' Literal.Number.Integer
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'end' Keyword
|
||||
'\n' Text.Whitespace
|
@ -1,84 +0,0 @@
|
||||
---input---
|
||||
# Integers
|
||||
0
|
||||
1
|
||||
1_000_000
|
||||
1u8
|
||||
11231231231121312i64
|
||||
|
||||
# Floats
|
||||
0.0
|
||||
1.0_f32
|
||||
1_f32
|
||||
0f64
|
||||
1e+4
|
||||
1e111
|
||||
1_234.567_890
|
||||
|
||||
# Error
|
||||
01
|
||||
0b2
|
||||
0x129g2
|
||||
0o12358
|
||||
|
||||
---tokens---
|
||||
'# Integers' Comment.Single
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'0' Literal.Number.Integer
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'1' Literal.Number.Integer
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'1_000_000' Literal.Number.Integer
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'1u8' Literal.Number.Integer
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'11231231231121312i64' Literal.Number.Integer
|
||||
'\n\n' Text.Whitespace
|
||||
|
||||
'# Floats' Comment.Single
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'0.0' Literal.Number.Float
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'1.0_f32' Literal.Number.Float
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'1_f32' Literal.Number.Float
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'0f64' Literal.Number.Float
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'1e+4' Literal.Number.Float
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'1e111' Literal.Number.Float
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'1_234.567_890' Literal.Number.Float
|
||||
'\n\n' Text.Whitespace
|
||||
|
||||
'# Error' Comment.Single
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'0' Error
|
||||
'1' Literal.Number.Integer
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'0' Error
|
||||
'b2' Name
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'0' Error
|
||||
'x129g2' Name
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'0' Error
|
||||
'o12358' Name
|
||||
'\n' Text.Whitespace
|
@ -1,18 +0,0 @@
|
||||
---input---
|
||||
([] of Int32).[]?(5)
|
||||
|
||||
---tokens---
|
||||
'(' Punctuation
|
||||
'[' Operator
|
||||
']' Operator
|
||||
' ' Text.Whitespace
|
||||
'of' Keyword
|
||||
' ' Text.Whitespace
|
||||
'Int32' Name
|
||||
')' Punctuation
|
||||
'.' Operator
|
||||
'[]?' Name.Operator
|
||||
'(' Punctuation
|
||||
'5' Literal.Number.Integer
|
||||
')' Punctuation
|
||||
'\n' Text.Whitespace
|
@ -1,41 +0,0 @@
|
||||
---input---
|
||||
%(hello ("world"))
|
||||
%[hello ["world"]]
|
||||
%{hello "world"}
|
||||
%<hello <"world">>
|
||||
%|hello "world"|
|
||||
|
||||
---tokens---
|
||||
'%(' Literal.String.Other
|
||||
'hello ' Literal.String.Other
|
||||
'(' Literal.String.Other
|
||||
'"world"' Literal.String.Other
|
||||
')' Literal.String.Other
|
||||
')' Literal.String.Other
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'%[' Literal.String.Other
|
||||
'hello ' Literal.String.Other
|
||||
'[' Literal.String.Other
|
||||
'"world"' Literal.String.Other
|
||||
']' Literal.String.Other
|
||||
']' Literal.String.Other
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'%{' Literal.String.Other
|
||||
'hello "world"' Literal.String.Other
|
||||
'}' Literal.String.Other
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'%<' Literal.String.Other
|
||||
'hello ' Literal.String.Other
|
||||
'<' Literal.String.Other
|
||||
'"world"' Literal.String.Other
|
||||
'>' Literal.String.Other
|
||||
'>' Literal.String.Other
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'%|' Literal.String.Other
|
||||
'hello "world"' Literal.String.Other
|
||||
'|' Literal.String.Other
|
||||
'\n' Text.Whitespace
|
@ -1,31 +0,0 @@
|
||||
---input---
|
||||
%Q(hello \n #{name})
|
||||
%q(hello \n #{name})
|
||||
%w(foo\nbar baz)
|
||||
|
||||
---tokens---
|
||||
'%Q(' Literal.String.Other
|
||||
'hello ' Literal.String.Other
|
||||
'\\n' Literal.String.Escape
|
||||
' ' Literal.String.Other
|
||||
'#{' Literal.String.Interpol
|
||||
'name' Name
|
||||
'}' Literal.String.Interpol
|
||||
')' Literal.String.Other
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'%q(' Literal.String.Other
|
||||
'hello ' Literal.String.Other
|
||||
'\\' Literal.String.Other
|
||||
'n ' Literal.String.Other
|
||||
'#' Literal.String.Other
|
||||
'{name}' Literal.String.Other
|
||||
')' Literal.String.Other
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'%w(' Literal.String.Other
|
||||
'foo' Literal.String.Other
|
||||
'\\' Literal.String.Other
|
||||
'nbar baz' Literal.String.Other
|
||||
')' Literal.String.Other
|
||||
'\n' Text.Whitespace
|
@ -1,20 +0,0 @@
|
||||
---input---
|
||||
record Cls do
|
||||
def_equals s
|
||||
end
|
||||
|
||||
---tokens---
|
||||
'record' Name.Builtin.Pseudo
|
||||
' ' Text.Whitespace
|
||||
'Cls' Name
|
||||
' ' Text.Whitespace
|
||||
'do' Keyword
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'def_equals' Name.Builtin.Pseudo
|
||||
' ' Text.Whitespace
|
||||
's' Name
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'end' Keyword
|
||||
'\n' Text.Whitespace
|
@ -1,50 +0,0 @@
|
||||
---input---
|
||||
def f(x : T, line = __LINE__) forall T
|
||||
if x.is_a?(String)
|
||||
pp! x
|
||||
end
|
||||
end
|
||||
|
||||
---tokens---
|
||||
'def' Keyword
|
||||
' ' Text.Whitespace
|
||||
'f' Name.Function
|
||||
'(' Punctuation
|
||||
'x' Name
|
||||
' ' Text.Whitespace
|
||||
':' Punctuation
|
||||
' ' Text.Whitespace
|
||||
'T' Name
|
||||
',' Punctuation
|
||||
' ' Text.Whitespace
|
||||
'line' Name
|
||||
' ' Text.Whitespace
|
||||
'=' Operator
|
||||
' ' Text.Whitespace
|
||||
'__LINE__' Keyword.Pseudo
|
||||
')' Punctuation
|
||||
' ' Text.Whitespace
|
||||
'forall' Keyword.Pseudo
|
||||
' ' Text.Whitespace
|
||||
'T' Name
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'if' Keyword
|
||||
' ' Text.Whitespace
|
||||
'x' Name
|
||||
'.is_a?' Keyword.Pseudo
|
||||
'(' Punctuation
|
||||
'String' Name
|
||||
')' Punctuation
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'pp!' Name.Builtin.Pseudo
|
||||
' ' Text.Whitespace
|
||||
'x' Name
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'end' Keyword
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'end' Keyword
|
||||
'\n' Text.Whitespace
|
@ -1,8 +0,0 @@
|
||||
---input---
|
||||
1...3
|
||||
|
||||
---tokens---
|
||||
'1' Literal.Number.Integer
|
||||
'...' Operator
|
||||
'3' Literal.Number.Integer
|
||||
'\n' Text.Whitespace
|
@ -1,10 +0,0 @@
|
||||
---input---
|
||||
1 .. 3
|
||||
|
||||
---tokens---
|
||||
'1' Literal.Number.Integer
|
||||
' ' Text.Whitespace
|
||||
'..' Operator
|
||||
' ' Text.Whitespace
|
||||
'3' Literal.Number.Integer
|
||||
'\n' Text.Whitespace
|
@ -1,58 +0,0 @@
|
||||
---input---
|
||||
"a\nz"
|
||||
"a\az"
|
||||
"a\xffz"
|
||||
"a\u1234z"
|
||||
"a\000z"
|
||||
"a\u{0}z"
|
||||
"a\u{10AfF9}z"
|
||||
|
||||
---tokens---
|
||||
'"' Literal.String.Double
|
||||
'a' Literal.String.Double
|
||||
'\\n' Literal.String.Escape
|
||||
'z' Literal.String.Double
|
||||
'"' Literal.String.Double
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'"' Literal.String.Double
|
||||
'a' Literal.String.Double
|
||||
'\\a' Literal.String.Escape
|
||||
'z' Literal.String.Double
|
||||
'"' Literal.String.Double
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'"' Literal.String.Double
|
||||
'a' Literal.String.Double
|
||||
'\\xff' Literal.String.Escape
|
||||
'z' Literal.String.Double
|
||||
'"' Literal.String.Double
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'"' Literal.String.Double
|
||||
'a' Literal.String.Double
|
||||
'\\u1234' Literal.String.Escape
|
||||
'z' Literal.String.Double
|
||||
'"' Literal.String.Double
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'"' Literal.String.Double
|
||||
'a' Literal.String.Double
|
||||
'\\000' Literal.String.Escape
|
||||
'z' Literal.String.Double
|
||||
'"' Literal.String.Double
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'"' Literal.String.Double
|
||||
'a' Literal.String.Double
|
||||
'\\u{0}' Literal.String.Escape
|
||||
'z' Literal.String.Double
|
||||
'"' Literal.String.Double
|
||||
'\n' Text.Whitespace
|
||||
|
||||
'"' Literal.String.Double
|
||||
'a' Literal.String.Double
|
||||
'\\u{10AfF9}' Literal.String.Escape
|
||||
'z' Literal.String.Double
|
||||
'"' Literal.String.Double
|
||||
'\n' Text.Whitespace
|
@ -1,20 +0,0 @@
|
||||
---input---
|
||||
:sym_bol
|
||||
:あ
|
||||
:question?
|
||||
:"symbol"
|
||||
|
||||
---tokens---
|
||||
':sym_bol' Literal.String.Symbol
|
||||
'\n' Text.Whitespace
|
||||
|
||||
':あ' Literal.String.Symbol
|
||||
'\n' Text.Whitespace
|
||||
|
||||
':question?' Literal.String.Symbol
|
||||
'\n' Text.Whitespace
|
||||
|
||||
':"' Literal.String.Symbol
|
||||
'symbol' Literal.String.Symbol
|
||||
'"' Literal.String.Symbol
|
||||
'\n' Text.Whitespace
|
154
src/actions.cr
154
src/actions.cr
@ -1,5 +1,4 @@
|
||||
require "./actions"
|
||||
require "./constants"
|
||||
require "./formatter"
|
||||
require "./rules"
|
||||
require "./styles"
|
||||
@ -9,12 +8,33 @@ require "./tartrazine"
|
||||
# perform a list of actions. These actions can emit tokens
|
||||
# or change the state machine.
|
||||
module Tartrazine
|
||||
class Action
|
||||
property type : String
|
||||
property xml : XML::Node
|
||||
enum ActionType
|
||||
Bygroups
|
||||
Combined
|
||||
Include
|
||||
Pop
|
||||
Push
|
||||
Token
|
||||
Using
|
||||
Usingbygroup
|
||||
Usingself
|
||||
end
|
||||
|
||||
struct Action
|
||||
property actions : Array(Action) = [] of Action
|
||||
|
||||
def initialize(@type : String, @xml : XML::Node?)
|
||||
@content_index : Array(Int32) = [] of Int32
|
||||
@depth : Int32 = 0
|
||||
@lexer_index : Int32 = 0
|
||||
@lexer_name : String = ""
|
||||
@states : Array(String) = [] of String
|
||||
@states_to_push : Array(String) = [] of String
|
||||
@token_type : String = ""
|
||||
@type : ActionType = ActionType::Token
|
||||
|
||||
def initialize(t : String, xml : XML::Node?)
|
||||
@type = ActionType.parse(t.capitalize)
|
||||
|
||||
# Some actions may have actions in them, like this:
|
||||
# <bygroups>
|
||||
# <token type="GenericPrompt"/>
|
||||
@ -24,48 +44,56 @@ module Tartrazine
|
||||
#
|
||||
# The token actions match with the first 2 groups in the regex
|
||||
# the using action matches the 3rd and shunts it to another lexer
|
||||
@xml.children.each do |node|
|
||||
xml.children.each do |node|
|
||||
next unless node.element?
|
||||
@actions << Action.new(node.name, node)
|
||||
end
|
||||
|
||||
# Prefetch the attributes we ned from the XML and keep them
|
||||
case @type
|
||||
when ActionType::Token
|
||||
@token_type = xml["type"]
|
||||
when ActionType::Push
|
||||
@states_to_push = xml.attributes.select { |attrib|
|
||||
attrib.name == "state"
|
||||
}.map &.content
|
||||
when ActionType::Pop
|
||||
@depth = xml["depth"].to_i
|
||||
when ActionType::Using
|
||||
@lexer_name = xml["lexer"].downcase
|
||||
when ActionType::Combined
|
||||
@states = xml.attributes.select { |attrib|
|
||||
attrib.name == "state"
|
||||
}.map &.content
|
||||
when ActionType::Usingbygroup
|
||||
@lexer_index = xml["lexer"].to_i
|
||||
@content_index = xml["content"].split(",").map(&.to_i)
|
||||
end
|
||||
end
|
||||
|
||||
# ameba:disable Metrics/CyclomaticComplexity
|
||||
def emit(match : Regex::MatchData?, lexer : Lexer, match_group = 0) : Array(Token)
|
||||
case type
|
||||
when "token"
|
||||
raise Exception.new "Can't have a token without a match" if match.nil?
|
||||
[Token.new(type: xml["type"], value: match[match_group])]
|
||||
when "push"
|
||||
states_to_push = xml.attributes.select { |attrib|
|
||||
attrib.name == "state"
|
||||
}.map &.content
|
||||
if states_to_push.empty?
|
||||
# Push without a state means push the current state
|
||||
states_to_push = [lexer.state_stack.last]
|
||||
end
|
||||
states_to_push.each do |state|
|
||||
if state == "#pop"
|
||||
def emit(match : MatchData, tokenizer : Tokenizer, match_group = 0) : Array(Token)
|
||||
case @type
|
||||
when ActionType::Token
|
||||
raise Exception.new "Can't have a token without a match" if match.empty?
|
||||
[Token.new(type: @token_type, value: String.new(match[match_group].value))]
|
||||
when ActionType::Push
|
||||
to_push = @states_to_push.empty? ? [tokenizer.state_stack.last] : @states_to_push
|
||||
to_push.each do |state|
|
||||
if state == "#pop" && tokenizer.state_stack.size > 1
|
||||
# Pop the state
|
||||
Log.trace { "Popping state" }
|
||||
lexer.state_stack.pop
|
||||
tokenizer.state_stack.pop
|
||||
else
|
||||
# Really push
|
||||
lexer.state_stack << state
|
||||
Log.trace { "Pushed #{lexer.state_stack}" }
|
||||
tokenizer.state_stack << state
|
||||
end
|
||||
end
|
||||
[] of Token
|
||||
when "pop"
|
||||
depth = xml["depth"].to_i
|
||||
Log.trace { "Popping #{depth} states" }
|
||||
if lexer.state_stack.size <= depth
|
||||
Log.trace { "Can't pop #{depth} states, only have #{lexer.state_stack.size}" }
|
||||
else
|
||||
lexer.state_stack.pop(depth)
|
||||
end
|
||||
when ActionType::Pop
|
||||
to_pop = [@depth, tokenizer.state_stack.size - 1].min
|
||||
tokenizer.state_stack.pop(to_pop)
|
||||
[] of Token
|
||||
when "bygroups"
|
||||
when ActionType::Bygroups
|
||||
# FIXME: handle
|
||||
# ><bygroups>
|
||||
# <token type="Punctuation"/>
|
||||
@ -80,38 +108,50 @@ module Tartrazine
|
||||
# the action is skipped.
|
||||
result = [] of Token
|
||||
@actions.each_with_index do |e, i|
|
||||
next if match[i + 1]?.nil?
|
||||
result += e.emit(match, lexer, i + 1)
|
||||
begin
|
||||
next if match[i + 1].size == 0
|
||||
rescue IndexError
|
||||
# FIXME: This should not actually happen
|
||||
# No match for this group
|
||||
next
|
||||
end
|
||||
result += e.emit(match, tokenizer, i + 1)
|
||||
end
|
||||
result
|
||||
when "using"
|
||||
when ActionType::Using
|
||||
# Shunt to another lexer entirely
|
||||
return [] of Token if match.nil?
|
||||
lexer_name = xml["lexer"].downcase
|
||||
Log.trace { "to tokenize: #{match[match_group]}" }
|
||||
Tartrazine.lexer(lexer_name).tokenize(match[match_group], usingself: true)
|
||||
when "usingself"
|
||||
return [] of Token if match.empty?
|
||||
Tartrazine.lexer(@lexer_name).tokenizer(
|
||||
String.new(match[match_group].value),
|
||||
secondary: true).to_a
|
||||
when ActionType::Usingself
|
||||
# Shunt to another copy of this lexer
|
||||
return [] of Token if match.nil?
|
||||
|
||||
new_lexer = Lexer.from_xml(lexer.xml)
|
||||
Log.trace { "to tokenize: #{match[match_group]}" }
|
||||
new_lexer.tokenize(match[match_group], usingself: true)
|
||||
when "combined"
|
||||
# Combine two states into one anonymous state
|
||||
states = xml.attributes.select { |attrib|
|
||||
attrib.name == "state"
|
||||
}.map &.content
|
||||
new_state = states.map { |name|
|
||||
lexer.states[name]
|
||||
return [] of Token if match.empty?
|
||||
tokenizer.lexer.tokenizer(
|
||||
String.new(match[match_group].value),
|
||||
secondary: true).to_a
|
||||
when ActionType::Combined
|
||||
# Combine two or more states into one anonymous state
|
||||
new_state = @states.map { |name|
|
||||
tokenizer.lexer.states[name]
|
||||
}.reduce { |state1, state2|
|
||||
state1 + state2
|
||||
}
|
||||
lexer.states[new_state.name] = new_state
|
||||
lexer.state_stack << new_state.name
|
||||
tokenizer.lexer.states[new_state.name] = new_state
|
||||
tokenizer.state_stack << new_state.name
|
||||
[] of Token
|
||||
when ActionType::Usingbygroup
|
||||
# Shunt to content-specified lexer
|
||||
return [] of Token if match.empty?
|
||||
content = ""
|
||||
@content_index.each do |i|
|
||||
content += String.new(match[i].value)
|
||||
end
|
||||
Tartrazine.lexer(String.new(match[@lexer_index].value)).tokenizer(
|
||||
content,
|
||||
secondary: true).to_a
|
||||
else
|
||||
raise Exception.new("Unknown action type: #{type}: #{xml}")
|
||||
raise Exception.new("Unknown action type: #{@type}")
|
||||
end
|
||||
end
|
||||
end
|
||||
|
73
src/bytes_regex.cr
Normal file
73
src/bytes_regex.cr
Normal file
@ -0,0 +1,73 @@
|
||||
module BytesRegex
|
||||
extend self
|
||||
|
||||
class Regex
|
||||
def initialize(pattern : String, multiline = false, dotall = false, ignorecase = false, anchored = false)
|
||||
flags = LibPCRE2::UTF | LibPCRE2::UCP | LibPCRE2::NO_UTF_CHECK
|
||||
flags |= LibPCRE2::MULTILINE if multiline
|
||||
flags |= LibPCRE2::DOTALL if dotall
|
||||
flags |= LibPCRE2::CASELESS if ignorecase
|
||||
flags |= LibPCRE2::ANCHORED if anchored
|
||||
if @re = LibPCRE2.compile(
|
||||
pattern,
|
||||
pattern.bytesize,
|
||||
flags,
|
||||
out errorcode,
|
||||
out erroroffset,
|
||||
nil)
|
||||
else
|
||||
msg = String.new(256) do |buffer|
|
||||
bytesize = LibPCRE2.get_error_message(errorcode, buffer, 256)
|
||||
{bytesize, 0}
|
||||
end
|
||||
raise Exception.new "Error #{msg} compiling regex at offset #{erroroffset}"
|
||||
end
|
||||
@match_data = LibPCRE2.match_data_create_from_pattern(@re, nil)
|
||||
end
|
||||
|
||||
def finalize
|
||||
LibPCRE2.match_data_free(@match_data)
|
||||
LibPCRE2.code_free(@re)
|
||||
end
|
||||
|
||||
def match(str : Bytes, pos = 0) : Array(Match)
|
||||
rc = LibPCRE2.match(
|
||||
@re,
|
||||
str,
|
||||
str.size,
|
||||
pos,
|
||||
LibPCRE2::NO_UTF_CHECK,
|
||||
@match_data,
|
||||
nil)
|
||||
if rc > 0
|
||||
ovector = LibPCRE2.get_ovector_pointer(@match_data)
|
||||
(0...rc).map do |i|
|
||||
m_start = ovector[2 * i]
|
||||
m_end = ovector[2 * i + 1]
|
||||
if m_start == m_end
|
||||
m_value = Bytes.new(0)
|
||||
else
|
||||
m_value = str[m_start...m_end]
|
||||
end
|
||||
Match.new(m_value, m_start, m_end - m_start)
|
||||
end
|
||||
else
|
||||
[] of Match
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
struct Match
|
||||
property value : Bytes
|
||||
property start : UInt64
|
||||
property size : UInt64
|
||||
|
||||
def initialize(@value : Bytes, @start : UInt64, @size : UInt64)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# pattern = "foo"
|
||||
# str = "foo bar"
|
||||
# re = BytesRegex::Regex.new(pattern)
|
||||
# p! String.new(re.match(str.to_slice)[0].value)
|
1184
src/constants/lexers.cr
Normal file
1184
src/constants/lexers.cr
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,92 +1,100 @@
|
||||
module Tartrazine
|
||||
Abbreviations = {
|
||||
"Background" => "b",
|
||||
"Text" => "t",
|
||||
"CodeLine" => "cl",
|
||||
"Comment" => "c",
|
||||
"CommentHashbang" => "ch",
|
||||
"CommentMultiline" => "cm",
|
||||
"CommentPreproc" => "cp",
|
||||
"CommentPreprocFile" => "cpf",
|
||||
"CommentSingle" => "cs",
|
||||
"CommentSpecial" => "cs",
|
||||
"NameVariable" => "nv",
|
||||
"Keyword" => "k",
|
||||
"NameFunction" => "nf",
|
||||
"Punctuation" => "p",
|
||||
"Operator" => "o",
|
||||
"LiteralNumberInteger" => "lni",
|
||||
"NameBuiltin" => "nb",
|
||||
"Name" => "n",
|
||||
"OperatorWord" => "ow",
|
||||
"LiteralStringSingle" => "lss",
|
||||
"Literal" => "l",
|
||||
"NameClass" => "nc",
|
||||
"CommentMultiline" => "cm",
|
||||
"LiteralStringRegex" => "lsr",
|
||||
"KeywordDeclaration" => "kd",
|
||||
"KeywordConstant" => "kc",
|
||||
"NameOther" => "no",
|
||||
"LiteralNumberFloat" => "lnf",
|
||||
"LiteralNumberHex" => "lnh",
|
||||
"LiteralStringDouble" => "lsd",
|
||||
"KeywordType" => "kt",
|
||||
"NameNamespace" => "nn",
|
||||
"NameAttribute" => "na",
|
||||
"KeywordReserved" => "kr",
|
||||
"CommentPreproc" => "cp",
|
||||
"KeywordNamespace" => "kn",
|
||||
"NameConstant" => "nc",
|
||||
"NameLabel" => "nl",
|
||||
"LiteralString" => "ls",
|
||||
"LiteralStringChar" => "lsc",
|
||||
"TextWhitespace" => "tw",
|
||||
"LiteralStringEscape" => "lse",
|
||||
"LiteralNumber" => "ln",
|
||||
"Other" => "o",
|
||||
"LiteralStringBoolean" => "lsb",
|
||||
"NameProperty" => "np",
|
||||
"Comment" => "c",
|
||||
"NameTag" => "nt",
|
||||
"LiteralStringOther" => "lso",
|
||||
"NameVariableGlobal" => "nvg",
|
||||
"NameBuiltinPseudo" => "nbp",
|
||||
"LiteralNumberBin" => "lnb",
|
||||
"KeywordPseudo" => "kp",
|
||||
"CommentPreprocFile" => "cpf",
|
||||
"LiteralStringAffix" => "lsa",
|
||||
"LiteralStringDelimiter" => "lsd",
|
||||
"LiteralNumberOct" => "lno",
|
||||
"Error" => "e",
|
||||
"Generic" => "g",
|
||||
"LiteralNumberIntegerLong" => "lnil",
|
||||
"NameDecorator" => "nd",
|
||||
"LiteralStringInterpol" => "lsi",
|
||||
"LiteralStringBacktick" => "lsb",
|
||||
"GenericPrompt" => "gp",
|
||||
"GenericOutput" => "go",
|
||||
"LiteralStringName" => "lsn",
|
||||
"LiteralStringHeredoc" => "lsh",
|
||||
"LiteralStringSymbol" => "lss",
|
||||
"NameVariableInstance" => "nvi",
|
||||
"LiteralOther" => "lo",
|
||||
"NameVariableClass" => "nvc",
|
||||
"NameOperator" => "no",
|
||||
"None" => "n",
|
||||
"LiteralStringDoc" => "lsd",
|
||||
"NameException" => "ne",
|
||||
"GenericSubheading" => "gs",
|
||||
"GenericStrong" => "gs",
|
||||
"GenericDeleted" => "gd",
|
||||
"GenericInserted" => "gi",
|
||||
"GenericHeading" => "gh",
|
||||
"NameEntity" => "ne",
|
||||
"NamePseudo" => "np",
|
||||
"CommentHashbang" => "ch",
|
||||
"TextPunctuation" => "tp",
|
||||
"NameVariableAnonymous" => "nva",
|
||||
"NameVariableMagic" => "nvm",
|
||||
"NameFunctionMagic" => "nfm",
|
||||
"GenericEmph" => "ge",
|
||||
"GenericUnderline" => "gu",
|
||||
"LiteralStringAtom" => "lsa",
|
||||
"LiteralDate" => "ld",
|
||||
"GenericError" => "ge",
|
||||
"TextSymbol" => "ts",
|
||||
"GenericHeading" => "gh",
|
||||
"GenericInserted" => "gi",
|
||||
"GenericOutput" => "go",
|
||||
"GenericPrompt" => "gp",
|
||||
"GenericStrong" => "gs",
|
||||
"GenericSubheading" => "gs",
|
||||
"GenericTraceback" => "gt",
|
||||
"GenericUnderline" => "gu",
|
||||
"Highlight" => "hl",
|
||||
"Keyword" => "k",
|
||||
"KeywordConstant" => "kc",
|
||||
"KeywordDeclaration" => "kd",
|
||||
"KeywordNamespace" => "kn",
|
||||
"KeywordPseudo" => "kp",
|
||||
"KeywordReserved" => "kr",
|
||||
"KeywordType" => "kt",
|
||||
"LineHighlight" => "lh",
|
||||
"LineNumbers" => "ln",
|
||||
"LineNumbersTable" => "lnt",
|
||||
"LineTable" => "lt",
|
||||
"LineTableTD" => "lttd",
|
||||
"Literal" => "l",
|
||||
"LiteralDate" => "ld",
|
||||
"LiteralNumber" => "ln",
|
||||
"LiteralNumberBin" => "lnb",
|
||||
"LiteralNumberFloat" => "lnf",
|
||||
"LiteralNumberHex" => "lnh",
|
||||
"LiteralNumberInteger" => "lni",
|
||||
"LiteralNumberIntegerLong" => "lnil",
|
||||
"LiteralNumberOct" => "lno",
|
||||
"LiteralOther" => "lo",
|
||||
"LiteralString" => "ls",
|
||||
"LiteralStringAffix" => "lsa",
|
||||
"LiteralStringAtom" => "lsa",
|
||||
"LiteralStringBacktick" => "lsb",
|
||||
"LiteralStringBoolean" => "lsb",
|
||||
"LiteralStringChar" => "lsc",
|
||||
"LiteralStringDelimiter" => "lsd",
|
||||
"LiteralStringDoc" => "lsd",
|
||||
"LiteralStringDouble" => "lsd",
|
||||
"LiteralStringEscape" => "lse",
|
||||
"LiteralStringHeredoc" => "lsh",
|
||||
"LiteralStringInterpol" => "lsi",
|
||||
"LiteralStringName" => "lsn",
|
||||
"LiteralStringOther" => "lso",
|
||||
"LiteralStringRegex" => "lsr",
|
||||
"LiteralStringSingle" => "lss",
|
||||
"LiteralStringSymbol" => "lss",
|
||||
"Name" => "n",
|
||||
"NameAttribute" => "na",
|
||||
"NameBuiltin" => "nb",
|
||||
"NameBuiltinPseudo" => "nbp",
|
||||
"NameClass" => "nc",
|
||||
"NameConstant" => "nc",
|
||||
"NameDecorator" => "nd",
|
||||
"NameEntity" => "ne",
|
||||
"NameException" => "ne",
|
||||
"NameFunction" => "nf",
|
||||
"NameFunctionMagic" => "nfm",
|
||||
"NameKeyword" => "nk",
|
||||
"NameLabel" => "nl",
|
||||
"NameNamespace" => "nn",
|
||||
"NameOperator" => "no",
|
||||
"NameOther" => "no",
|
||||
"NameProperty" => "np",
|
||||
"NamePseudo" => "np",
|
||||
"NameTag" => "nt",
|
||||
"NameVariable" => "nv",
|
||||
"NameVariableAnonymous" => "nva",
|
||||
"NameVariableClass" => "nvc",
|
||||
"NameVariableGlobal" => "nvg",
|
||||
"NameVariableInstance" => "nvi",
|
||||
"NameVariableMagic" => "nvm",
|
||||
"None" => "n",
|
||||
"Operator" => "o",
|
||||
"OperatorWord" => "ow",
|
||||
"Other" => "o",
|
||||
"Punctuation" => "p",
|
||||
"Text" => "t",
|
||||
"TextPunctuation" => "tp",
|
||||
"TextSymbol" => "ts",
|
||||
"TextWhitespace" => "tw",
|
||||
}
|
||||
end
|
104
src/formatter.cr
104
src/formatter.cr
@ -1,5 +1,4 @@
|
||||
require "./actions"
|
||||
require "./constants"
|
||||
require "./formatter"
|
||||
require "./rules"
|
||||
require "./styles"
|
||||
@ -10,102 +9,27 @@ module Tartrazine
|
||||
# This is the base class for all formatters.
|
||||
abstract class Formatter
|
||||
property name : String = ""
|
||||
property theme : Theme = Tartrazine.theme("default-dark")
|
||||
|
||||
def format(text : String, lexer : Lexer, theme : Theme) : String
|
||||
# Format the text using the given lexer.
|
||||
def format(text : String, lexer : Lexer, io : IO = nil) : Nil
|
||||
raise Exception.new("Not implemented")
|
||||
end
|
||||
|
||||
def get_style_defs(theme : Theme) : String
|
||||
def format(text : String, lexer : Lexer) : String
|
||||
outp = String::Builder.new("")
|
||||
format(text, lexer, outp)
|
||||
outp.to_s
|
||||
end
|
||||
|
||||
# Return the styles, if the formatter supports it.
|
||||
def style_defs : String
|
||||
raise Exception.new("Not implemented")
|
||||
end
|
||||
end
|
||||
|
||||
class Ansi < Formatter
|
||||
def format(text : String, lexer : Lexer, theme : Theme) : String
|
||||
output = String.build do |outp|
|
||||
lexer.tokenize(text).each do |token|
|
||||
outp << self.colorize(token[:value], token[:type], theme)
|
||||
end
|
||||
end
|
||||
output
|
||||
end
|
||||
|
||||
def colorize(text : String, token : String, theme : Theme) : String
|
||||
style = theme.styles.fetch(token, nil)
|
||||
return text if style.nil?
|
||||
if theme.styles.has_key?(token)
|
||||
s = theme.styles[token]
|
||||
else
|
||||
# Themes don't contain information for each specific
|
||||
# token type. However, they may contain information
|
||||
# for a parent style. Worst case, we go to the root
|
||||
# (Background) style.
|
||||
s = theme.styles[theme.style_parents(token).reverse.find { |parent|
|
||||
theme.styles.has_key?(parent)
|
||||
}]
|
||||
end
|
||||
colorized = text.colorize
|
||||
s.color.try { |c| colorized = colorized.fore(c.colorize) }
|
||||
# Intentionally not setting background color
|
||||
colorized.mode(:bold) if s.bold
|
||||
colorized.mode(:italic) if s.italic
|
||||
colorized.mode(:underline) if s.underline
|
||||
colorized.to_s
|
||||
end
|
||||
end
|
||||
|
||||
class Html < Formatter
|
||||
def format(text : String, lexer : Lexer, theme : Theme) : String
|
||||
output = String.build do |outp|
|
||||
outp << "<html><head><style>"
|
||||
outp << get_style_defs(theme)
|
||||
outp << "</style></head><body>"
|
||||
outp << "<pre class=\"#{get_css_class("Background", theme)}\"><code class=\"#{get_css_class("Background", theme)}\">"
|
||||
lexer.tokenize(text).each do |token|
|
||||
fragment = "<span class=\"#{get_css_class(token[:type], theme)}\">#{token[:value]}</span>"
|
||||
outp << fragment
|
||||
end
|
||||
outp << "</code></pre></body></html>"
|
||||
end
|
||||
output
|
||||
end
|
||||
|
||||
# ameba:disable Metrics/CyclomaticComplexity
|
||||
def get_style_defs(theme : Theme) : String
|
||||
output = String.build do |outp|
|
||||
theme.styles.each do |token, style|
|
||||
outp << ".#{get_css_class(token, theme)} {"
|
||||
# These are set or nil
|
||||
outp << "color: #{style.color.try &.hex};" if style.color
|
||||
outp << "background-color: #{style.background.try &.hex};" if style.background
|
||||
outp << "border: 1px solid #{style.border.try &.hex};" if style.border
|
||||
|
||||
# These are true/false/nil
|
||||
outp << "border: none;" if style.border == false
|
||||
outp << "font-weight: bold;" if style.bold
|
||||
outp << "font-weight: 400;" if style.bold == false
|
||||
outp << "font-style: italic;" if style.italic
|
||||
outp << "font-style: normal;" if style.italic == false
|
||||
outp << "text-decoration: underline;" if style.underline
|
||||
outp << "text-decoration: none;" if style.underline == false
|
||||
|
||||
outp << "}"
|
||||
end
|
||||
end
|
||||
output
|
||||
end
|
||||
|
||||
# Given a token type, return the CSS class to use.
|
||||
def get_css_class(token, theme)
|
||||
return Abbreviations[token] if theme.styles.has_key?(token)
|
||||
|
||||
# Themes don't contain information for each specific
|
||||
# token type. However, they may contain information
|
||||
# for a parent style. Worst case, we go to the root
|
||||
# (Background) style.
|
||||
Abbreviations[theme.style_parents(token).reverse.find { |parent|
|
||||
theme.styles.has_key?(parent)
|
||||
}]
|
||||
# Is this line in the highlighted ranges?
|
||||
def highlighted?(line : Int) : Bool
|
||||
highlight_lines.any?(&.includes?(line))
|
||||
end
|
||||
end
|
||||
end
|
||||
|
57
src/formatters/ansi.cr
Normal file
57
src/formatters/ansi.cr
Normal file
@ -0,0 +1,57 @@
|
||||
require "../formatter"
|
||||
|
||||
module Tartrazine
|
||||
def self.to_ansi(text : String, language : String,
|
||||
theme : String = "default-dark",
|
||||
line_numbers : Bool = false) : String
|
||||
Tartrazine::Ansi.new(
|
||||
theme: Tartrazine.theme(theme),
|
||||
line_numbers: line_numbers
|
||||
).format(text, Tartrazine.lexer(name: language))
|
||||
end
|
||||
|
||||
class Ansi < Formatter
|
||||
property? line_numbers : Bool = false
|
||||
|
||||
def initialize(@theme : Theme = Tartrazine.theme("default-dark"), @line_numbers : Bool = false)
|
||||
end
|
||||
|
||||
private def line_label(i : Int32) : String
|
||||
"#{i + 1}".rjust(4).ljust(5)
|
||||
end
|
||||
|
||||
def format(text : String, lexer : BaseLexer, outp : IO) : Nil
|
||||
tokenizer = lexer.tokenizer(text)
|
||||
i = 0
|
||||
outp << line_label(i) if line_numbers?
|
||||
tokenizer.each do |token|
|
||||
outp << colorize(token[:value], token[:type])
|
||||
if token[:value].includes?("\n")
|
||||
i += 1
|
||||
outp << line_label(i) if line_numbers?
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def colorize(text : String, token : String) : String
|
||||
if theme.styles.has_key?(token)
|
||||
s = theme.styles[token]
|
||||
else
|
||||
# Themes don't contain information for each specific
|
||||
# token type. However, they may contain information
|
||||
# for a parent style. Worst case, we go to the root
|
||||
# (Background) style.
|
||||
s = theme.styles[theme.style_parents(token).reverse.find { |parent|
|
||||
theme.styles.has_key?(parent)
|
||||
}]
|
||||
end
|
||||
colorized = text.colorize
|
||||
s.color.try { |col| colorized = colorized.fore(col.colorize) }
|
||||
# Intentionally not setting background color
|
||||
colorized.mode(:bold) if s.bold
|
||||
colorized.mode(:italic) if s.italic
|
||||
colorized.mode(:underline) if s.underline
|
||||
colorized.to_s
|
||||
end
|
||||
end
|
||||
end
|
149
src/formatters/html.cr
Normal file
149
src/formatters/html.cr
Normal file
@ -0,0 +1,149 @@
|
||||
require "../constants/token_abbrevs.cr"
|
||||
require "../formatter"
|
||||
require "html"
|
||||
|
||||
module Tartrazine
|
||||
def self.to_html(text : String, language : String,
|
||||
theme : String = "default-dark",
|
||||
standalone : Bool = true,
|
||||
line_numbers : Bool = false) : String
|
||||
Tartrazine::Html.new(
|
||||
theme: Tartrazine.theme(theme),
|
||||
standalone: standalone,
|
||||
line_numbers: line_numbers
|
||||
).format(text, Tartrazine.lexer(name: language))
|
||||
end
|
||||
|
||||
class Html < Formatter
|
||||
# property line_number_in_table : Bool = false
|
||||
# property with_classes : Bool = true
|
||||
property class_prefix : String = ""
|
||||
property highlight_lines : Array(Range(Int32, Int32)) = [] of Range(Int32, Int32)
|
||||
property line_number_id_prefix : String = "line-"
|
||||
property line_number_start : Int32 = 1
|
||||
property tab_width = 8
|
||||
property? line_numbers : Bool = false
|
||||
property? linkable_line_numbers : Bool = true
|
||||
property? standalone : Bool = false
|
||||
property? surrounding_pre : Bool = true
|
||||
property? wrap_long_lines : Bool = false
|
||||
property weight_of_bold : Int32 = 600
|
||||
property template : String = <<-TEMPLATE
|
||||
<!DOCTYPE html><html><head><style>
|
||||
{{style_defs}}
|
||||
</style></head><body>
|
||||
{{body}}
|
||||
</body></html>
|
||||
TEMPLATE
|
||||
|
||||
property theme : Theme
|
||||
|
||||
def initialize(@theme : Theme = Tartrazine.theme("default-dark"), *,
|
||||
@highlight_lines = [] of Range(Int32, Int32),
|
||||
@class_prefix : String = "",
|
||||
@line_number_id_prefix = "line-",
|
||||
@line_number_start = 1,
|
||||
@tab_width = 8,
|
||||
@line_numbers : Bool = false,
|
||||
@linkable_line_numbers : Bool = true,
|
||||
@standalone : Bool = false,
|
||||
@surrounding_pre : Bool = true,
|
||||
@wrap_long_lines : Bool = false,
|
||||
@weight_of_bold : Int32 = 600,
|
||||
@template : String = @template)
|
||||
end
|
||||
|
||||
def format(text : String, lexer : Lexer) : String
|
||||
outp = String::Builder.new("")
|
||||
format(text, lexer, outp)
|
||||
outp.to_s
|
||||
end
|
||||
|
||||
def format(text : String, lexer : BaseLexer, io : IO) : Nil
|
||||
pre, post = wrap_standalone
|
||||
io << pre if standalone?
|
||||
format_text(text, lexer, io)
|
||||
io << post if standalone?
|
||||
end
|
||||
|
||||
# Wrap text into a full HTML document, including the CSS for the theme
|
||||
def wrap_standalone
|
||||
output = String.build do |outp|
|
||||
if @template.includes? "{{style_defs}}"
|
||||
outp << @template.split("{{style_defs}}")[0]
|
||||
outp << style_defs
|
||||
outp << @template.split("{{style_defs}}")[1].split("{{body}}")[0]
|
||||
else
|
||||
outp << @template.split("{{body}}")[0]
|
||||
end
|
||||
end
|
||||
{output.to_s, @template.split("{{body}}")[1]}
|
||||
end
|
||||
|
||||
private def line_label(i : Int32) : String
|
||||
line_label = "#{i + 1}".rjust(4).ljust(5)
|
||||
line_class = highlighted?(i + 1) ? "class=\"#{get_css_class("LineHighlight")}\"" : ""
|
||||
line_id = linkable_line_numbers? ? "id=\"#{line_number_id_prefix}#{i + 1}\"" : ""
|
||||
"<span #{line_id} #{line_class} style=\"user-select: none;\">#{line_label} </span>"
|
||||
end
|
||||
|
||||
def format_text(text : String, lexer : BaseLexer, outp : IO)
|
||||
tokenizer = lexer.tokenizer(text)
|
||||
i = 0
|
||||
if surrounding_pre?
|
||||
pre_style = wrap_long_lines? ? "style=\"white-space: pre-wrap; word-break: break-word;\"" : ""
|
||||
outp << "<pre class=\"#{get_css_class("Background")}\" #{pre_style}>"
|
||||
end
|
||||
outp << "<code class=\"#{get_css_class("Background")}\">"
|
||||
outp << line_label(i) if line_numbers?
|
||||
tokenizer.each do |token|
|
||||
outp << "<span class=\"#{get_css_class(token[:type])}\">#{HTML.escape(token[:value])}</span>"
|
||||
if token[:value].ends_with? "\n"
|
||||
i += 1
|
||||
outp << line_label(i) if line_numbers?
|
||||
end
|
||||
end
|
||||
outp << "</code></pre>"
|
||||
end
|
||||
|
||||
# ameba:disable Metrics/CyclomaticComplexity
|
||||
def style_defs : String
|
||||
output = String.build do |outp|
|
||||
theme.styles.each do |token, style|
|
||||
outp << ".#{get_css_class(token)} {"
|
||||
# These are set or nil
|
||||
outp << "color: ##{style.color.try &.hex};" if style.color
|
||||
outp << "background-color: ##{style.background.try &.hex};" if style.background
|
||||
outp << "border: 1px solid ##{style.border.try &.hex};" if style.border
|
||||
|
||||
# These are true/false/nil
|
||||
outp << "border: none;" if style.border == false
|
||||
outp << "font-weight: #{@weight_of_bold};" if style.bold
|
||||
outp << "font-style: italic;" if style.italic
|
||||
outp << "font-style: normal;" if style.italic == false
|
||||
outp << "text-decoration: underline;" if style.underline
|
||||
outp << "text-decoration: none;" if style.underline == false
|
||||
outp << "tab-size: #{tab_width};" if token == "Background"
|
||||
|
||||
outp << "}"
|
||||
end
|
||||
end
|
||||
output
|
||||
end
|
||||
|
||||
# Given a token type, return the CSS class to use.
|
||||
def get_css_class(token : String) : String
|
||||
if !theme.styles.has_key? token
|
||||
# Themes don't contain information for each specific
|
||||
# token type. However, they may contain information
|
||||
# for a parent style. Worst case, we go to the root
|
||||
# (Background) style.
|
||||
parent = theme.style_parents(token).reverse.find { |dad|
|
||||
theme.styles.has_key?(dad)
|
||||
}
|
||||
theme.styles[token] = theme.styles[parent]
|
||||
end
|
||||
class_prefix + Abbreviations[token]
|
||||
end
|
||||
end
|
||||
end
|
18
src/formatters/json.cr
Normal file
18
src/formatters/json.cr
Normal file
@ -0,0 +1,18 @@
|
||||
require "../formatter"
|
||||
|
||||
module Tartrazine
|
||||
class Json < Formatter
|
||||
property name = "json"
|
||||
|
||||
def format(text : String, lexer : BaseLexer) : String
|
||||
outp = String::Builder.new("")
|
||||
format(text, lexer, outp)
|
||||
outp.to_s
|
||||
end
|
||||
|
||||
def format(text : String, lexer : BaseLexer, io : IO) : Nil
|
||||
tokenizer = lexer.tokenizer(text)
|
||||
io << Tartrazine::RegexLexer.collapse_tokens(tokenizer.to_a).to_json
|
||||
end
|
||||
end
|
||||
end
|
117
src/formatters/png.cr
Normal file
117
src/formatters/png.cr
Normal file
@ -0,0 +1,117 @@
|
||||
require "../formatter"
|
||||
require "compress/gzip"
|
||||
require "digest/sha1"
|
||||
require "stumpy_png"
|
||||
require "stumpy_utils"
|
||||
|
||||
module Tartrazine
|
||||
def self.to_png(text : String, language : String,
|
||||
theme : String = "default-dark",
|
||||
line_numbers : Bool = false) : String
|
||||
buf = IO::Memory.new
|
||||
|
||||
Tartrazine::Png.new(
|
||||
theme: Tartrazine.theme(theme),
|
||||
line_numbers: line_numbers
|
||||
).format(text, Tartrazine.lexer(name: language), buf)
|
||||
buf.to_s
|
||||
end
|
||||
|
||||
class FontFiles
|
||||
extend BakedFileSystem
|
||||
bake_folder "../../fonts", __DIR__
|
||||
end
|
||||
|
||||
class Png < Formatter
|
||||
include StumpyPNG
|
||||
property? line_numbers : Bool = false
|
||||
@font_regular : PCFParser::Font
|
||||
@font_bold : PCFParser::Font
|
||||
@font_oblique : PCFParser::Font
|
||||
@font_bold_oblique : PCFParser::Font
|
||||
@font_width = 15
|
||||
@font_height = 24
|
||||
|
||||
def initialize(@theme : Theme = Tartrazine.theme("default-dark"), @line_numbers : Bool = false)
|
||||
@font_regular = load_font("/courier-regular.pcf.gz")
|
||||
@font_bold = load_font("/courier-bold.pcf.gz")
|
||||
@font_oblique = load_font("/courier-oblique.pcf.gz")
|
||||
@font_bold_oblique = load_font("/courier-bold-oblique.pcf.gz")
|
||||
end
|
||||
|
||||
private def load_font(name : String) : PCFParser::Font
|
||||
compressed = FontFiles.get(name)
|
||||
uncompressed = Compress::Gzip::Reader.open(compressed) do |gzip|
|
||||
gzip.gets_to_end
|
||||
end
|
||||
PCFParser::Font.new(IO::Memory.new uncompressed)
|
||||
end
|
||||
|
||||
private def line_label(i : Int32) : String
|
||||
"#{i + 1}".rjust(4).ljust(5)
|
||||
end
|
||||
|
||||
def format(text : String, lexer : BaseLexer, outp : IO) : Nil
|
||||
# Create canvas of correct size
|
||||
lines = text.split("\n")
|
||||
canvas_height = lines.size * @font_height
|
||||
canvas_width = lines.max_of(&.size)
|
||||
canvas_width += 5 if line_numbers?
|
||||
canvas_width *= @font_width
|
||||
|
||||
bg_color = RGBA.from_hex("##{theme.styles["Background"].background.try &.hex}")
|
||||
canvas = Canvas.new(canvas_width, canvas_height, bg_color)
|
||||
|
||||
tokenizer = lexer.tokenizer(text)
|
||||
x = 0
|
||||
y = @font_height
|
||||
i = 0
|
||||
if line_numbers?
|
||||
canvas.text(x, y, line_label(i), @font_regular, RGBA.from_hex("##{theme.styles["Background"].color.try &.hex}"))
|
||||
x += 5 * @font_width
|
||||
end
|
||||
|
||||
tokenizer.each do |token|
|
||||
font, color = token_style(token[:type])
|
||||
# These fonts are very limited
|
||||
t = token[:value].gsub(/[^[:ascii:]]/, "?")
|
||||
canvas.text(x, y, t.rstrip("\n"), font, color)
|
||||
if token[:value].includes?("\n")
|
||||
x = 0
|
||||
y += @font_height
|
||||
i += 1
|
||||
if line_numbers?
|
||||
canvas.text(x, y, line_label(i), @font_regular, RGBA.from_hex("##{theme.styles["Background"].color.try &.hex}"))
|
||||
x += 4 * @font_width
|
||||
end
|
||||
end
|
||||
|
||||
x += token[:value].size * @font_width
|
||||
end
|
||||
|
||||
StumpyPNG.write(canvas, outp)
|
||||
end
|
||||
|
||||
def token_style(token : String) : {PCFParser::Font, RGBA}
|
||||
if theme.styles.has_key?(token)
|
||||
s = theme.styles[token]
|
||||
else
|
||||
# Themes don't contain information for each specific
|
||||
# token type. However, they may contain information
|
||||
# for a parent style. Worst case, we go to the root
|
||||
# (Background) style.
|
||||
s = theme.styles[theme.style_parents(token).reverse.find { |parent|
|
||||
theme.styles.has_key?(parent)
|
||||
}]
|
||||
end
|
||||
|
||||
color = RGBA.from_hex("##{theme.styles["Background"].color.try &.hex}")
|
||||
color = RGBA.from_hex("##{s.color.try &.hex}") if s.color
|
||||
|
||||
return {@font_bold_oblique, color} if s.bold && s.italic
|
||||
return {@font_bold, color} if s.bold
|
||||
return {@font_oblique, color} if s.italic
|
||||
return {@font_regular, color}
|
||||
end
|
||||
end
|
||||
end
|
129
src/formatters/svg.cr
Normal file
129
src/formatters/svg.cr
Normal file
@ -0,0 +1,129 @@
|
||||
require "../constants/token_abbrevs.cr"
|
||||
require "../formatter"
|
||||
require "html"
|
||||
|
||||
module Tartrazine
|
||||
def self.to_svg(text : String, language : String,
|
||||
theme : String = "default-dark",
|
||||
standalone : Bool = true,
|
||||
line_numbers : Bool = false) : String
|
||||
Tartrazine::Svg.new(
|
||||
theme: Tartrazine.theme(theme),
|
||||
standalone: standalone,
|
||||
line_numbers: line_numbers
|
||||
).format(text, Tartrazine.lexer(name: language))
|
||||
end
|
||||
|
||||
class Svg < Formatter
  # Line ranges (1-based, inclusive) rendered with a bold line label.
  property highlight_lines : Array(Range(Int32, Int32)) = [] of Range(Int32, Int32)
  property line_number_id_prefix : String = "line-"
  property line_number_start : Int32 = 1
  property tab_width = 8
  property? line_numbers : Bool = false
  property? linkable_line_numbers : Bool = true
  property? standalone : Bool = false
  # font-weight used for bold text and for highlighted line labels.
  property weight_of_bold : Int32 = 600
  # Font size in pixels, parsed from the @font_size string.
  property fs : Int32
  # Vertical advance between consecutive lines, in pixels (fs + 5).
  property ystep : Int32
  property theme : Theme

  def initialize(@theme : Theme = Tartrazine.theme("default-dark"), *,
                 @highlight_lines = [] of Range(Int32, Int32),
                 @class_prefix : String = "",
                 @line_number_id_prefix = "line-",
                 @line_number_start = 1,
                 @tab_width = 8,
                 @line_numbers : Bool = false,
                 @linkable_line_numbers : Bool = true,
                 @standalone : Bool = false,
                 @weight_of_bold : Int32 = 600,
                 @font_family : String = "monospace",
                 @font_size : String = "14px")
    # Accept both "14px" and a bare "14" for the font size.
    if font_size.ends_with? "px"
      @fs = font_size[0...-2].to_i
    else
      @fs = font_size.to_i
    end
    @ystep = @fs + 5
  end

  # Format `text` into `io`, optionally wrapped in a standalone SVG document.
  def format(text : String, lexer : BaseLexer, io : IO) : Nil
    pre, post = wrap_standalone
    io << pre if standalone?
    format_text(text, lexer, io)
    io << post if standalone?
  end

  # Build the prologue/epilogue that wrap the output into a full
  # standalone SVG document (XML declaration, doctype, <svg> and a
  # <g> carrying the font settings).
  def wrap_standalone
    output = String.build do |outp|
      outp << %(<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN" "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
<svg xmlns="http://www.w3.org/2000/svg">
<g font-family="#{self.@font_family}" font-size="#{self.@font_size}">)
    end
    {output.to_s, "</g></svg>"}
  end

  # Render the right-aligned line-number label for line index `i`
  # (0-based; the visible number is i + 1).
  # NOTE(review): the `x` parameter is currently unused — the label is
  # anchored at a fixed 4*ystep offset. Kept for interface stability.
  private def line_label(i : Int32, x : Int32, y : Int32) : String
    line_label = "#{i + 1}".rjust(4).ljust(5)
    line_style = highlighted?(i + 1) ? "font-weight=\"#{@weight_of_bold}\"" : ""
    line_id = linkable_line_numbers? ? "id=\"#{line_number_id_prefix}#{i + 1}\"" : ""
    %(<text #{line_style} #{line_id} x="#{4*ystep}" y="#{y}" text-anchor="end">#{line_label}</text>)
  end

  # Emit `text` as SVG: one <text> element per source line, one styled
  # <tspan> per token.
  def format_text(text : String, lexer : BaseLexer, outp : IO)
    x = 0
    y = ystep
    i = 0
    line_x = x
    # Leave horizontal room for the line-number gutter.
    line_x += 5 * ystep if line_numbers?
    tokenizer = lexer.tokenizer(text)
    outp << line_label(i, x, y) if line_numbers?
    outp << %(<text x="#{line_x}" y="#{y}" xml:space="preserve">)
    tokenizer.each do |token|
      if token[:value].ends_with? "\n"
        # Token closes the current line: emit it without the trailing
        # newline, close the <text>, and open the next line.
        outp << "<tspan #{get_style(token[:type])}>#{HTML.escape(token[:value][0...-1])}</tspan>"
        outp << "</text>"
        x = 0
        y += ystep
        i += 1
        outp << line_label(i, x, y) if line_numbers?
        outp << %(<text x="#{line_x}" y="#{y}" xml:space="preserve">)
      else
        outp << "<tspan#{get_style(token[:type])}>#{HTML.escape(token[:value])}</tspan>"
        x += token[:value].size * ystep
      end
    end
    outp << "</text>"
  end

  # Given a token type, return its SVG presentation attributes
  # (leading-space separated, ready to splice into a <tspan>).
  def get_style(token : String) : String
    if !theme.styles.has_key? token
      # Themes don't contain information for each specific
      # token type. However, they may contain information
      # for a parent style. Worst case, we go to the root
      # (Background) style.
      parent = theme.style_parents(token).reverse.find { |dad|
        theme.styles.has_key?(dad)
      }
      theme.styles[token] = theme.styles[parent]
    end
    output = String.build do |outp|
      style = theme.styles[token]
      outp << " fill=\"##{style.color.try &.hex}\"" if style.color
      # No support for background color or border in SVG

      outp << " font-weight=\"#{@weight_of_bold}\"" if style.bold
      outp << " font-weight=\"normal\"" if style.bold == false
      outp << " font-style=\"italic\"" if style.italic
      outp << " font-style=\"normal\"" if style.italic == false
      outp << " text-decoration=\"underline\"" if style.underline
      # FIX: the attribute value was missing its closing quote
      # (`text-decoration="none` ), producing invalid SVG whenever a
      # style explicitly disables underline.
      outp << " text-decoration=\"none\"" if style.underline == false
    end
    output
  end
end
|
||||
end
|
81
src/heuristics.cr
Normal file
81
src/heuristics.cr
Normal file
@ -0,0 +1,81 @@
|
||||
require "yaml"
|
||||
|
||||
# Use linguist's heuristics to disambiguate between languages.
# This is *shamelessly* stolen from https://github.com/github-linguist/linguist
# and ported to Crystal. Deepest thanks to the authors of Linguist
# for licensing it liberally.
#
# Consider this code (c) 2017 GitHub, Inc. even if I wrote it.
module Linguist
  # Top-level heuristics document, deserialized from linguist's
  # heuristics.yml.
  class Heuristic
    include YAML::Serializable

    # One disambiguation entry per group of conflicting extensions.
    property disambiguations : Array(Disambiguation)
    # Reusable regex patterns that rules reference via `named_pattern`.
    property named_patterns : Hash(String, String | Array(String))

    # Run the heuristics on the given filename and content.
    # Returns the language name(s) chosen by the first matching rule,
    # or nil when no disambiguation covers this file's extension.
    def run(filename, content)
      ext = File.extname filename
      disambiguation = disambiguations.find do |item|
        item.extensions.includes? ext
      end
      disambiguation.try &.run(content, named_patterns)
    end
  end

  # A set of rules that applies to one group of file extensions.
  class Disambiguation
    include YAML::Serializable
    property extensions : Array(String)
    property rules : Array(LangRule)

    # Return the language of the first rule that matches `content`,
    # or nil when none does.
    def run(content, named_patterns)
      rules.each do |rule|
        if rule.match(content, named_patterns)
          return rule.language
        end
      end
      nil
    end
  end

  # A single heuristic rule: any combination of a positive pattern, a
  # negative pattern, a named pattern, and nested "and" sub-rules.
  class LangRule
    include YAML::Serializable
    property pattern : (String | Array(String))?
    property negative_pattern : (String | Array(String))?
    property named_pattern : String?
    property and : Array(LangRule)?
    property language : String | Array(String)?

    # ameba:disable Metrics/CyclomaticComplexity
    def match(content, named_patterns)
      # This rule matches without conditions
      return true if !pattern && !negative_pattern && !named_pattern && !and

      if pattern
        # Normalize String | Array(String) into an array of patterns.
        p_arr = [] of String
        p_arr << pattern.as(String) if pattern.is_a? String
        p_arr = pattern.as(Array(String)) if pattern.is_a? Array(String)
        return true if p_arr.any? { |pat| ::Regex.new(pat).matches?(content) }
      end
      if negative_pattern
        p_arr = [] of String
        p_arr << negative_pattern.as(String) if negative_pattern.is_a? String
        p_arr = negative_pattern.as(Array(String)) if negative_pattern.is_a? Array(String)
        # Matches when NONE of the negative patterns occur in content.
        return true if p_arr.none? { |pat| ::Regex.new(pat).matches?(content) }
      end
      if named_pattern
        p_arr = [] of String
        if named_patterns[named_pattern].is_a? String
          p_arr << named_patterns[named_pattern].as(String)
        else
          p_arr = named_patterns[named_pattern].as(Array(String))
        end
        result = p_arr.any? { |pat| ::Regex.new(pat).matches?(content) }
      end
      if and
        # All nested sub-rules must match.
        result = and.as(Array(LangRule)).all?(&.match(content, named_patterns))
      end
      # `result` is nil when only pattern/negative_pattern were present
      # and did not hit — nil is falsy, so the rule simply doesn't match.
      result
    end
  end
end
|
433
src/lexer.cr
Normal file
433
src/lexer.cr
Normal file
@ -0,0 +1,433 @@
|
||||
require "./constants/lexers"
|
||||
require "./heuristics"
|
||||
require "baked_file_system"
|
||||
require "crystal/syntax_highlighter"
|
||||
|
||||
module Tartrazine
|
||||
# Virtual filesystem holding the lexer XML definitions, baked into the
# binary at compile time.
class LexerFiles
  extend BakedFileSystem

  # Bake only the lexers listed in the TT_LEXERS environment variable
  # (comma-separated names); used with -Dnolexers for smaller binaries.
  macro bake_selected_lexers
    {% for lexer in env("TT_LEXERS").split "," %}
      bake_file {{ lexer }}+".xml", {{ read_file "#{__DIR__}/../lexers/" + lexer + ".xml" }}
    {% end %}
  end

  {% if flag?(:nolexers) %}
    bake_selected_lexers
  {% else %}
    bake_folder "../lexers", __DIR__
  {% end %}
end
|
||||
|
||||
# Get the lexer object for a language name, a filename, or a MIME type.
# Precedence: explicit name (unless "autodetect"), then filename, then
# mimetype; falls back to the plaintext lexer when nothing is given.
def self.lexer(name : String? = nil, filename : String? = nil, mimetype : String? = nil) : BaseLexer
  if name && name != "autodetect"
    lexer_by_name(name)
  elsif filename
    lexer_by_filename(filename)
  elsif mimetype
    lexer_by_mimetype(mimetype)
  else
    RegexLexer.from_xml(LexerFiles.get("/#{LEXERS_BY_NAME["plaintext"]}.xml").gets_to_end)
  end
end
|
||||
|
||||
# Resolve a lexer from a MIME type; raises for unknown types.
private def self.lexer_by_mimetype(mimetype : String) : BaseLexer
  file_name = LEXERS_BY_MIMETYPE.fetch(mimetype, nil)
  raise Exception.new("Unknown mimetype: #{mimetype}") unless file_name

  RegexLexer.from_xml(LexerFiles.get("/#{file_name}.xml").gets_to_end)
end
|
||||
|
||||
# Resolve a lexer from a (case-insensitive) name.
# "crystal" uses the native compiler-based lexer; names containing "+"
# build a delegating lexer (e.g. "jinja+html"). Raises for unknown names.
private def self.lexer_by_name(name : String) : BaseLexer
  return CrystalLexer.new if name == "crystal"
  lexer_file_name = LEXERS_BY_NAME.fetch(name.downcase, nil)
  return create_delegating_lexer(name) if lexer_file_name.nil? && name.includes? "+"
  raise Exception.new("Unknown lexer: #{name}") if lexer_file_name.nil?

  RegexLexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
rescue ex : BakedFileSystem::NoSuchFileError
  # The name is known but its XML wasn't baked in (e.g. a trimmed
  # -Dnolexers build) — report it the same as an unknown lexer.
  raise Exception.new("Unknown lexer: #{name}")
end
|
||||
|
||||
# Resolve a lexer from a filename. ".cr" files use the native Crystal
# lexer; otherwise candidates are collected by glob-matching the
# basename against LEXERS_BY_FILENAME, with content heuristics used to
# break ties between multiple candidates.
private def self.lexer_by_filename(filename : String) : BaseLexer
  if filename.ends_with?(".cr")
    return CrystalLexer.new
  end

  candidates = Set(String).new
  LEXERS_BY_FILENAME.each do |k, v|
    candidates += v.to_set if File.match?(k, File.basename(filename))
  end

  case candidates.size
  when 0
    # Nothing matched: fall back to plain text.
    lexer_file_name = LEXERS_BY_NAME["plaintext"]
  when 1
    lexer_file_name = candidates.first
  else
    # Ambiguous: let the linguist heuristics inspect the file contents.
    lexer_file_name = self.lexer_by_content(filename)
    begin
      return self.lexer(lexer_file_name)
    rescue ex : Exception
      raise Exception.new("Multiple lexers match the filename: #{candidates.to_a.join(", ")}, heuristics suggest #{lexer_file_name} but there is no matching lexer.")
    end
  end

  RegexLexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
end
|
||||
|
||||
# Disambiguate a lexer by running linguist heuristics over the file's
# contents. Returns a lexer name (first suggestion when several are
# proposed), or nil; raises when the heuristics yield nothing.
private def self.lexer_by_content(fname : String) : String?
  h = Linguist::Heuristic.from_yaml(LexerFiles.get("/heuristics.yml").gets_to_end)
  result = h.run(fname, File.read(fname))
  case result
  when Nil
    raise Exception.new "No lexer found for #{fname}"
  when String
    result.as(String)
  when Array(String)
    # Several languages suggested; take the first one.
    result.first
  end
end
|
||||
|
||||
# Build a DelegatingLexer from a composite "language+root" name,
# e.g. "jinja+html".
private def self.create_delegating_lexer(name : String) : BaseLexer
  parts = name.split("+", 2)
  DelegatingLexer.new(lexer(parts[0]), lexer(parts[1]))
end
|
||||
|
||||
# Return the sorted names of every lexer whose XML definition was
# actually baked into this binary.
def self.lexers : Array(String)
  baked_paths = LexerFiles.files.map(&.path)
  LEXERS_BY_NAME.keys.select { |lexer_name| baked_paths.includes?("/#{lexer_name}.xml") }.sort!
end
|
||||
|
||||
# A token, the output of the tokenizer: a Pygments-style type name
# plus the matched text.
alias Token = NamedTuple(type: String, value: String)

# Common ancestor of all tokenizers.
abstract class BaseTokenizer
end
|
||||
|
||||
# Iterator that drives a lexer's state machine over a text and yields
# tokens one at a time.
class Tokenizer < BaseTokenizer
  include Iterator(Token)
  property lexer : BaseLexer
  # The input as raw bytes (rules match on Bytes).
  property text : Bytes
  # Current byte offset into `text`.
  property pos : Int32 = 0
  # Tokens already produced but not yet handed to the consumer.
  @dq = Deque(Token).new
  # Stack of state names; rules of the top state are tried first.
  property state_stack = ["root"]

  # `secondary` is true when this tokenizer is nested inside another
  # (see DelegatingTokenizer), in which case ensure_nl is skipped.
  def initialize(@lexer : BaseLexer, text : String, secondary = false)
    # Respect the `ensure_nl` config option
    if text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary
      text += "\n"
    end
    @text = text.to_slice
  end

  # Serve a queued token if any; otherwise advance the state machine
  # (queueing tokens until EOF or an unmatched byte) and recurse to
  # serve from the now-filled queue.
  def next : Iterator::Stop | Token
    if @dq.size > 0
      return @dq.shift
    end
    if pos == @text.size
      return stop
    end

    matched = false
    while @pos < @text.size
      @lexer.states[@state_stack.last].rules.each do |rule|
        matched, new_pos, new_tokens = rule.match(@text, @pos, self)
        if matched
          @pos = new_pos
          split_tokens(new_tokens).each { |token| @dq << token }
          break
        end
      end
      if !matched
        # Error recovery: emit the offending byte, and on a newline
        # (byte 10) reset the state machine to "root".
        if @text[@pos] == 10u8
          @dq << {type: "Text", value: "\n"}
          @state_stack = ["root"]
        else
          @dq << {type: "Error", value: String.new(@text[@pos..@pos])}
        end
        @pos += 1
        break
      end
    end
    self.next
  end

  # If a token contains newlines, split it into one token per line
  # (each but the last keeping its trailing "\n").
  def split_tokens(tokens : Array(Token)) : Array(Token)
    split_tokens = [] of Token
    tokens.each do |token|
      if token[:value].includes?("\n")
        values = token[:value].split("\n")
        values.each_with_index do |value, index|
          value += "\n" if index < values.size - 1
          split_tokens << {type: token[:type], value: value}
        end
      else
        split_tokens << token
      end
    end
    split_tokens
  end
end
|
||||
|
||||
alias BaseLexer = Lexer

# Base class for all lexers: holds the lexer-wide configuration and
# the state machine's named states, and hands out tokenizers.
abstract class Lexer
  property config = {
    name:             "",
    priority:         0.0,
    case_insensitive: false,
    dot_all:          false,
    not_multiline:    false,
    ensure_nl:        false,
  }
  property states = {} of String => State

  # Create a tokenizer for `text`. `secondary` is true when this lexer
  # runs nested inside another (see DelegatingLexer).
  def tokenizer(text : String, secondary = false) : BaseTokenizer
    Tokenizer.new(self, text, secondary)
  end
end
|
||||
|
||||
# This implements a lexer for Pygments RegexLexers as expressed
# in Chroma's XML serialization.
#
# For explanations on what actions and states do
# the Pygments documentation is a good place to start.
# https://pygments.org/docs/lexerdevelopment/
class RegexLexer < BaseLexer
  # Collapse consecutive tokens of the same type for easier comparison
  # and smaller output; empty-valued tokens are dropped first.
  def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token)
    result = [] of Tartrazine::Token
    tokens = tokens.reject { |token| token[:value] == "" }
    tokens.each do |token|
      if result.empty?
        result << token
        next
      end
      last = result.last
      if last[:type] == token[:type]
        # Same type as the previous token: merge the values.
        new_token = {type: last[:type], value: last[:value] + token[:value]}
        result.pop
        result << new_token
      else
        result << token
      end
    end
    result
  end

  # Build a RegexLexer from Chroma's XML serialization: a <config>
  # element carrying lexer-wide flags and a <rules> element holding
  # the named states with their rules.
  def self.from_xml(xml : String) : Lexer
    l = RegexLexer.new
    lexer = XML.parse(xml).first_element_child
    if lexer
      config = lexer.children.find { |node|
        node.name == "config"
      }
      if config
        l.config = {
          name:             xml_to_s(config, name) || "",
          priority:         xml_to_f(config, priority) || 0.0,
          not_multiline:    xml_to_s(config, not_multiline) == "true",
          dot_all:          xml_to_s(config, dot_all) == "true",
          case_insensitive: xml_to_s(config, case_insensitive) == "true",
          ensure_nl:        xml_to_s(config, ensure_nl) == "true",
        }
      end

      rules = lexer.children.find { |node|
        node.name == "rules"
      }
      if rules
        # Rules contains states 🤷
        rules.children.select { |node|
          node.name == "state"
        }.each do |state_node|
          state = State.new
          state.name = state_node["name"]
          if l.states.has_key?(state.name)
            raise Exception.new("Duplicate state: #{state.name}")
          else
            l.states[state.name] = state
          end
          # And states contain rules 🤷
          state_node.children.select { |node|
            node.name == "rule"
          }.each do |rule_node|
            case rule_node["pattern"]?
            when nil
              # No pattern attribute: either an <include> of another
              # state, or an unconditional rule.
              if rule_node.first_element_child.try &.name == "include"
                rule = IncludeStateRule.new(rule_node)
              else
                rule = UnconditionalRule.new(rule_node)
              end
            else
              rule = Rule.new(rule_node,
                multiline: !l.config[:not_multiline],
                dotall: l.config[:dot_all],
                ignorecase: l.config[:case_insensitive])
            end
            state.rules << rule
          end
        end
      end
    end
    l
  end
end
|
||||
|
||||
# A lexer that takes two lexers as arguments. A root lexer
# and a language lexer. Everything is lexed using the
# language lexer, afterwards all `Other` tokens are lexed
# using the root lexer.
#
# This is useful for things like template languages, where
# you have Jinja + HTML or Jinja + CSS and so on.
class DelegatingLexer < Lexer
  property language_lexer : BaseLexer
  property root_lexer : BaseLexer

  def initialize(@language_lexer : BaseLexer, @root_lexer : BaseLexer)
  end

  # Returns a DelegatingTokenizer, which implements the two-pass
  # lexing described above.
  def tokenizer(text : String, secondary = false) : DelegatingTokenizer
    DelegatingTokenizer.new(self, text, secondary)
  end
end
|
||||
|
||||
# This Tokenizer works with a DelegatingLexer. It first tokenizes
# using the language lexer, and "Other" tokens are tokenized using
# the root lexer.
class DelegatingTokenizer < BaseTokenizer
  include Iterator(Token)
  # Tokens already produced but not yet handed to the consumer.
  @dq = Deque(Token).new
  @language_tokenizer : BaseTokenizer

  def initialize(@lexer : DelegatingLexer, text : String, secondary = false)
    # Respect the `ensure_nl` config option
    if text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary
      text += "\n"
    end
    @language_tokenizer = @lexer.language_lexer.tokenizer(text, true)
  end

  # Serve a queued token if any; otherwise pull the next token from the
  # language lexer, re-lexing "Other" chunks with the root lexer, then
  # recurse to serve from the queue.
  def next : Iterator::Stop | Token
    if @dq.size > 0
      return @dq.shift
    end
    token = @language_tokenizer.next
    if token.is_a? Iterator::Stop
      return stop
    elsif token.as(Token).[:type] == "Other"
      # Hand the "Other" text to the root lexer and queue its tokens.
      root_tokenizer = @lexer.root_lexer.tokenizer(token.as(Token).[:value], true)
      root_tokenizer.each do |root_token|
        @dq << root_token
      end
    else
      @dq << token.as(Token)
    end
    self.next
  end
end
|
||||
|
||||
# A Lexer state. A state has a name and a list of rules.
# The state machine has a state stack containing references
# to states to decide which rules to apply.
struct State
  property name : String = ""
  property rules = [] of BaseRule

  # Combine two states into a fresh, randomly-named state whose rule
  # list is this state's rules followed by `other`'s.
  def +(other : State)
    State.new.tap do |combined|
      combined.name = Random.base58(8)
      combined.rules = rules + other.rules
    end
  end
end
|
||||
|
||||
# Adapter that drives the Crystal compiler's own SyntaxHighlighter and
# collects its output as Tartrazine tokens.
class CustomCrystalHighlighter < Crystal::SyntaxHighlighter
  @tokens = [] of Token

  def highlight(text)
    super
  rescue ex : Crystal::SyntaxException
    # Invalid Crystal code: fall back to the Ruby regex lexer.
    @tokens = Tartrazine.lexer("ruby").tokenizer(text).to_a
  end

  def render_delimiter(&block)
    @tokens << {type: "LiteralString", value: block.call.to_s}
  end

  def render_interpolation(&block)
    @tokens << {type: "LiteralStringInterpol", value: "\#{"}
    @tokens << {type: "Text", value: block.call.to_s}
    @tokens << {type: "LiteralStringInterpol", value: "}"}
  end

  def render_string_array(&block)
    @tokens << {type: "LiteralString", value: block.call.to_s}
  end

  # Map the compiler's token types onto Pygments-style type names.
  # ameba:disable Metrics/CyclomaticComplexity
  def render(type : TokenType, value : String)
    case type
    when .comment?
      @tokens << {type: "Comment", value: value}
    when .number?
      @tokens << {type: "LiteralNumber", value: value}
    when .char?
      @tokens << {type: "LiteralStringChar", value: value}
    when .symbol?
      @tokens << {type: "LiteralStringSymbol", value: value}
    when .const?
      @tokens << {type: "NameConstant", value: value}
    when .string?
      @tokens << {type: "LiteralString", value: value}
    when .ident?
      @tokens << {type: "NameVariable", value: value}
    when .keyword?, .self?
      @tokens << {type: "NameKeyword", value: value}
    when .primitive_literal?
      @tokens << {type: "Literal", value: value}
    when .operator?
      @tokens << {type: "Operator", value: value}
    when Crystal::SyntaxHighlighter::TokenType::DELIMITED_TOKEN, Crystal::SyntaxHighlighter::TokenType::DELIMITER_START, Crystal::SyntaxHighlighter::TokenType::DELIMITER_END
      @tokens << {type: "LiteralString", value: value}
    else
      # Anything unrecognized degrades gracefully to plain text.
      @tokens << {type: "Text", value: value}
    end
  end
end
|
||||
|
||||
# Tokenizer backed by the compiler-based highlighter: the whole text is
# lexed eagerly in the constructor and then served via an iterator.
class CrystalTokenizer < Tartrazine::BaseTokenizer
  include Iterator(Token)
  @hl = CustomCrystalHighlighter.new
  @lexer : BaseLexer
  @iter : Iterator(Token)

  # delegate next, to: @iter

  def initialize(@lexer : BaseLexer, text : String, secondary = false)
    # Respect the `ensure_nl` config option
    if text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary
      text += "\n"
    end
    # Just do the tokenizing
    @hl.highlight(text)
    @iter = @hl.@tokens.each
  end

  def next : Iterator::Stop | Token
    @iter.next
  end
end
|
||||
|
||||
# Lexer for Crystal source, backed by the compiler's own syntax
# highlighter instead of a regex state machine.
class CrystalLexer < BaseLexer
  def tokenizer(text : String, secondary = false) : BaseTokenizer
    CrystalTokenizer.new(self, text, secondary)
  end
end
|
||||
end
|
125
src/main.cr
125
src/main.cr
@ -1,5 +1,122 @@
|
||||
require "./**"
|
||||
require "docopt"
|
||||
require "./tartrazine"
|
||||
|
||||
lexer = Tartrazine.lexer("crystal")
|
||||
theme = Tartrazine.theme(ARGV[1])
|
||||
puts Tartrazine::Html.new.format(File.read(ARGV[0]), lexer, theme)
|
||||
HELP = <<-HELP
|
||||
tartrazine: a syntax highlighting tool
|
||||
|
||||
You can use the CLI to generate HTML, terminal, JSON or SVG output
|
||||
from a source file using different themes.
|
||||
Keep in mind that not all formatters support all features.
|
||||
|
||||
Usage:
|
||||
tartrazine (-h, --help)
|
||||
tartrazine FILE -f html [-t theme][--standalone [--template file]]
|
||||
[--line-numbers][-l lexer][-o output]
|
||||
tartrazine -f html -t theme --css
|
||||
tartrazine FILE -f terminal [-t theme][-l lexer][--line-numbers]
|
||||
[-o output]
|
||||
tartrazine FILE -f svg [-t theme][--standalone][--line-numbers]
|
||||
[-l lexer][-o output]
|
||||
tartrazine FILE -f png [-t theme][--line-numbers]
|
||||
[-l lexer][-o output]
|
||||
tartrazine FILE -f json [-o output]
|
||||
tartrazine --list-themes
|
||||
tartrazine --list-lexers
|
||||
tartrazine --list-formatters
|
||||
tartrazine --version
|
||||
|
||||
Options:
|
||||
-f <formatter> Format to use (html, terminal, json, svg)
|
||||
-t <theme> Theme to use, see --list-themes [default: default-dark]
|
||||
-l <lexer> Lexer (language) to use, see --list-lexers. Use more than
|
||||
one lexer with "+" (e.g. jinja+yaml) [default: autodetect]
|
||||
-o <output> Output file. Default is stdout.
|
||||
--standalone Generate a standalone HTML file, which includes
|
||||
all style information. If not given, it will generate just
|
||||
a HTML fragment ready to include in your own page.
|
||||
--css Generate a CSS file for the theme called <theme>.css
|
||||
--template <file> Use a custom template for the HTML output [default: none]
|
||||
--line-numbers Include line numbers in the output
|
||||
-h, --help Show this screen
|
||||
-v, --version Show version number
|
||||
HELP
|
||||
|
||||
options = Docopt.docopt(HELP, ARGV)
|
||||
|
||||
# Handle version manually
|
||||
if options["--version"]
|
||||
puts "tartrazine #{Tartrazine::VERSION}"
|
||||
exit 0
|
||||
end
|
||||
|
||||
if options["--list-themes"]
|
||||
puts Tartrazine.themes.join("\n")
|
||||
exit 0
|
||||
end
|
||||
|
||||
if options["--list-lexers"]
|
||||
puts Tartrazine.lexers.join("\n")
|
||||
exit 0
|
||||
end
|
||||
|
||||
if options["--list-formatters"]
|
||||
puts "html\njson\nterminal"
|
||||
exit 0
|
||||
end
|
||||
|
||||
theme = Tartrazine.theme(options["-t"].as(String))
|
||||
template = options["--template"].as(String)
|
||||
if template != "none" # Otherwise we will use the default template
|
||||
template = File.open(template).gets_to_end
|
||||
else
|
||||
template = nil
|
||||
end
|
||||
|
||||
if options["-f"]
|
||||
formatter = options["-f"].as(String)
|
||||
case formatter
|
||||
when "html"
|
||||
formatter = Tartrazine::Html.new
|
||||
formatter.standalone = options["--standalone"] != nil
|
||||
formatter.line_numbers = options["--line-numbers"] != nil
|
||||
formatter.theme = theme
|
||||
formatter.template = template if template
|
||||
when "terminal"
|
||||
formatter = Tartrazine::Ansi.new
|
||||
formatter.line_numbers = options["--line-numbers"] != nil
|
||||
formatter.theme = theme
|
||||
when "json"
|
||||
formatter = Tartrazine::Json.new
|
||||
when "svg"
|
||||
formatter = Tartrazine::Svg.new
|
||||
formatter.standalone = options["--standalone"] != nil
|
||||
formatter.line_numbers = options["--line-numbers"] != nil
|
||||
formatter.theme = theme
|
||||
when "png"
|
||||
formatter = Tartrazine::Png.new
|
||||
formatter.line_numbers = options["--line-numbers"] != nil
|
||||
formatter.theme = theme
|
||||
else
|
||||
puts "Invalid formatter: #{formatter}"
|
||||
exit 1
|
||||
end
|
||||
|
||||
if formatter.is_a?(Tartrazine::Html) && options["--css"]
|
||||
File.open("#{options["-t"].as(String)}.css", "w") do |outf|
|
||||
outf << formatter.style_defs
|
||||
end
|
||||
exit 0
|
||||
end
|
||||
|
||||
lexer = Tartrazine.lexer(name: options["-l"].as(String), filename: options["FILE"].as(String))
|
||||
|
||||
input = File.open(options["FILE"].as(String)).gets_to_end
|
||||
|
||||
if options["-o"].nil?
|
||||
outf = STDOUT
|
||||
else
|
||||
outf = File.open(options["-o"].as(String), "w")
|
||||
end
|
||||
formatter.format(input, lexer, outf)
|
||||
outf.close
|
||||
end
|
||||
|
105
src/rules.cr
105
src/rules.cr
@ -1,9 +1,9 @@
|
||||
require "./actions"
|
||||
require "./constants"
|
||||
require "./bytes_regex"
|
||||
require "./formatter"
|
||||
require "./lexer"
|
||||
require "./rules"
|
||||
require "./styles"
|
||||
require "./tartrazine"
|
||||
|
||||
# These are lexer rules. They match with the text being parsed
|
||||
# and perform actions, either emitting tokens or changing the
|
||||
@ -11,37 +11,14 @@ require "./tartrazine"
|
||||
module Tartrazine
|
||||
# This rule matches via a regex pattern
|
||||
|
||||
class Rule
|
||||
property pattern : Regex = Re2.new ""
|
||||
property actions : Array(Action) = [] of Action
|
||||
property xml : String = "foo"
|
||||
alias Regex = BytesRegex::Regex
|
||||
alias Match = BytesRegex::Match
|
||||
alias MatchData = Array(Match)
|
||||
|
||||
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
|
||||
match = pattern.match(text, pos)
|
||||
# We don't match if the match doesn't move the cursor
|
||||
# because that causes infinite loops
|
||||
return false, pos, [] of Token if match.nil? || match.end == 0
|
||||
# Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
|
||||
tokens = [] of Token
|
||||
# Emit the tokens
|
||||
actions.each do |action|
|
||||
# Emit the token
|
||||
tokens += action.emit(match, lexer)
|
||||
end
|
||||
Log.trace { "#{xml}, #{match.end}, #{tokens}" }
|
||||
return true, match.end, tokens
|
||||
end
|
||||
abstract struct BaseRule
|
||||
abstract def match(text : Bytes, pos : Int32, tokenizer : Tokenizer) : Tuple(Bool, Int32, Array(Token))
|
||||
|
||||
def initialize(node : XML::Node, multiline, dotall, ignorecase)
|
||||
@xml = node.to_s
|
||||
@pattern = Re2.new(
|
||||
node["pattern"],
|
||||
multiline,
|
||||
dotall,
|
||||
ignorecase,
|
||||
anchored: true)
|
||||
add_actions(node)
|
||||
end
|
||||
@actions : Array(Action) = [] of Action
|
||||
|
||||
def add_actions(node : XML::Node)
|
||||
node.children.each do |child|
|
||||
@ -51,23 +28,39 @@ module Tartrazine
|
||||
end
|
||||
end
|
||||
|
||||
struct Rule < BaseRule
|
||||
property pattern : Regex = Regex.new ""
|
||||
|
||||
def match(text : Bytes, pos, tokenizer) : Tuple(Bool, Int32, Array(Token))
|
||||
match = pattern.match(text, pos)
|
||||
|
||||
# No match
|
||||
return false, pos, [] of Token if match.size == 0
|
||||
return true, pos + match[0].size, @actions.flat_map(&.emit(match, tokenizer))
|
||||
end
|
||||
|
||||
def initialize(node : XML::Node, multiline, dotall, ignorecase)
|
||||
pattern = node["pattern"]
|
||||
pattern = "(?m)" + pattern if multiline
|
||||
@pattern = Regex.new(pattern, multiline, dotall, ignorecase, true)
|
||||
add_actions(node)
|
||||
end
|
||||
end
|
||||
|
||||
# This rule includes another state. If any of the rules of the
|
||||
# included state matches, this rule matches.
|
||||
class IncludeStateRule < Rule
|
||||
property state : String = ""
|
||||
struct IncludeStateRule < BaseRule
|
||||
@state : String = ""
|
||||
|
||||
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
|
||||
Log.trace { "Including state #{state} from #{lexer.state_stack.last}" }
|
||||
lexer.states[state].rules.each do |rule|
|
||||
matched, new_pos, new_tokens = rule.match(text, pos, lexer)
|
||||
Log.trace { "#{xml}, #{new_pos}, #{new_tokens}" } if matched
|
||||
def match(text : Bytes, pos : Int32, tokenizer : Tokenizer) : Tuple(Bool, Int32, Array(Token))
|
||||
tokenizer.@lexer.states[@state].rules.each do |rule|
|
||||
matched, new_pos, new_tokens = rule.match(text, pos, tokenizer)
|
||||
return true, new_pos, new_tokens if matched
|
||||
end
|
||||
return false, pos, [] of Token
|
||||
end
|
||||
|
||||
def initialize(node : XML::Node)
|
||||
@xml = node.to_s
|
||||
include_node = node.children.find { |child|
|
||||
child.name == "include"
|
||||
}
|
||||
@ -77,39 +70,15 @@ module Tartrazine
|
||||
end
|
||||
|
||||
# This rule always matches, unconditionally
|
||||
class UnconditionalRule < Rule
|
||||
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
|
||||
tokens = [] of Token
|
||||
actions.each do |action|
|
||||
tokens += action.emit(nil, lexer)
|
||||
end
|
||||
return true, pos, tokens
|
||||
struct UnconditionalRule < BaseRule
|
||||
NO_MATCH = [] of Match
|
||||
|
||||
def match(text, pos, tokenizer) : Tuple(Bool, Int32, Array(Token))
|
||||
return true, pos, @actions.flat_map(&.emit(NO_MATCH, tokenizer))
|
||||
end
|
||||
|
||||
def initialize(node : XML::Node)
|
||||
@xml = node.to_s
|
||||
add_actions(node)
|
||||
end
|
||||
end
|
||||
|
||||
# This is a hack to workaround that Crystal seems to disallow
|
||||
# having regexes multiline but not dot_all
|
||||
class Re2 < Regex
|
||||
@source = "fa"
|
||||
@options = Regex::Options::None
|
||||
@jit = true
|
||||
|
||||
def initialize(pattern : String, multiline = false, dotall = false, ignorecase = false, anchored = false)
|
||||
flags = LibPCRE2::UTF | LibPCRE2::DUPNAMES |
|
||||
LibPCRE2::UCP
|
||||
flags |= LibPCRE2::MULTILINE if multiline
|
||||
flags |= LibPCRE2::DOTALL if dotall
|
||||
flags |= LibPCRE2::CASELESS if ignorecase
|
||||
flags |= LibPCRE2::ANCHORED if anchored
|
||||
flags |= LibPCRE2::NO_UTF_CHECK
|
||||
@re = Regex::PCRE2.compile(pattern, flags) do |error_message|
|
||||
raise Exception.new(error_message)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
1
src/run_tests.cr
Normal file
1
src/run_tests.cr
Normal file
@ -0,0 +1 @@
|
||||
require "../spec/tartrazine_spec.cr"
|
@ -1,5 +1,4 @@
|
||||
require "./actions"
|
||||
require "./constants"
|
||||
require "./formatter"
|
||||
require "./rules"
|
||||
require "./styles"
|
||||
@ -10,17 +9,50 @@ require "xml"
|
||||
module Tartrazine
|
||||
alias Color = Sixteen::Color
|
||||
|
||||
def self.theme(name : String) : Theme
|
||||
return Theme.from_base16(name[7..]) if name.starts_with? "base16_"
|
||||
Theme.from_xml(ThemeFiles.get("/#{name}.xml").gets_to_end)
|
||||
end
|
||||
|
||||
class ThemeFiles
|
||||
struct ThemeFiles
|
||||
extend BakedFileSystem
|
||||
bake_folder "../styles", __DIR__
|
||||
|
||||
macro bake_selected_themes
|
||||
{% if env("TT_THEMES") %}
|
||||
{% for theme in env("TT_THEMES").split "," %}
|
||||
bake_file {{ theme }}+".xml", {{ read_file "#{__DIR__}/../styles/" + theme + ".xml" }}
|
||||
{% end %}
|
||||
{% end %}
|
||||
end
|
||||
|
||||
class Style
|
||||
{% if flag?(:nothemes) %}
|
||||
bake_selected_themes
|
||||
{% else %}
|
||||
bake_folder "../styles", __DIR__
|
||||
{% end %}
|
||||
end
|
||||
|
||||
def self.theme(name : String) : Theme
|
||||
begin
|
||||
return Theme.from_base16(name)
|
||||
rescue ex : Exception
|
||||
raise ex unless ex.message.try &.includes? "Theme not found"
|
||||
end
|
||||
begin
|
||||
Theme.from_xml(ThemeFiles.get("/#{name}.xml").gets_to_end)
|
||||
rescue ex : Exception
|
||||
raise Exception.new("Error loading theme #{name}: #{ex.message}")
|
||||
end
|
||||
end
|
||||
|
||||
# Return a list of all themes
|
||||
def self.themes
|
||||
themes = Set(String).new
|
||||
ThemeFiles.files.each do |file|
|
||||
themes << file.path.split("/").last.split(".").first
|
||||
end
|
||||
Sixteen::DataFiles.files.each do |file|
|
||||
themes << file.path.split("/").last.split(".").first
|
||||
end
|
||||
themes.to_a.sort!
|
||||
end
|
||||
|
||||
struct Style
|
||||
# These properties are tri-state.
|
||||
# true means it's set
|
||||
# false means it's not set
|
||||
@ -60,32 +92,11 @@ module Tartrazine
|
||||
end
|
||||
end
|
||||
|
||||
class Theme
|
||||
struct Theme
|
||||
property name : String = ""
|
||||
|
||||
property styles = {} of String => Style
|
||||
|
||||
# Get the style for a token.
|
||||
def style(token)
|
||||
styles[token] = Style.new unless styles.has_key?(token)
|
||||
s = styles[token]
|
||||
|
||||
# We already got the data from the style hierarchy
|
||||
return s if s.complete?
|
||||
|
||||
# Form the hierarchy of parent styles
|
||||
parents = style_parents(token)
|
||||
|
||||
s = parents.map do |parent|
|
||||
styles[parent]
|
||||
end.reduce(s) do |acc, style|
|
||||
acc + style
|
||||
end
|
||||
s.complete = true
|
||||
styles[token] = s
|
||||
s
|
||||
end
|
||||
|
||||
def style_parents(token)
|
||||
parents = ["Background"]
|
||||
parts = token.underscore.split("_").map(&.capitalize)
|
||||
@ -103,7 +114,8 @@ module Tartrazine
|
||||
# The color assignments are adapted from
|
||||
# https://github.com/mohd-akram/base16-pygments/
|
||||
|
||||
theme.styles["Background"] = Style.new(color: t["base05"], background: t["base00"])
|
||||
theme.styles["Background"] = Style.new(color: t["base05"], background: t["base00"], bold: true)
|
||||
theme.styles["LineHighlight"] = Style.new(color: t["base0D"], background: t["base01"])
|
||||
theme.styles["Text"] = Style.new(color: t["base05"])
|
||||
theme.styles["Error"] = Style.new(color: t["base08"])
|
||||
theme.styles["Comment"] = Style.new(color: t["base03"])
|
||||
@ -162,7 +174,26 @@ module Tartrazine
|
||||
|
||||
theme.styles[node["type"]] = s
|
||||
end
|
||||
# We really want a LineHighlight class
|
||||
if !theme.styles.has_key?("LineHighlight")
|
||||
theme.styles["LineHighlight"] = Style.new
|
||||
theme.styles["LineHighlight"].background = make_highlight_color(theme.styles["Background"].background)
|
||||
theme.styles["LineHighlight"].bold = true
|
||||
end
|
||||
theme
|
||||
end
|
||||
|
||||
# If the color is dark, make it brighter and viceversa
|
||||
def self.make_highlight_color(base_color)
|
||||
if base_color.nil?
|
||||
# WHo knows
|
||||
return Color.new(127, 127, 127)
|
||||
end
|
||||
if base_color.dark?
|
||||
base_color.lighter(0.2)
|
||||
else
|
||||
base_color.darker(0.2)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -1,6 +1,6 @@
|
||||
require "./actions"
|
||||
require "./constants"
|
||||
require "./formatter"
|
||||
require "./formatters/**"
|
||||
require "./rules"
|
||||
require "./styles"
|
||||
require "./tartrazine"
|
||||
@ -12,189 +12,9 @@ require "xml"
|
||||
|
||||
module Tartrazine
|
||||
extend self
|
||||
VERSION = "0.1.1"
|
||||
VERSION = {{ `shards version #{__DIR__}`.chomp.stringify }}
|
||||
|
||||
Log = ::Log.for("tartrazine")
|
||||
|
||||
# This implements a lexer for Pygments RegexLexers as expressed
|
||||
# in Chroma's XML serialization.
|
||||
#
|
||||
# For explanations on what actions and states do
|
||||
# the Pygments documentation is a good place to start.
|
||||
# https://pygments.org/docs/lexerdevelopment/
|
||||
|
||||
# A Lexer state. A state has a name and a list of rules.
|
||||
# The state machine has a state stack containing references
|
||||
# to states to decide which rules to apply.
|
||||
class State
|
||||
property name : String = ""
|
||||
property rules = [] of Rule
|
||||
|
||||
def +(other : State)
|
||||
new_state = State.new
|
||||
new_state.name = Random.base58(8)
|
||||
new_state.rules = rules + other.rules
|
||||
new_state
|
||||
end
|
||||
end
|
||||
|
||||
class LexerFiles
|
||||
extend BakedFileSystem
|
||||
|
||||
bake_folder "../lexers", __DIR__
|
||||
end
|
||||
|
||||
# A token, the output of the tokenizer
|
||||
alias Token = NamedTuple(type: String, value: String)
|
||||
|
||||
class Lexer
|
||||
property config = {
|
||||
name: "",
|
||||
aliases: [] of String,
|
||||
filenames: [] of String,
|
||||
mime_types: [] of String,
|
||||
priority: 0.0,
|
||||
case_insensitive: false,
|
||||
dot_all: false,
|
||||
not_multiline: false,
|
||||
ensure_nl: false,
|
||||
}
|
||||
property xml : String = ""
|
||||
|
||||
property states = {} of String => State
|
||||
|
||||
property state_stack = ["root"]
|
||||
|
||||
# Turn the text into a list of tokens. The `usingself` parameter
|
||||
# is true when the lexer is being used to tokenize a string
|
||||
# from a larger text that is already being tokenized.
|
||||
# So, when it's true, we don't modify the text.
|
||||
def tokenize(text, usingself = false) : Array(Token)
|
||||
@state_stack = ["root"]
|
||||
tokens = [] of Token
|
||||
pos = 0
|
||||
matched = false
|
||||
|
||||
# Respect the `ensure_nl` config option
|
||||
if text.size > 0 && text[-1] != '\n' && config[:ensure_nl] && !usingself
|
||||
text += "\n"
|
||||
end
|
||||
|
||||
# Loop through the text, applying rules
|
||||
while pos < text.size
|
||||
state = states[@state_stack.last]
|
||||
# Log.trace { "Stack is #{@state_stack} State is #{state.name}, pos is #{pos}, text is #{text[pos..pos + 10]}" }
|
||||
state.rules.each do |rule|
|
||||
matched, new_pos, new_tokens = rule.match(text, pos, self)
|
||||
if matched
|
||||
# Move position forward, save the tokens,
|
||||
# tokenize from the new position
|
||||
# Log.trace { "MATCHED: #{rule.xml}" }
|
||||
pos = new_pos
|
||||
tokens += new_tokens
|
||||
break
|
||||
end
|
||||
# Log.trace { "NOT MATCHED: #{rule.xml}" }
|
||||
end
|
||||
# If no rule matches, emit an error token
|
||||
unless matched
|
||||
# Log.trace { "Error at #{pos}" }
|
||||
tokens << {type: "Error", value: "#{text[pos]}"}
|
||||
pos += 1
|
||||
end
|
||||
end
|
||||
Lexer.collapse_tokens(tokens)
|
||||
end
|
||||
|
||||
# Collapse consecutive tokens of the same type for easier comparison
|
||||
# and smaller output
|
||||
def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token)
|
||||
result = [] of Tartrazine::Token
|
||||
tokens = tokens.reject { |token| token[:value] == "" }
|
||||
tokens.each do |token|
|
||||
if result.empty?
|
||||
result << token
|
||||
next
|
||||
end
|
||||
last = result.last
|
||||
if last[:type] == token[:type]
|
||||
new_token = {type: last[:type], value: last[:value] + token[:value]}
|
||||
result.pop
|
||||
result << new_token
|
||||
else
|
||||
result << token
|
||||
end
|
||||
end
|
||||
result
|
||||
end
|
||||
|
||||
# ameba:disable Metrics/CyclomaticComplexity
|
||||
def self.from_xml(xml : String) : Lexer
|
||||
l = Lexer.new
|
||||
l.xml = xml
|
||||
lexer = XML.parse(xml).first_element_child
|
||||
if lexer
|
||||
config = lexer.children.find { |node|
|
||||
node.name == "config"
|
||||
}
|
||||
if config
|
||||
l.config = {
|
||||
name: xml_to_s(config, name) || "",
|
||||
aliases: xml_to_a(config, _alias) || [] of String,
|
||||
filenames: xml_to_a(config, filename) || [] of String,
|
||||
mime_types: xml_to_a(config, mime_type) || [] of String,
|
||||
priority: xml_to_f(config, priority) || 0.0,
|
||||
not_multiline: xml_to_s(config, not_multiline) == "true",
|
||||
dot_all: xml_to_s(config, dot_all) == "true",
|
||||
case_insensitive: xml_to_s(config, case_insensitive) == "true",
|
||||
ensure_nl: xml_to_s(config, ensure_nl) == "true",
|
||||
}
|
||||
end
|
||||
|
||||
rules = lexer.children.find { |node|
|
||||
node.name == "rules"
|
||||
}
|
||||
if rules
|
||||
# Rules contains states 🤷
|
||||
rules.children.select { |node|
|
||||
node.name == "state"
|
||||
}.each do |state_node|
|
||||
state = State.new
|
||||
state.name = state_node["name"]
|
||||
if l.states.has_key?(state.name)
|
||||
raise Exception.new("Duplicate state: #{state.name}")
|
||||
else
|
||||
l.states[state.name] = state
|
||||
end
|
||||
# And states contain rules 🤷
|
||||
state_node.children.select { |node|
|
||||
node.name == "rule"
|
||||
}.each do |rule_node|
|
||||
case rule_node["pattern"]?
|
||||
when nil
|
||||
if rule_node.first_element_child.try &.name == "include"
|
||||
rule = IncludeStateRule.new(rule_node)
|
||||
else
|
||||
rule = UnconditionalRule.new(rule_node)
|
||||
end
|
||||
else
|
||||
rule = Rule.new(rule_node,
|
||||
multiline: !l.config[:not_multiline],
|
||||
dotall: l.config[:dot_all],
|
||||
ignorecase: l.config[:case_insensitive])
|
||||
end
|
||||
state.rules << rule
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
l
|
||||
end
|
||||
end
|
||||
|
||||
def self.lexer(name : String) : Lexer
|
||||
Lexer.from_xml(LexerFiles.get("/#{name}.xml").gets_to_end)
|
||||
end
|
||||
end
|
||||
|
||||
# Convenience macros to parse XML
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user