169 Commits

Author SHA1 Message Date
d1ce83a9c8 fix: Don't log when falling back to ruby, it breaks stuff
2025-03-09 18:38:42 -03:00
aabe028767 feat: Support custom template for HTML standalone output 2025-02-21 19:43:09 -03:00
ed61a84553 fix: when the internal crystal highlighter fails, fallback to ruby. Fixes #13 2025-02-20 13:24:53 -03:00
b7e4aaa1f9 chore: typo 2025-02-19 09:38:12 -03:00
0f6b9b0117 fix: better error message when loading a XML theme 2025-02-18 21:45:26 -03:00
b81e9c4405 chore: upgrade ci image 2025-01-21 15:11:06 -03:00
9c85c6cf18 bump: Release v0.12.0 2025-01-21 14:37:49 -03:00
d4e189e596 chore: mark more mcfunction tests as bad 2025-01-21 14:37:10 -03:00
71fb699f96 chore: Pin ubuntu version in CI 2025-01-21 14:37:03 -03:00
5fe309f24c feat: Bumped to latest chroma release 2025-01-21 12:31:39 -03:00
62db71ae4d build: automate AUR release
2024-10-14 17:27:31 -03:00
fff6cad5ac bump: Release v0.11.1 2024-10-14 16:56:17 -03:00
44e6af8546 fix: support choosing lexers when used as a library 2024-10-14 16:45:50 -03:00
9e2585a875 bump: Release v0.11.0 2024-10-14 13:28:47 -03:00
c16b139fa3 feat: support selecting only some themes
2024-10-14 13:11:22 -03:00
e11775040c chore(build): strip static binary
2024-09-26 20:50:14 -03:00
30bc8cccba chore: build before tag 2024-09-26 20:47:58 -03:00
1638c253cb bump: Release v0.10.0 2024-09-26 20:35:51 -03:00
c374f52aee Merge pull request #9 from ralsina/conditional-lexers-and-themes
Conditional lexers and themes
2024-09-26 20:34:24 -03:00
96fd9bdfe9 fix: Fix metadata to show crystal 2024-09-26 18:47:47 -03:00
0423811c5d feat: optional conditional baking of lexers 2024-09-26 18:47:47 -03:00
3d9d3ab5cf fix: strip binaries for release artifacts
2024-09-21 21:28:13 -03:00
92a97490f1 bump: Release v0.9.1 2024-09-21 21:08:41 -03:00
22decedf3a test: added minimal tests for svg and png formatters
2024-09-21 21:08:03 -03:00
8b34a1659d fix: Bug in high-level API for png formatter 2024-09-21 21:07:44 -03:00
3bf8172b89 fix: Terminal formatter was skipping things that it could highlight 2024-09-21 20:57:24 -03:00
4432da2893 bump: Release v0.9.0 2024-09-21 20:33:24 -03:00
6a6827f26a feat: PNG writer based on Stumpy libs 2024-09-21 20:22:30 -03:00
766f9b4708 chore: Improve changelog handling 2024-09-21 14:08:07 -03:00
9d49ff78d6 chore: detect version bump in release script 2024-09-21 14:05:14 -03:00
fb924543a0 chore: clean 2024-09-21 14:00:13 -03:00
09d4b7b02e bump: Release v0.8.0 2024-09-21 13:40:01 -03:00
08e81683ca fix: HTML formatter was setting bold wrong 2024-09-21 13:36:31 -03:00
9c70fbf389 feat: SVG formatter 2024-09-21 12:56:40 -03:00
d26393d8c9 chore: fix example code in README
2024-09-19 11:20:08 -03:00
c95658320c docs: added instructions to add as a dependency
2024-09-18 10:58:52 -03:00
ca988defc1 chore(ignore): skip chore+ignore commits in changelog 2024-09-18 10:58:15 -03:00
687c6c81df bump: Release v0.7.0 2024-09-18 10:58:15 -03:00
ac8b7e3800 test: added tests for CSS generation
2024-09-10 22:38:04 -03:00
e288a55812 test: add basic tests for crystal and delegating lexers 2024-09-10 21:57:05 -03:00
11cb5fc48e Merge pull request #3 from ralsina/crystal-native-tokenizer
feat: use the native crystal highlighter
2024-09-09 21:52:39 -03:00
bf2f08c1d0 fix: make install work 2024-09-09 16:16:33 -03:00
84980459cf feat: use the native crystal highlighter
The chroma highlighter for crystal is not great,
because the pygments one special cases things like
heredocs and that got lost in translation.

Since the crystal compiler comes with a highlighter
why not use it?
2024-09-09 16:14:35 -03:00
c011bd8347 feat: higher level API (to_html and to_ansi)
2024-09-05 12:03:33 -03:00
6a38f2f5fb chore(ignore): removed random file
2024-09-04 11:56:52 -03:00
c4a2d1a752 chore: Added badges 2024-09-04 11:44:33 -03:00
358be51e27 chore: Added badges 2024-09-04 11:42:48 -03:00
2cff0fea48 test: Add CI workflows 2024-09-04 11:39:14 -03:00
40202eb2d6 chore(ignore): fix tests 2024-09-04 11:37:36 -03:00
3ed4a7eab8 fix: renamed BaseLexer to Lexer and Lexer to RegexLexer to make API nicer 2024-09-04 11:37:36 -03:00
6f797f999a fix: make it easier to import the Ansi formatter 2024-09-04 11:37:36 -03:00
b762307660 bump: Release v0.6.4 2024-09-04 11:37:36 -03:00
eb0cc089a9 fix: variable bame in Hacefile 2024-09-04 11:37:36 -03:00
88f2aace20 bump: Release v0.6.4 2024-09-04 11:37:36 -03:00
fe943fa399 build: fix markdown check 2024-09-04 11:37:36 -03:00
08f8138e05 fix: ameba 2024-09-04 11:37:36 -03:00
3c0b3c38e2 build: added do_release script 2024-09-04 11:37:36 -03:00
a1318501a5 build: switch from Makefile to Hacefile 2024-09-04 11:37:33 -03:00
daf24189bf chore: updated pre-commit 2024-09-04 11:37:04 -03:00
3d3f9fcc24 chore: force conventional commit messages 2024-08-26 21:27:38 -03:00
a583b7359e docs: Mention AUR package 2024-08-26 20:30:37 -03:00
de2a4a1996 chore: force conventional commit messages 2024-08-26 20:30:12 -03:00
31334ac802 chore: Started changelog 2024-08-26 20:20:26 -03:00
6d64491938 chore: git-cliff config 2024-08-26 20:19:02 -03:00
fb693bb221 chore: pre-commit hooks 2024-08-26 20:18:28 -03:00
c6824a99df Use latest sixteen release 2024-08-26 17:09:31 -03:00
4dd2e925b0 Fix bug in ansi formatter 2024-08-26 16:44:44 -03:00
7bda19cdea Use forked baked_file_system for now 2024-08-25 17:05:04 -03:00
0e7dafe711 Updated README 2024-08-24 22:33:24 -03:00
082241eb0f Load lexer by mimetype 2024-08-24 22:20:38 -03:00
df88047ca8 v0.6.1 2024-08-24 21:45:57 -03:00
5a3b50d7a3 Integrate heuristics into lexer selection 2024-08-24 21:39:39 -03:00
a5926af518 Comments 2024-08-24 20:53:14 -03:00
fc9f834bc8 Make it work again 2024-08-24 20:09:29 -03:00
58fd42d936 Rebase to main 2024-08-24 19:59:05 -03:00
5a88a51f3e Implement heuristics from linguist 2024-08-24 19:55:56 -03:00
fd7c6fa4b3 Sort of working? 2024-08-24 19:55:56 -03:00
6264bfc754 Beginning deserialization of data 2024-08-24 19:55:56 -03:00
38196d6e96 Rst lexer 2024-08-24 19:49:02 -03:00
c6cd74e339 248 languages 2024-08-23 14:49:01 -03:00
17c66a6572 typo 2024-08-23 14:46:26 -03:00
cd7e150aae Merge pull request #1 from ralgozino/docs/improve-v0.6.0-instructions
docs: improve readme and help message
2024-08-23 14:45:56 -03:00
176b8e9bc9 docs: improve readme and help message
- Add example for printing output to the terminal
- Fix example for usage as CLI tool (missing -f flag)
- Add instructions in the help message for combining lexers
2024-08-23 18:30:14 +02:00
d8ddf5d8b6 v0.6.0 2024-08-23 10:39:08 -03:00
06556877ef Merge branch 'more_lexers' 2024-08-23 10:34:17 -03:00
3d5d073471 Implemented usingbygroup action, so code-in-markdown works 2024-08-23 10:20:03 -03:00
a2884c4c78 Refactor 2024-08-22 21:58:21 -03:00
bd3df10d2c Use classes instead of structs to allow properties of the same type 2024-08-22 21:52:59 -03:00
0f3b7fc3c5 Initial implementation of delegatinglexer 2024-08-22 20:55:08 -03:00
7f4296e9d7 Some template lexers 2024-08-22 16:11:30 -03:00
f883065092 Fix weird bug 2024-08-22 15:00:17 -03:00
746abe53ea Fix weird bug 2024-08-22 14:58:05 -03:00
90971e8f1b Generate constants sorted so git diffs are smaller 2024-08-22 10:24:09 -03:00
057879c6ee oops 2024-08-22 10:11:36 -03:00
215d53e173 3 more lexers (markdown moinwiki bbcode) 2024-08-21 22:21:38 -03:00
f435d7df21 0.5.1 2024-08-21 21:22:36 -03:00
5b0a1789dc v0.5.0 2024-08-21 21:22:36 -03:00
76ef1fea41 Fix example code in README 2024-08-21 21:22:36 -03:00
3ebedec6c1 Make formatter a bit more convenient 2024-08-19 11:26:34 -03:00
57e63f2308 Make formatter a bit more convenient 2024-08-19 11:20:08 -03:00
4a598a575b Make formatter a bit more convenient 2024-08-19 11:18:54 -03:00
9042138053 Make formatter a bit more convenient 2024-08-19 11:17:44 -03:00
fa647e898a Make formatter a bit more convenient 2024-08-19 10:15:02 -03:00
ad92929a10 Make formatter a bit more convenient 2024-08-19 09:59:01 -03:00
bb952a44b8 Use IO for output 2024-08-16 17:25:33 -03:00
ae03e4612e todo management 2024-08-16 14:05:34 -03:00
471b2f5050 updated 2024-08-16 14:03:05 -03:00
5a3b08e716 lint 2024-08-16 14:01:16 -03:00
9ebb9f2765 Fix off-by-1 2024-08-16 13:36:11 -03:00
7538fc76aa Tokenize via an iterator, makes everything much faster 2024-08-16 13:27:02 -03:00
788577b226 Fix comment 2024-08-15 23:56:52 -03:00
1f01146b1f Minor cleanup 2024-08-15 23:21:21 -03:00
9041b763ea Remove unused bits of lexer config 2024-08-15 23:17:49 -03:00
ada30915c3 Idiomatic changes 2024-08-15 23:16:29 -03:00
78eff45ea0 Idiomatic changes 2024-08-15 23:11:49 -03:00
e817aedd60 Idiomatic changes 2024-08-15 22:41:24 -03:00
20d6b65346 More idiomatic 2024-08-15 22:01:50 -03:00
cb09dff9f1 Minor cleanup 2024-08-15 21:35:06 -03:00
b589726352 Make action a struct, guard against popping too much 2024-08-15 21:16:17 -03:00
a3a7b5bd9a Many cleanups 2024-08-15 21:10:25 -03:00
58e8dac038 Make usingself MUCH cheaper, since it was called many times when parsing C 2024-08-15 19:20:12 -03:00
f72a40f095 Oops, escape things in HTML formatter! 2024-08-15 17:12:29 -03:00
bf257a5b82 cleanup 2024-08-15 17:05:03 -03:00
029495590c cleanup 2024-08-15 17:04:48 -03:00
115debdec6 Allocate match_data once 2024-08-15 17:04:16 -03:00
4612db58fe Prefetch XML data 2024-08-15 17:03:58 -03:00
f45a86c83a ignore 2024-08-15 16:35:58 -03:00
27008640a6 v0.4.0 2024-08-14 13:25:39 -03:00
7db8fdc9e4 Updated README 2024-08-14 13:25:20 -03:00
ad664d9f93 Added error handling 2024-08-14 11:24:25 -03:00
0626c8619f Working bytes-regexes, faster, MORE tests pass 2024-08-14 11:06:53 -03:00
3725201f8a Merge branch 'main' of github.com:ralsina/tartrazine 2024-08-14 09:25:08 -03:00
6f64b76c44 lint 2024-08-13 22:07:23 -03:00
5218af6855 lint 2024-08-13 22:06:19 -03:00
c898f395a1 reset stack on EOL instead of error, makes no difference, but it's in pygments version 2024-08-13 22:06:07 -03:00
56e49328fb Tiny bug 2024-08-13 21:00:00 -03:00
8d7faf2098 0.3.0 2024-08-13 11:06:06 -03:00
2e87762f1b API changes to make it nicer
These are incompatible, tho.

* Theme is now a property of the formatter instead
  of passing it arounf
* get_style_defs is now style_defs
2024-08-13 10:57:02 -03:00
88f5674917 Tiny bug 2024-08-12 21:02:17 -03:00
ce6f3d29b5 Remove Re2 hack 2024-08-12 19:01:13 -03:00
46d6d3f467 Make how-heavy-is-bold configurable 2024-08-12 10:55:58 -03:00
78ddc69937 Merge branch 'main' of github.com:ralsina/tartrazine 2024-08-12 10:11:03 -03:00
b1ad7b64c0 oops 2024-08-12 10:10:51 -03:00
cbedf8a8db Bump to 0.2.0 2024-08-11 13:24:30 -03:00
ec8c53c823 Added --line-numbers for the terminal formatter 2024-08-11 13:21:47 -03:00
e3a1ce37b4 Support guessing lexer by filename 2024-08-11 13:04:35 -03:00
b4f38e00e1 Script to generate lexer metadata constants 2024-08-11 12:41:22 -03:00
08daabe1c3 Cleanup token abbreviation generation script 2024-08-11 12:06:02 -03:00
e8d405fc99 Implemented decent version of the CLI 2024-08-11 11:54:00 -03:00
e295256573 Implemented decent version of the CLI 2024-08-11 11:49:42 -03:00
e40c8b586c Removed duplicate snazzy theme 2024-08-11 11:27:37 -03:00
bc34f93cc5 Use regular sixteen now 2024-08-10 17:16:26 -03:00
f64c91801e lint 2024-08-10 16:58:36 -03:00
8e29500fcf Make line-numbers not-selectable. This makes the listing copy-friendly *AND* doesn't require wrapping things in tables 2024-08-10 16:54:46 -03:00
f2e638ce3b Require main branch sixteen for now, line-highlight style improvements 2024-08-10 16:50:55 -03:00
84ee7e6934 JSON formatter 2024-08-09 16:58:15 -03:00
89d212b71c Start actual CLI 2024-08-09 16:53:24 -03:00
a92d2501f7 HTML formatter option: wrap_long_lines 2024-08-09 16:20:30 -03:00
6b44bcb5ad HTML formatter option: surrounding_pre 2024-08-09 15:59:49 -03:00
86a5894429 Hack luminance tweaking for creating highlight color (needs a proper implementation) 2024-08-09 14:54:00 -03:00
be12e0f4f1 Sort constants 2024-08-09 14:44:23 -03:00
96dcb7e15e Fix line highlight for non-base16 themes 2024-08-09 14:42:33 -03:00
d1762f477a Fix constants for non-base16 themes 2024-08-09 14:17:24 -03:00
f98f44365f HTML formatter option: line_numbers / highlight_lines 2024-08-09 14:00:42 -03:00
d0c2b1764a HTML formatter option: line_number_start / line_number_id_prefix 2024-08-09 13:28:05 -03:00
e6a292ade0 HTML formatter option: tab_width 2024-08-09 12:29:56 -03:00
4ced996f90 HTML formatter option: class_prefix 2024-08-09 12:21:02 -03:00
fd5af6ba3b Starting to add options to HTML formatter: standalone 2024-08-09 11:57:23 -03:00
47237eecc3 Refactor things into separate files for easier reading 2024-08-09 11:31:18 -03:00
365 changed files with 6398 additions and 2590 deletions

.ameba.yml

@@ -1,15 +1,15 @@
# This configuration file was generated by `ameba --gen-config`
# on 2024-08-04 23:09:09 UTC using Ameba version 1.6.1.
# on 2024-09-21 14:59:30 UTC using Ameba version 1.6.1.
# The point is for the user to remove these configuration records
# one by one as the reported problems are removed from the code base.
# Problems found: 2
# Problems found: 3
# Run `ameba --only Documentation/DocumentationAdmonition` for details
Documentation/DocumentationAdmonition:
Description: Reports documentation admonitions
Timezone: UTC
Excluded:
- src/tartrazine.cr
- src/lexer.cr
- src/actions.cr
Admonitions:
- TODO
@@ -17,3 +17,18 @@ Documentation/DocumentationAdmonition:
- BUG
Enabled: true
Severity: Warning
# Problems found: 1
# Run `ameba --only Lint/SpecFilename` for details
Lint/SpecFilename:
Description: Enforces spec filenames to have `_spec` suffix
Excluded:
- spec/examples/crystal/hello.cr
IgnoredDirs:
- spec/support
- spec/fixtures
- spec/data
IgnoredFilenames:
- spec_helper
Enabled: true
Severity: Warning

.github/workflows/ci.yml

@@ -0,0 +1,26 @@
name: Tests
on:
# This can't yet run automatically, because tests fail because of
# different versions of chroma. Need to get the same one in my
# local env and in CI
workflow_dispatch:
push:
permissions:
contents: read
jobs:
build:
runs-on: ubuntu-22.04
steps:
- name: Download source
uses: actions/checkout@v4
- name: Install Crystal
uses: crystal-lang/install-crystal@v1
- name: Run tests
run: |
wget https://github.com/alecthomas/chroma/releases/download/v2.14.0/chroma-2.14.0-linux-amd64.tar.gz
tar xzvf chroma-2.14.0*gz
mkdir ~/.local/bin -p
sudo mv chroma ~/.local/bin
shards install
crystal tool format --check
crystal spec -v

.github/workflows/coverage.yml

@@ -0,0 +1,30 @@
name: Coverage
on:
workflow_dispatch:
schedule:
- cron: "0 1 * * *"
permissions:
contents: read
jobs:
build:
runs-on: ubuntu-22.04
steps:
- name: Download source
uses: actions/checkout@v4
- name: Install Crystal
uses: crystal-lang/install-crystal@v1
- name: Run tests using kcov
run: |
sudo apt update && sudo apt upgrade && sudo apt install -y kcov
wget https://github.com/alecthomas/chroma/releases/download/v2.14.0/chroma-2.14.0-linux-amd64.tar.gz
tar xzvf chroma-2.14.0*gz
mkdir ~/.local/bin -p
sudo mv chroma ~/.local/bin
shards install
crystal build src/run_tests.cr
kcov --clean --include-path=./src $PWD/coverage ./run_tests
curl -Os https://uploader.codecov.io/latest/linux/codecov
chmod +x codecov
./codecov -t ${CODECOV_TOKEN} -s coverage
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

.gitignore

@@ -7,3 +7,11 @@ chroma/
pygments/
shard.lock
.vscode/
.crystal/
venv/
.croupier
coverage/
run_tests
# We use the internal crystal lexer
lexers/crystal.xml

.md.rb

@@ -0,0 +1,3 @@
exclude_rule 'MD033' # Inline HTML
exclude_rule 'MD005' # 3-space indent for lists
exclude_rule 'MD024' # Repeated headings

.mdlrc

@@ -0,0 +1 @@
style ".md.rb"

.pre-commit-config.yaml

@@ -0,0 +1,35 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-added-large-files
- id: check-merge-conflict
- repo: https://github.com/jumanjihouse/pre-commit-hooks
rev: 3.0.0
hooks:
- id: shellcheck
- id: markdownlint
exclude: '^content'
- repo: https://github.com/mrtazz/checkmake
rev: 0.2.2
hooks:
- id: checkmake
exclude: lexers/makefile.xml
- repo: https://github.com/python-jsonschema/check-jsonschema
rev: 0.29.2
hooks:
- id: check-github-workflows
- repo: https://github.com/commitizen-tools/commitizen
rev: v3.29.0 # automatically updated by Commitizen
hooks:
- id: commitizen
- id: commitizen-branch
stages:
- post-commit
- push

CHANGELOG.md

@@ -0,0 +1,142 @@
# Changelog
All notable changes to this project will be documented in this file.
## [0.12.0] - 2025-01-21
### 🚀 Features
- Bumped to latest chroma release
### ⚙️ Miscellaneous Tasks
- Pin ubuntu version in CI
- Mark more mcfunction tests as bad
### Build
- Automate AUR release
## [0.11.1] - 2024-10-14
### 🐛 Bug Fixes
- Support choosing lexers when used as a library
## [0.11.0] - 2024-10-14
### 🚀 Features
- Support selecting only some themes
## [0.10.0] - 2024-09-26
### 🚀 Features
- Optional conditional baking of lexers
### 🐛 Bug Fixes
- Strip binaries for release artifacts
- Fix metadata to show crystal
## [0.9.1] - 2024-09-22
### 🐛 Bug Fixes
- Terminal formatter was skipping things that it could highlight
- Bug in high-level API for png formatter
### 🧪 Testing
- Added minimal tests for svg and png formatters
## [0.9.0] - 2024-09-21
### 🚀 Features
- PNG writer based on Stumpy libs
### ⚙️ Miscellaneous Tasks
- Clean
- Detect version bump in release script
- Improve changelog handling
## [0.8.0] - 2024-09-21
### 🚀 Features
- SVG formatter
### 🐛 Bug Fixes
- HTML formatter was setting bold wrong
### 📚 Documentation
- Added instructions to add as a dependency
### 🧪 Testing
- Add basic tests for crystal and delegating lexers
- Added tests for CSS generation
### ⚙️ Miscellaneous Tasks
- Fix example code in README
## [0.7.0] - 2024-09-10
### 🚀 Features
- Higher level API (`to_html` and `to_ansi`)
- Use the native crystal highlighter
### 🐛 Bug Fixes
- Ameba
- Variable bame in Hacefile
- Make it easier to import the Ansi formatter
- Renamed BaseLexer to Lexer and Lexer to RegexLexer to make API nicer
- Make install work
### 📚 Documentation
- Mention AUR package
### 🧪 Testing
- Add CI workflows
### ⚙️ Miscellaneous Tasks
- Pre-commit hooks
- Git-cliff config
- Started changelog
- Force conventional commit messages
- Force conventional commit messages
- Updated pre-commit
- *(ignore)* Fix tests
- Added badges
- Added badges
- *(ignore)* Removed random file
### Build
- Switch from Makefile to Hacefile
- Added do_release script
- Fix markdown check
### Bump
- Release v0.6.4
- Release v0.6.4
## [0.6.1] - 2024-08-25
### 📚 Documentation
- Improve readme and help message
<!-- generated by git-cliff -->

Hacefile.yml

@@ -0,0 +1,137 @@
variables:
FLAGS: "-d --error-trace"
NAME: "tartrazine"
tasks:
build:
default: true
dependencies:
- src
- shard.lock
- shard.yml
- Hacefile.yml
- lexers/*xml
- styles/*xml
outputs:
- bin/{{NAME}}
commands: |
shards build {{FLAGS}}
get-deps:
dependencies:
- shard.yml
outputs:
- shard.lock
commands: |
shards install
build-release:
phony: true
always_run: true
commands: |
hace build FLAGS="--release"
install:
phony: true
always_run: true
dependencies:
- bin/{{NAME}}
commands: |
rm ${HOME}/.local/bin/{{NAME}} -f
cp bin/{{NAME}} ${HOME}/.local/bin/{{NAME}}
static:
outputs:
- bin/{{NAME}}-static-linux-amd64
- bin/{{NAME}}-static-linux-arm64
commands: |
hace clean
./build_static.sh
test:
dependencies:
- src
- spec
- shard.lock
- shard.yml
commands: |
crystal spec -v --error-trace
phony: true
always_run: true
lint:
dependencies:
- src
- spec
- shard.lock
- shard.yml
commands: |
crystal tool format src/*.cr spec/*.cr
ameba --fix
always_run: true
phony: true
docs:
dependencies:
- src
- shard.lock
- shard.yml
- README.md
commands: |
crystal docs
outputs:
- docs/index.html
pre-commit:
default: true
outputs:
- .git/hooks/commit-msg
- .git/hooks/pre-commit
dependencies:
- .pre-commit-config.yaml
commands: |
pre-commit install --hook-type commit-msg
pre-commit install
clean:
phony: true
always_run: true
commands: |
rm -rf shard.lock bin lib
coverage:
dependencies:
- src
- spec
- shard.lock
- shard.yml
commands: |
shards install
crystal build -o bin/run_tests src/run_tests.cr
rm -rf coverage/
mkdir coverage
kcov --clean --include-path=./src ${PWD}/coverage ./bin/run_tests
outputs:
- coverage/index.html
loc:
phony: true
always_run: true
dependencies:
- src
commands: |
tokei src -e src/constants/
aur:
phony: true
always_run: true
commands: |
rm -rf aur-{{NAME}}
git clone ssh://aur@aur.archlinux.org/{{NAME}}.git aur-{{NAME}}
sed s/pkgver=.*/pkgver=$(shards version)/ -i aur-{{NAME}}/PKGBUILD
sed s/pkgrel=.*/pkgrel=1/ -i aur-{{NAME}}/PKGBUILD
cd aur-{{NAME}} && updpkgsums && makepkg --printsrcinfo > .SRCINFO
cd aur-{{NAME}} && makepkg -fsr
cd aur-{{NAME}} && git add PKGBUILD .SRCINFO
cd aur-{{NAME}} && git commit -a -m "Update to $(shards version)"
cd aur-{{NAME}} && git push

Makefile (deleted)

@@ -1,7 +0,0 @@
build: $(wildcard src/**/*.cr) $(wildcard lexers/*xml) $(wildcard styles/*xml) shard.yml
shards build -Dstrict_multi_assign -Dno_number_autocast
release: $(wildcard src/**/*.cr) $(wildcard lexers/*xml) $(wildcard styles/*xml) shard.yml
shards build --release
static: $(wildcard src/**/*.cr) $(wildcard lexers/*xml) $(wildcard styles/*xml) shard.yml
shards build --release --static
strip bin/tartrazine

README.md

@@ -1,57 +1,159 @@
# TARTRAZINE
[![Tests](https://github.com/ralsina/tartrazine/actions/workflows/ci.yml/badge.svg)](https://github.com/ralsina/tartrazine/actions/workflows/ci.yml)
[![codecov](https://codecov.io/gh/ralsina/tartrazine/branch/main/graph/badge.svg?token=52XBPNL99F)](https://codecov.io/gh/ralsina/tartrazine)
Tartrazine is a library to syntax-highlight code. It is
a port of [Pygments](https://pygments.org/) to
[Crystal](https://crystal-lang.org/). Kind of.
[Crystal](https://crystal-lang.org/).
It's not currently usable because it's not finished, but:
It also provides a CLI tool which can be used to highlight many things in many styles.
* The lexers work for the implemented languages
* The provided styles work
* There is a very very simple HTML formatter
# A port of what? Why "kind of"?
Because I did not read the Pygments code. And this is actually
based on [Chroma](https://github.com/alecthomas/chroma) ...
although I did not read that code either.
Chroma has taken most of the Pygments lexers and turned them into
XML descriptions. What I did was take those XML files from Chroma
and a pile of test cases from Pygments, and I slapped them together
until the tests passed and my code produced the same output as
Chroma. Think of it as *extreme TDD*.
Currently the pass rate for tests in the supported languages
is `96.8%`, which is *not bad for a couple days hacking*.
This only covers the RegexLexers, which are the most common ones,
but it means the supported languages are a subset of Chroma's, which
is a subset of Pygments'.
Currently Tartrazine supports ... 241 languages.
It has 332 themes (64 from Chroma, the rest are base16 themes via
[Sixteen](https://github.com/ralsina/sixteen)
Currently Tartrazine supports 247 languages and has 331 themes (63 from Chroma,
the rest are base16 themes via [Sixteen](https://github.com/ralsina/sixteen)).
## Installation
This will have a CLI tool that can be installed, but it's not
there yet.
If you are using Arch: Use yay or your favourite AUR helper, package name is `tartrazine`.
From prebuilt binaries:
## Usage
Each release provides statically-linked binaries that should
work on any Linux. Get them from the [releases page](https://github.com/ralsina/tartrazine/releases)
and put them in your PATH.
This works:
To build from source:
1. Clone this repo
2. Run `make` to build the `tartrazine` binary
3. Copy the binary somewhere in your PATH.
## Usage as a CLI tool
Show a syntax highlighted version of a C source file in your terminal:
```shell
tartrazine whatever.c -l c -t catppuccin-macchiato --line-numbers -f terminal
```
Generate a standalone HTML file from a C source file with the syntax highlighted:
```shell
$ tartrazine whatever.c -t catppuccin-macchiato --line-numbers \
--standalone -f html -o whatever.html
```
## Usage as a Library
Add to your `shard.yml`:
```yaml
dependencies:
tartrazine:
github: ralsina/tartrazine
```
This is the high level API:
```crystal
require "tartrazine"
lexer = Tartrazine.lexer("crystal")
theme = Tartrazine.theme("catppuccin-macchiato")
puts Tartrazine::Html.new.format(File.read(ARGV[0]), lexer, theme)
html = Tartrazine.to_html(
"puts \"Hello, world!\"",
language: "crystal",
theme: "catppuccin-macchiato",
standalone: true,
line_numbers: true
)
```
This does more or less the same thing, but more manually:
```crystal
lexer = Tartrazine.lexer("crystal")
formatter = Tartrazine::Html.new(
theme: Tartrazine.theme("catppuccin-macchiato"),
line_numbers: true,
standalone: true,
)
puts formatter.format("puts \"Hello, world!\"", lexer)
```
The reason you may want to use the manual version is to reuse
the lexer and formatter objects for performance reasons.
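For example, when highlighting many files it pays off to build the lexer and formatter once and reuse them for every input. A minimal sketch, using only the calls shown above (the glob pattern and output paths are illustrative):

```crystal
require "tartrazine"

# Build these once; constructing them per file would repeat
# theme loading and lexer setup unnecessarily.
lexer = Tartrazine.lexer("crystal")
formatter = Tartrazine::Html.new(
  theme: Tartrazine.theme("catppuccin-macchiato"),
  line_numbers: true,
  standalone: true,
)

# Highlight every Crystal source file under src/ into a sibling .html file.
Dir.glob("src/**/*.cr").each do |path|
  File.write("#{path}.html", formatter.format(File.read(path), lexer))
end
```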
## Choosing what Lexers you want
By default Tartrazine will support all its lexers by embedding
them in the binary. This makes the binary large. If you are
using it as a library, you may want to just include a selection of lexers. To do that:
* Pass the `-Dnolexers` flag to the compiler
* Set the `TT_LEXERS` environment variable to a
comma-separated list of lexers you want to include.
This builds a binary with only the python, markdown, bash and yaml lexers (enough to highlight this `README.md`):
```bash
> TT_LEXERS=python,markdown,bash,yaml shards build -Dnolexers -d --error-trace
Dependencies are satisfied
Building: tartrazine
```
## Choosing what themes you want
Themes come from two places, tartrazine itself and [Sixteen](https://github.com/ralsina/sixteen).
To only embed selected themes, build your project with the `-Dnothemes` option, and
you can set two environment variables to control which themes are included:
* `TT_THEMES` is a comma-separated list of themes to include from tartrazine (see the styles directory in the source)
* `SIXTEEN_THEMES` is a comma-separated list of themes to include from Sixteen (see the base16 directory in the sixteen source)
For example (using the tartrazine CLI as the project):
```bash
$ TT_THEMES=colorful,autumn SIXTEEN_THEMES=pasque,pico shards build -Dnothemes
Dependencies are satisfied
Building: tartrazine
$ ./bin/tartrazine --list-themes
autumn
colorful
pasque
pico
```
Be careful not to build without any themes at all; nothing will work.
## Templates for standalone HTML output
If you are using the HTML formatter, you can pass a template to use for the output. The template is a string where the following placeholders will be replaced:
* `{{style_defs}}` will be replaced by the CSS styles needed for the theme
* `{{code}}` will be replaced by the highlighted code
This is an example template that changes the padding around the code:
```jinja2
<!DOCTYPE html>
<html>
<head>
<style>
{{style_defs}}
pre {
padding: 1em;
}
</style>
</head>
<body>
{{code}}
</body>
</html>
```
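Wiring a template file into the high-level API might look like the sketch below. Note that the `template:` argument name is an assumption made for illustration, not confirmed API; check the generated API docs for the exact signature.

```crystal
require "tartrazine"

# Load the template shown above from disk.
template = File.read("my_template.html")

# Hypothetical: assumes to_html accepts the template via a
# `template:` argument; the real parameter name may differ.
html = Tartrazine.to_html(
  File.read("whatever.c"),
  language: "c",
  theme: "catppuccin-macchiato",
  standalone: true,
  template: template,
)
File.write("whatever.html", html)
```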
## Contributing
1. Fork it (<https://github.com/ralsina/tartrazine/fork>)
@@ -63,3 +165,30 @@ puts Tartrazine::Html.new.format(File.read(ARGV[0]), lexer, theme)
## Contributors
- [Roberto Alsina](https://github.com/ralsina) - creator and maintainer
## A port of what, and why "kind of"
Pygments is a staple of the Python ecosystem, and it's great.
It lets you highlight code in many languages, and it has many
themes. Chroma is "Pygments for Go", it's actually a port of
Pygments to Go, and it's great too.
I wanted that in Crystal, so I started this project. But I did
not read much of the Pygments code. Or much of Chroma's.
Chroma has taken most of the Pygments lexers and turned them into
XML descriptions. What I did was take those XML files from Chroma
and a pile of test cases from Pygments, and I slapped them together
until the tests passed and my code produced the same output as
Chroma. Think of it as [*extreme TDD*](https://ralsina.me/weblog/posts/tartrazine-reimplementing-pygments.html).
Currently the pass rate for tests in the supported languages
is `96.8%`, which is *not bad for a couple days hacking*.
This covers the RegexLexers (the most common kind) and DelegatingLexers
(useful for things like template languages), so the supported languages
are a subset of Chroma's, which is a subset of Pygments'.
Then performance was bad, so I hacked and hacked and made it significantly
[faster than chroma](https://ralsina.me/weblog/posts/a-tale-of-optimization.html)
which is fun.

TODO.md

@@ -2,6 +2,14 @@
## TODO
* Implement styles
* Implement formatters
* Implement lexer loader that respects aliases, etc
* Implement styles
* Implement formatters
* Implement CLI
* ✅ Implement lexer loader that respects aliases
* ✅ Implement lexer loader by file extension
* ✅ Add --line-numbers to terminal formatter
* ✅ Implement lexer loader by mime type
* ✅ Implement Delegating lexers
* ✅ Add RstLexer
* Add Mako template lexer
* ✅ Implement heuristic lexer detection

build_static.sh

@@ -7,10 +7,10 @@ docker run --rm --privileged \
# Build for AMD64
docker build . -f Dockerfile.static -t tartrazine-builder
docker run -ti --rm -v "$PWD":/app --user="$UID" tartrazine-builder /bin/sh -c "cd /app && rm -rf lib shard.lock && make static"
docker run -ti --rm -v "$PWD":/app --user="$UID" tartrazine-builder /bin/sh -c "cd /app && rm -rf lib shard.lock && shards build --static --release && strip bin/tartrazine"
mv bin/tartrazine bin/tartrazine-static-linux-amd64
# Build for ARM64
docker build . -f Dockerfile.static --platform linux/arm64 -t tartrazine-builder
docker run -ti --rm -v "$PWD":/app --platform linux/arm64 --user="$UID" tartrazine-builder /bin/sh -c "cd /app && rm -rf lib shard.lock && make static"
docker run -ti --rm -v "$PWD":/app --platform linux/arm64 --user="$UID" tartrazine-builder /bin/sh -c "cd /app && rm -rf lib shard.lock && shards build --static --release && strip bin/tartrazine"
mv bin/tartrazine bin/tartrazine-static-linux-arm64

cliff.toml

@@ -0,0 +1,79 @@
# git-cliff ~ default configuration file
# https://git-cliff.org/docs/configuration
#
# Lines starting with "#" are comments.
# Configuration options are organized into tables and keys.
# See documentation for more information on available options.
[changelog]
# template for the changelog header
header = """
# Changelog\n
All notable changes to this project will be documented in this file.\n
"""
# template for the changelog body
# https://keats.github.io/tera/docs/#introduction
body = """
{% if version %}\
## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }}
{% else %}\
## [unreleased]
{% endif %}\
{% for group, commits in commits | group_by(attribute="group") %}
### {{ group | striptags | trim | upper_first }}
{% for commit in commits %}
- {% if commit.scope %}*({{ commit.scope }})* {% endif %}\
{% if commit.breaking %}[**breaking**] {% endif %}\
{{ commit.message | upper_first }}\
{% endfor %}
{% endfor %}\n
"""
# template for the changelog footer
footer = """
<!-- generated by git-cliff -->
"""
# remove the leading and trailing whitespace from the templates
trim = true
# postprocessors
postprocessors = [
# { pattern = '<REPO>', replace = "https://github.com/orhun/git-cliff" }, # replace repository URL
]
[git]
# parse the commits based on https://www.conventionalcommits.org
conventional_commits = true
# filter out the commits that are not conventional
filter_unconventional = true
# process each line of a commit as an individual commit
split_commits = false
# regex for preprocessing the commit messages
commit_preprocessors = [
# Replace issue numbers
#{ pattern = '\((\w+\s)?#([0-9]+)\)', replace = "([#${2}](<REPO>/issues/${2}))"},
# Check spelling of the commit with https://github.com/crate-ci/typos
# If the spelling is incorrect, it will be automatically fixed.
#{ pattern = '.*', replace_command = 'typos --write-changes -' },
]
# regex for parsing and grouping commits
commit_parsers = [
{ message = "^feat", group = "<!-- 0 -->🚀 Features" },
{ message = "^fix", group = "<!-- 1 -->🐛 Bug Fixes" },
{ message = "^doc", group = "<!-- 3 -->📚 Documentation" },
{ message = "^perf", group = "<!-- 4 -->⚡ Performance" },
{ message = "^refactor", group = "<!-- 2 -->🚜 Refactor" },
{ message = "^style", group = "<!-- 5 -->🎨 Styling" },
{ message = "^test", group = "<!-- 6 -->🧪 Testing" },
{ message = "^chore\\(release\\): prepare for", skip = true },
{ message = "^chore\\(deps.*\\)", skip = true },
{ message = "^chore\\(pr\\)", skip = true },
{ message = "^chore\\(pull\\)", skip = true },
{ message = "^chore|^ci", group = "<!-- 7 -->⚙️ Miscellaneous Tasks" },
{ body = ".*security", group = "<!-- 8 -->🛡️ Security" },
{ message = "^revert", group = "<!-- 9 -->◀️ Revert" },
]
# filter out the commits that are not matched by commit parsers
filter_commits = false
# sort the tags topologically
topo_order = false
# sort the commits inside sections by oldest/newest order
sort_commits = "oldest"

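The `commit_parsers` table above is an ordered list: the first entry whose regex matches a commit message decides its changelog group, or skips it outright. A minimal Python sketch of that first-match classification, using a few patterns from this cliff.toml (the `<!-- n -->` sorting prefixes are dropped; the matching loop is an illustration, not git-cliff's implementation):

```python
import re

# Ordered (pattern, group, skip) triples mirroring part of the
# commit_parsers table above. First match wins, as in git-cliff.
PARSERS = [
    (r"^feat", "🚀 Features", False),
    (r"^fix", "🐛 Bug Fixes", False),
    (r"^doc", "📚 Documentation", False),
    (r"^chore\(deps.*\)", None, True),   # skip = true
    (r"^chore|^ci", "⚙️ Miscellaneous Tasks", False),
]

def classify(message: str):
    """Return the changelog group for a commit message, or None if the
    message is skipped or matches no parser."""
    for pattern, group, skip in PARSERS:
        if re.search(pattern, message):
            return None if skip else group
    return None

print(classify("feat: Support custom template"))  # 🚀 Features
print(classify("chore(deps): bump chroma"))       # None (skipped)
```

The ordering matters: `chore(deps)` commits hit the skip entry before the generic `^chore|^ci` catch-all.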
do_release.sh Executable file

@ -0,0 +1,15 @@
#!/bin/bash
set -e
PKGNAME=$(basename "$PWD")
VERSION=$(git cliff --bumped-version --unreleased |cut -dv -f2)
sed "s/^version:.*$/version: $VERSION/g" -i shard.yml
git add shard.yml
hace lint test
git cliff --bump -u -p CHANGELOG.md
git commit -a -m "bump: Release v$VERSION"
hace static
git tag "v$VERSION"
git push --tags
gh release create "v$VERSION" "bin/$PKGNAME-static-linux-amd64" "bin/$PKGNAME-static-linux-arm64" --title "Release v$VERSION" --notes "$(git cliff -l -s all)"

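do_release.sh computes the next version with `git cliff --bumped-version` (which prints a tag like `v0.13.0`; the `cut -dv -f2` strips everything up to and including the leading `v`), then rewrites the `version:` line of shard.yml with sed. A small Python sketch of that substitution, illustrative only (the real script edits the file in place):

```python
import re

def bump_shard_version(shard_yml: str, version: str) -> str:
    """Replace the `version:` line in a shard.yml body, mirroring the
    sed command in do_release.sh: s/^version:.*$/version: $VERSION/g."""
    return re.sub(r"^version:.*$", f"version: {version}", shard_yml, flags=re.M)

# `cut -dv -f2` on a tag splits on "v" and keeps the second field:
assert "v0.13.0".split("v", 1)[1] == "0.13.0"

before = "name: tartrazine\nversion: 0.12.0\nlicense: MIT\n"
print(bump_shard_version(before, "0.13.0"))
```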
Binary file not shown.

BIN
fonts/courier-bold.pcf.gz Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

lexers/LICENSE-heuristics Normal file

@ -0,0 +1,22 @@
Copyright (c) 2017 GitHub, Inc.

Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

lexers/LiquidLexer.xml Normal file

@ -0,0 +1,129 @@
<lexer>
<config>
<name>liquid</name>
<alias>liquid</alias>
<filename>*.liquid</filename>
</config>
<rules>
<state name="root">
<rule pattern="[^{]+"><token type="Text"/></rule>
<rule pattern="(\{%)(\s*)"><bygroups><token type="Punctuation"/><token type="TextWhitespace"/></bygroups><push state="tag-or-block"/></rule>
<rule pattern="(\{\{)(\s*)([^\s}]+)"><bygroups><token type="Punctuation"/><token type="TextWhitespace"/><usingself state="generic"/></bygroups><push state="output"/></rule>
<rule pattern="\{"><token type="Text"/></rule>
</state>
<state name="tag-or-block">
<rule pattern="(if|unless|elsif|case)(?=\s+)"><token type="KeywordReserved"/><push state="condition"/></rule>
<rule pattern="(when)(\s+)"><bygroups><token type="KeywordReserved"/><token type="TextWhitespace"/></bygroups><combined state="end-of-block" state="whitespace" state="generic"/></rule>
<rule pattern="(else)(\s*)(%\})"><bygroups><token type="KeywordReserved"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups><pop depth="1"/></rule>
<rule pattern="(capture)(\s+)([^\s%]+)(\s*)(%\})"><bygroups><token type="NameTag"/><token type="TextWhitespace"/><usingself state="variable"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups><pop depth="1"/></rule>
<rule pattern="(comment)(\s*)(%\})"><bygroups><token type="NameTag"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups><push state="comment"/></rule>
<rule pattern="(raw)(\s*)(%\})"><bygroups><token type="NameTag"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups><push state="raw"/></rule>
<rule pattern="(end(case|unless|if))(\s*)(%\})"><bygroups><token type="KeywordReserved"/>None<token type="TextWhitespace"/><token type="Punctuation"/></bygroups><pop depth="1"/></rule>
<rule pattern="(end([^\s%]+))(\s*)(%\})"><bygroups><token type="NameTag"/>None<token type="TextWhitespace"/><token type="Punctuation"/></bygroups><pop depth="1"/></rule>
<rule pattern="(cycle)(\s+)(?:([^\s:]*)(:))?(\s*)"><bygroups><token type="NameTag"/><token type="TextWhitespace"/><usingself state="generic"/><token type="Punctuation"/><token type="TextWhitespace"/></bygroups><push state="variable-tag-markup"/></rule>
<rule pattern="([^\s%]+)(\s*)"><bygroups><token type="NameTag"/><token type="TextWhitespace"/></bygroups><push state="tag-markup"/></rule>
</state>
<state name="output">
<rule><include state="whitespace"/></rule>
<rule pattern="\}\}"><token type="Punctuation"/><pop depth="1"/></rule>
<rule pattern="\|"><token type="Punctuation"/><push state="filters"/></rule>
</state>
<state name="filters">
<rule><include state="whitespace"/></rule>
<rule pattern="\}\}"><token type="Punctuation"/><push state="#pop" state="#pop"/></rule>
<rule pattern="([^\s|:]+)(:?)(\s*)"><bygroups><token type="NameFunction"/><token type="Punctuation"/><token type="TextWhitespace"/></bygroups><push state="filter-markup"/></rule>
</state>
<state name="filter-markup">
<rule pattern="\|"><token type="Punctuation"/><pop depth="1"/></rule>
<rule><include state="end-of-tag"/></rule>
<rule><include state="default-param-markup"/></rule>
</state>
<state name="condition">
<rule><include state="end-of-block"/></rule>
<rule><include state="whitespace"/></rule>
<rule pattern="([^\s=!&gt;&lt;]+)(\s*)([=!&gt;&lt;]=?)(\s*)(\S+)(\s*)(%\})"><bygroups><usingself state="generic"/><token type="TextWhitespace"/><token type="Operator"/><token type="TextWhitespace"/><usingself state="generic"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups></rule>
<rule pattern="\b!"><token type="Operator"/></rule>
<rule pattern="\bnot\b"><token type="OperatorWord"/></rule>
<rule pattern="([\w.\&#x27;&quot;]+)(\s+)(contains)(\s+)([\w.\&#x27;&quot;]+)"><bygroups><usingself state="generic"/><token type="TextWhitespace"/><token type="OperatorWord"/><token type="TextWhitespace"/><usingself state="generic"/></bygroups></rule>
<rule><include state="generic"/></rule>
<rule><include state="whitespace"/></rule>
</state>
<state name="generic-value">
<rule><include state="generic"/></rule>
<rule><include state="end-at-whitespace"/></rule>
</state>
<state name="operator">
<rule pattern="(\s*)((=|!|&gt;|&lt;)=?)(\s*)"><bygroups><token type="TextWhitespace"/><token type="Operator"/>None<token type="TextWhitespace"/></bygroups><pop depth="1"/></rule>
<rule pattern="(\s*)(\bcontains\b)(\s*)"><bygroups><token type="TextWhitespace"/><token type="OperatorWord"/><token type="TextWhitespace"/></bygroups><pop depth="1"/></rule>
</state>
<state name="end-of-tag">
<rule pattern="\}\}"><token type="Punctuation"/><pop depth="1"/></rule>
</state>
<state name="end-of-block">
<rule pattern="%\}"><token type="Punctuation"/><push state="#pop" state="#pop"/></rule>
</state>
<state name="end-at-whitespace">
<rule pattern="\s+"><token type="TextWhitespace"/><pop depth="1"/></rule>
</state>
<state name="param-markup">
<rule><include state="whitespace"/></rule>
<rule pattern="([^\s=:]+)(\s*)(=|:)"><bygroups><token type="NameAttribute"/><token type="TextWhitespace"/><token type="Operator"/></bygroups></rule>
<rule pattern="(\{\{)(\s*)([^\s}])(\s*)(\}\})"><bygroups><token type="Punctuation"/><token type="TextWhitespace"/><usingself state="variable"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups></rule>
<rule><include state="string"/></rule>
<rule><include state="number"/></rule>
<rule><include state="keyword"/></rule>
<rule pattern=","><token type="Punctuation"/></rule>
</state>
<state name="default-param-markup">
<rule><include state="param-markup"/></rule>
<rule pattern="."><token type="Text"/></rule>
</state>
<state name="variable-param-markup">
<rule><include state="param-markup"/></rule>
<rule><include state="variable"/></rule>
<rule pattern="."><token type="Text"/></rule>
</state>
<state name="tag-markup">
<rule pattern="%\}"><token type="Punctuation"/><push state="#pop" state="#pop"/></rule>
<rule><include state="default-param-markup"/></rule>
</state>
<state name="variable-tag-markup">
<rule pattern="%\}"><token type="Punctuation"/><push state="#pop" state="#pop"/></rule>
<rule><include state="variable-param-markup"/></rule>
</state>
<state name="keyword">
<rule pattern="\b(false|true)\b"><token type="KeywordConstant"/></rule>
</state>
<state name="variable">
<rule pattern="[a-zA-Z_]\w*"><token type="NameVariable"/></rule>
<rule pattern="(?&lt;=\w)\.(?=\w)"><token type="Punctuation"/></rule>
</state>
<state name="string">
<rule pattern="&#x27;[^&#x27;]*&#x27;"><token type="LiteralStringSingle"/></rule>
<rule pattern="&quot;[^&quot;]*&quot;"><token type="LiteralStringDouble"/></rule>
</state>
<state name="number">
<rule pattern="\d+\.\d+"><token type="LiteralNumberFloat"/></rule>
<rule pattern="\d+"><token type="LiteralNumberInteger"/></rule>
</state>
<state name="generic">
<rule><include state="keyword"/></rule>
<rule><include state="string"/></rule>
<rule><include state="number"/></rule>
<rule><include state="variable"/></rule>
</state>
<state name="whitespace">
<rule pattern="[ \t]+"><token type="TextWhitespace"/></rule>
</state>
<state name="comment">
<rule pattern="(\{%)(\s*)(endcomment)(\s*)(%\})"><bygroups><token type="Punctuation"/><token type="TextWhitespace"/><token type="NameTag"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups><push state="#pop" state="#pop"/></rule>
<rule pattern="."><token type="Comment"/></rule>
</state>
<state name="raw">
<rule pattern="[^{]+"><token type="Text"/></rule>
<rule pattern="(\{%)(\s*)(endraw)(\s*)(%\})"><bygroups><token type="Punctuation"/><token type="TextWhitespace"/><token type="NameTag"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups><pop depth="1"/></rule>
<rule pattern="\{"><token type="Text"/></rule>
</state>
</rules>
</lexer>

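Each of these XML lexers describes a regex-driven pushdown machine: a state holds an ordered list of rules, a matching rule emits a token for the matched text, and `<push>`/`<pop>` manipulate a state stack. A toy Python sketch of that dispatch loop, with two states loosely modeled on the Liquid lexer's `root`/`output` states above (a deliberate simplification, not tartrazine's actual engine — no bygroups, includes, or usingself):

```python
import re

# Each state: ordered (pattern, token, action) rules; action is None,
# ("push", state) or ("pop",), echoing the XML <rule> elements above.
STATES = {
    "root": [
        (r"[^{]+", "Text", None),
        (r"\{\{", "Punctuation", ("push", "output")),
        (r"\{", "Text", None),
    ],
    "output": [
        (r"\}\}", "Punctuation", ("pop",)),
        (r"\s+", "TextWhitespace", None),
        (r"[^\s}]+", "NameVariable", None),
    ],
}

def tokenize(text):
    stack, pos, out = ["root"], 0, []
    while pos < len(text):
        for pattern, token, action in STATES[stack[-1]]:
            m = re.match(pattern, text[pos:])
            if m:
                out.append((token, m.group()))
                pos += len(m.group())
                if action and action[0] == "push":
                    stack.append(action[1])
                elif action and action[0] == "pop":
                    stack.pop()
                break
        else:
            pos += 1  # no rule matched: skip one character
    return out

print(tokenize("Hello {{ name }}!"))
```

Running it on `Hello {{ name }}!` tokenizes the literal text in `root`, pushes into `output` at `{{`, and pops back at `}}` — the same stack discipline the `<push state="output"/>` and `<pop depth="1"/>` elements encode.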
lexers/VelocityLexer.xml Normal file

@ -0,0 +1,54 @@
<lexer>
<config>
<name>Velocity</name>
<alias>velocity</alias>
<filename>*.vm</filename>
<filename>*.fhtml</filename>
<dot_all>true</dot_all>
</config>
<rules>
<state name="root">
<rule pattern="[^{#$]+"><token type="Other"/></rule>
<rule pattern="(#)(\*.*?\*)(#)"><bygroups><token type="CommentPreproc"/><token type="Comment"/><token type="CommentPreproc"/></bygroups></rule>
<rule pattern="(##)(.*?$)"><bygroups><token type="CommentPreproc"/><token type="Comment"/></bygroups></rule>
<rule pattern="(#\{?)([a-zA-Z_]\w*)(\}?)(\s?\()"><bygroups><token type="CommentPreproc"/><token type="NameFunction"/><token type="CommentPreproc"/><token type="Punctuation"/></bygroups><push state="directiveparams"/></rule>
<rule pattern="(#\{?)([a-zA-Z_]\w*)(\}|\b)"><bygroups><token type="CommentPreproc"/><token type="NameFunction"/><token type="CommentPreproc"/></bygroups></rule>
<rule pattern="\$!?\{?"><token type="Punctuation"/><push state="variable"/></rule>
</state>
<state name="variable">
<rule pattern="[a-zA-Z_]\w*"><token type="NameVariable"/></rule>
<rule pattern="\("><token type="Punctuation"/><push state="funcparams"/></rule>
<rule pattern="(\.)([a-zA-Z_]\w*)"><bygroups><token type="Punctuation"/><token type="NameVariable"/></bygroups><push/></rule>
<rule pattern="\}"><token type="Punctuation"/><pop depth="1"/></rule>
<rule><pop depth="1"/></rule>
</state>
<state name="directiveparams">
<rule pattern="(&amp;&amp;|\|\||==?|!=?|[-&lt;&gt;+*%&amp;|^/])|\b(eq|ne|gt|lt|ge|le|not|in)\b"><token type="Operator"/></rule>
<rule pattern="\["><token type="Operator"/><push state="rangeoperator"/></rule>
<rule pattern="\b[a-zA-Z_]\w*\b"><token type="NameFunction"/></rule>
<rule><include state="funcparams"/></rule>
</state>
<state name="rangeoperator">
<rule pattern="\.\."><token type="Operator"/></rule>
<rule><include state="funcparams"/></rule>
<rule pattern="\]"><token type="Operator"/><pop depth="1"/></rule>
</state>
<state name="funcparams">
<rule pattern="\$!?\{?"><token type="Punctuation"/><push state="variable"/></rule>
<rule pattern="\s+"><token type="Text"/></rule>
<rule pattern="[,:]"><token type="Punctuation"/></rule>
<rule pattern="&quot;(\\\\|\\[^\\]|[^&quot;\\])*&quot;"><token type="LiteralStringDouble"/></rule>
<rule pattern="&#x27;(\\\\|\\[^\\]|[^&#x27;\\])*&#x27;"><token type="LiteralStringSingle"/></rule>
<rule pattern="0[xX][0-9a-fA-F]+[Ll]?"><token type="LiteralNumber"/></rule>
<rule pattern="\b[0-9]+\b"><token type="LiteralNumber"/></rule>
<rule pattern="(true|false|null)\b"><token type="KeywordConstant"/></rule>
<rule pattern="\("><token type="Punctuation"/><push/></rule>
<rule pattern="\)"><token type="Punctuation"/><pop depth="1"/></rule>
<rule pattern="\{"><token type="Punctuation"/><push/></rule>
<rule pattern="\}"><token type="Punctuation"/><pop depth="1"/></rule>
<rule pattern="\["><token type="Punctuation"/><push/></rule>
<rule pattern="\]"><token type="Punctuation"/><pop depth="1"/></rule>
</state>
</rules>
</lexer>

@ -63,4 +63,3 @@
</state>
</rules>
</lexer>

@ -55,4 +55,3 @@
</state>
</rules>
</lexer>

lexers/atl.xml Normal file

@ -0,0 +1,165 @@
<lexer>
<config>
<name>ATL</name>
<alias>atl</alias>
<filename>*.atl</filename>
<mime_type>text/x-atl</mime_type>
<dot_all>true</dot_all>
</config>
<rules>
<state name="root">
<rule pattern="(--.*?)(\n)">
<bygroups>
<token type="CommentSingle" />
<token type="TextWhitespace" />
</bygroups>
</rule>
<rule pattern="(and|distinct|endif|else|for|foreach|if|implies|in|let|not|or|self|super|then|thisModule|xor)\b">
<token type="Keyword" />
</rule>
<rule pattern="(OclUndefined|true|false|#\w+)\b">
<token type="KeywordConstant" />
</rule>
<rule pattern="(module|query|library|create|from|to|uses)\b">
<token type="KeywordNamespace" />
</rule>
<rule pattern="(do)(\s*)({)">
<bygroups>
<token type="KeywordNamespace" />
<token type="TextWhitespace" />
<token type="Punctuation" />
</bygroups>
</rule>
<rule pattern="(abstract|endpoint|entrypoint|lazy|unique)(\s+)">
<bygroups>
<token type="KeywordDeclaration" />
<token type="TextWhitespace" />
</bygroups>
</rule>
<rule pattern="(rule)(\s+)">
<bygroups>
<token type="KeywordNamespace" />
<token type="TextWhitespace" />
</bygroups>
</rule>
<rule pattern="(helper)(\s+)">
<bygroups>
<token type="KeywordNamespace" />
<token type="TextWhitespace" />
</bygroups>
</rule>
<rule pattern="(context)(\s+)">
<bygroups>
<token type="KeywordNamespace" />
<token type="TextWhitespace" />
</bygroups>
</rule>
<rule pattern="(def)(\s*)(:)(\s*)">
<bygroups>
<token type="KeywordNamespace" />
<token type="TextWhitespace" />
<token type="Punctuation" />
<token type="TextWhitespace" />
</bygroups>
</rule>
<rule pattern="(Bag|Boolean|Integer|OrderedSet|Real|Sequence|Set|String|Tuple)">
<token type="KeywordType" />
</rule>
<rule pattern="(\w+)(\s*)(&lt;-|&lt;:=)">
<bygroups>
<token type="NameNamespace" />
<token type="TextWhitespace" />
<token type="Punctuation" />
</bygroups>
</rule>
<rule pattern="#&quot;">
<token type="KeywordConstant" />
<push state="quotedenumliteral" />
</rule>
<rule pattern="&quot;">
<token type="NameNamespace" />
<push state="quotedname" />
</rule>
<rule pattern="[^\S\n]+">
<token type="TextWhitespace" />
</rule>
<rule pattern="&#x27;">
<token type="LiteralString" />
<push state="string" />
</rule>
<rule pattern="[0-9]*\.[0-9]+">
<token type="LiteralNumberFloat" />
</rule>
<rule pattern="0|[1-9][0-9]*">
<token type="LiteralNumberInteger" />
</rule>
<rule pattern="[*&lt;&gt;+=/-]">
<token type="Operator" />
</rule>
<rule pattern="([{}();:.,!|]|-&gt;)">
<token type="Punctuation" />
</rule>
<rule pattern="\n">
<token type="TextWhitespace" />
</rule>
<rule pattern="\w+">
<token type="NameNamespace" />
</rule>
</state>
<state name="string">
<rule pattern="[^\\&#x27;]+">
<token type="LiteralString" />
</rule>
<rule pattern="\\\\">
<token type="LiteralString" />
</rule>
<rule pattern="\\&#x27;">
<token type="LiteralString" />
</rule>
<rule pattern="\\">
<token type="LiteralString" />
</rule>
<rule pattern="&#x27;">
<token type="LiteralString" />
<pop depth="1" />
</rule>
</state>
<state name="quotedname">
<rule pattern="[^\\&quot;]+">
<token type="NameNamespace" />
</rule>
<rule pattern="\\\\">
<token type="NameNamespace" />
</rule>
<rule pattern="\\&quot;">
<token type="NameNamespace" />
</rule>
<rule pattern="\\">
<token type="NameNamespace" />
</rule>
<rule pattern="&quot;">
<token type="NameNamespace" />
<pop depth="1" />
</rule>
</state>
<state name="quotedenumliteral">
<rule pattern="[^\\&quot;]+">
<token type="KeywordConstant" />
</rule>
<rule pattern="\\\\">
<token type="KeywordConstant" />
</rule>
<rule pattern="\\&quot;">
<token type="KeywordConstant" />
</rule>
<rule pattern="\\">
<token type="KeywordConstant" />
</rule>
<rule pattern="&quot;">
<token type="KeywordConstant" />
<pop depth="1" />
</rule>
</state>
</rules>
</lexer>

@ -75,4 +75,3 @@
</state>
</rules>
</lexer>

@ -67,4 +67,3 @@
</state>
</rules>
</lexer>

lexers/bbcode.xml Normal file

@ -0,0 +1,21 @@
<lexer>
<config>
<name>BBCode</name>
<alias>bbcode</alias>
<mime_type>text/x-bbcode</mime_type>
</config>
<rules>
<state name="root">
<rule pattern="[^[]+"><token type="Text"/></rule>
<rule pattern="\[/?\w+"><token type="Keyword"/><push state="tag"/></rule>
<rule pattern="\["><token type="Text"/></rule>
</state>
<state name="tag">
<rule pattern="\s+"><token type="Text"/></rule>
<rule pattern="(\w+)(=)(&quot;?[^\s&quot;\]]+&quot;?)"><bygroups><token type="NameAttribute"/><token type="Operator"/><token type="LiteralString"/></bygroups></rule>
<rule pattern="(=)(&quot;?[^\s&quot;\]]+&quot;?)"><bygroups><token type="Operator"/><token type="LiteralString"/></bygroups></rule>
<rule pattern="\]"><token type="Keyword"/><pop depth="1"/></rule>
</state>
</rules>
</lexer>

lexers/beef.xml Normal file

@ -0,0 +1,120 @@
<lexer>
<config>
<name>Beef</name>
<alias>beef</alias>
<filename>*.bf</filename>
<mime_type>text/x-beef</mime_type>
<dot_all>true</dot_all>
<ensure_nl>true</ensure_nl>
</config>
<rules>
<state name="root">
<rule pattern="^\s*\[.*?\]">
<token type="NameAttribute"/>
</rule>
<rule pattern="[^\S\n]+">
<token type="Text"/>
</rule>
<rule pattern="\\\n">
<token type="Text"/>
</rule>
<rule pattern="///[^\n\r]*">
<token type="CommentSpecial"/>
</rule>
<rule pattern="//[^\n\r]*">
<token type="CommentSingle"/>
</rule>
<rule pattern="/[*].*?[*]/">
<token type="CommentMultiline"/>
</rule>
<rule pattern="\n">
<token type="Text"/>
</rule>
<rule pattern="[~!%^&amp;*()+=|\[\]:;,.&lt;&gt;/?-]">
<token type="Punctuation"/>
</rule>
<rule pattern="[{}]">
<token type="Punctuation"/>
</rule>
<rule pattern="@&#34;(&#34;&#34;|[^&#34;])*&#34;">
<token type="LiteralString"/>
</rule>
<rule pattern="\$@?&#34;(&#34;&#34;|[^&#34;])*&#34;">
<token type="LiteralString"/>
</rule>
<rule pattern="&#34;(\\\\|\\&#34;|[^&#34;\n])*[&#34;\n]">
<token type="LiteralString"/>
</rule>
<rule pattern="&#39;\\.&#39;|&#39;[^\\]&#39;">
<token type="LiteralStringChar"/>
</rule>
<rule pattern="0[xX][0-9a-fA-F]+[Ll]?|\d[_\d]*(\.\d*)?([eE][+-]?\d+)?[flFLdD]?">
<token type="LiteralNumber"/>
</rule>
<rule pattern="#[ \t]*(if|endif|else|elif|define|undef|line|error|warning|region|endregion|pragma|nullable)\b">
<token type="CommentPreproc"/>
</rule>
<rule pattern="\b(extern)(\s+)(alias)\b">
<bygroups>
<token type="Keyword"/>
<token type="Text"/>
<token type="Keyword"/>
</bygroups>
</rule>
<rule pattern="(as|await|base|break|by|case|catch|checked|continue|default|delegate|else|event|finally|fixed|for|repeat|goto|if|in|init|is|let|lock|new|scope|on|out|params|readonly|ref|return|sizeof|stackalloc|switch|this|throw|try|typeof|unchecked|virtual|void|while|get|set|new|yield|add|remove|value|alias|ascending|descending|from|group|into|orderby|select|thenby|where|join|equals)\b">
<token type="Keyword"/>
</rule>
<rule pattern="(global)(::)">
<bygroups>
<token type="Keyword"/>
<token type="Punctuation"/>
</bygroups>
</rule>
<rule pattern="(abstract|async|const|enum|explicit|extern|implicit|internal|operator|override|partial|extension|private|protected|public|static|sealed|unsafe|volatile)\b">
<token type="KeywordDeclaration"/>
</rule>
<rule pattern="(bool|byte|char8|char16|char32|decimal|double|float|int|int8|int16|int32|int64|long|object|sbyte|short|string|uint|uint8|uint16|uint32|uint64|uint|let|var)\b\??">
<token type="KeywordType"/>
</rule>
<rule pattern="(true|false|null)\b">
<token type="KeywordConstant"/>
</rule>
<rule pattern="(class|struct|record|interface)(\s+)">
<bygroups>
<token type="Keyword"/>
<token type="Text"/>
</bygroups>
<push state="class"/>
</rule>
<rule pattern="(namespace|using)(\s+)">
<bygroups>
<token type="Keyword"/>
<token type="Text"/>
</bygroups>
<push state="namespace"/>
</rule>
<rule pattern="@?[_a-zA-Z]\w*">
<token type="Name"/>
</rule>
</state>
<state name="class">
<rule pattern="@?[_a-zA-Z]\w*">
<token type="NameClass"/>
<pop depth="1"/>
</rule>
<rule>
<pop depth="1"/>
</rule>
</state>
<state name="namespace">
<rule pattern="(?=\()">
<token type="Text"/>
<pop depth="1"/>
</rule>
<rule pattern="(@?[_a-zA-Z]\w*|\.)+">
<token type="NameNamespace"/>
<pop depth="1"/>
</rule>
</state>
</rules>
</lexer>

@ -1,762 +0,0 @@
<lexer>
<config>
<name>Crystal</name>
<alias>cr</alias>
<alias>crystal</alias>
<filename>*.cr</filename>
<mime_type>text/x-crystal</mime_type>
<dot_all>true</dot_all>
</config>
<rules>
<state name="pa-intp-string">
<rule pattern="\\[\(]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="\(">
<token type="LiteralStringOther"/>
<push/>
</rule>
<rule pattern="\)">
<token type="LiteralStringOther"/>
<pop depth="1"/>
</rule>
<rule>
<include state="string-intp-escaped"/>
</rule>
<rule pattern="[\\#()]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="[^\\#()]+">
<token type="LiteralStringOther"/>
</rule>
</state>
<state name="ab-regex">
<rule pattern="\\[\\&lt;&gt;]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="&lt;">
<token type="LiteralStringRegex"/>
<push/>
</rule>
<rule pattern="&gt;[imsx]*">
<token type="LiteralStringRegex"/>
<pop depth="1"/>
</rule>
<rule>
<include state="string-intp"/>
</rule>
<rule pattern="[\\#&lt;&gt;]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="[^\\#&lt;&gt;]+">
<token type="LiteralStringRegex"/>
</rule>
</state>
<state name="cb-regex">
<rule pattern="\\[\\{}]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="\{">
<token type="LiteralStringRegex"/>
<push/>
</rule>
<rule pattern="\}[imsx]*">
<token type="LiteralStringRegex"/>
<pop depth="1"/>
</rule>
<rule>
<include state="string-intp"/>
</rule>
<rule pattern="[\\#{}]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="[^\\#{}]+">
<token type="LiteralStringRegex"/>
</rule>
</state>
<state name="simple-backtick">
<rule>
<include state="string-intp-escaped"/>
</rule>
<rule pattern="[^\\`#]+">
<token type="LiteralStringBacktick"/>
</rule>
<rule pattern="[\\#]">
<token type="LiteralStringBacktick"/>
</rule>
<rule pattern="`">
<token type="LiteralStringBacktick"/>
<pop depth="1"/>
</rule>
</state>
<state name="string-intp">
<rule pattern="#\{">
<token type="LiteralStringInterpol"/>
<push state="in-intp"/>
</rule>
</state>
<state name="interpolated-regex">
<rule>
<include state="string-intp"/>
</rule>
<rule pattern="[\\#]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="[^\\#]+">
<token type="LiteralStringRegex"/>
</rule>
</state>
<state name="cb-string">
<rule pattern="\\[\\{}]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="\{">
<token type="LiteralStringOther"/>
<push/>
</rule>
<rule pattern="\}">
<token type="LiteralStringOther"/>
<pop depth="1"/>
</rule>
<rule pattern="[\\#{}]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="[^\\#{}]+">
<token type="LiteralStringOther"/>
</rule>
</state>
<state name="in-macro-control">
<rule pattern="\{%">
<token type="LiteralStringInterpol"/>
<push/>
</rule>
<rule pattern="%\}">
<token type="LiteralStringInterpol"/>
<pop depth="1"/>
</rule>
<rule pattern="for\b|in\b">
<token type="Keyword"/>
</rule>
<rule>
<include state="root"/>
</rule>
</state>
<state name="interpolated-string">
<rule>
<include state="string-intp"/>
</rule>
<rule pattern="[\\#]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="[^\\#]+">
<token type="LiteralStringOther"/>
</rule>
</state>
<state name="in-macro-expr">
<rule pattern="\{\{">
<token type="LiteralStringInterpol"/>
<push/>
</rule>
<rule pattern="\}\}">
<token type="LiteralStringInterpol"/>
<pop depth="1"/>
</rule>
<rule>
<include state="root"/>
</rule>
</state>
<state name="simple-string">
<rule>
<include state="string-intp-escaped"/>
</rule>
<rule pattern="[^\\&#34;#]+">
<token type="LiteralStringDouble"/>
</rule>
<rule pattern="[\\#]">
<token type="LiteralStringDouble"/>
</rule>
<rule pattern="&#34;">
<token type="LiteralStringDouble"/>
<pop depth="1"/>
</rule>
</state>
<state name="cb-intp-string">
<rule pattern="\\[\{]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="\{">
<token type="LiteralStringOther"/>
<push/>
</rule>
<rule pattern="\}">
<token type="LiteralStringOther"/>
<pop depth="1"/>
</rule>
<rule>
<include state="string-intp-escaped"/>
</rule>
<rule pattern="[\\#{}]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="[^\\#{}]+">
<token type="LiteralStringOther"/>
</rule>
</state>
<state name="string-intp-escaped">
<rule>
<include state="string-intp"/>
</rule>
<rule>
<include state="string-escaped"/>
</rule>
</state>
<state name="sb-regex">
<rule pattern="\\[\\\[\]]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="\[">
<token type="LiteralStringRegex"/>
<push/>
</rule>
<rule pattern="\][imsx]*">
<token type="LiteralStringRegex"/>
<pop depth="1"/>
</rule>
<rule>
<include state="string-intp"/>
</rule>
<rule pattern="[\\#\[\]]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="[^\\#\[\]]+">
<token type="LiteralStringRegex"/>
</rule>
</state>
<state name="classname">
<rule pattern="[A-Z_]\w*">
<token type="NameClass"/>
</rule>
<rule pattern="(\()(\s*)([A-Z_]\w*)(\s*)(\))">
<bygroups>
<token type="Punctuation"/>
<token type="Text"/>
<token type="NameClass"/>
<token type="Text"/>
<token type="Punctuation"/>
</bygroups>
</rule>
<rule>
<pop depth="1"/>
</rule>
</state>
<state name="string-escaped">
<rule pattern="\\([\\befnstv#&#34;\&#39;]|x[a-fA-F0-9]{1,2}|[0-7]{1,3})">
<token type="LiteralStringEscape"/>
</rule>
</state>
<state name="sb-intp-string">
<rule pattern="\\[\[]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="\[">
<token type="LiteralStringOther"/>
<push/>
</rule>
<rule pattern="\]">
<token type="LiteralStringOther"/>
<pop depth="1"/>
</rule>
<rule>
<include state="string-intp-escaped"/>
</rule>
<rule pattern="[\\#\[\]]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="[^\\#\[\]]+">
<token type="LiteralStringOther"/>
</rule>
</state>
<state name="pa-regex">
<rule pattern="\\[\\()]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="\(">
<token type="LiteralStringRegex"/>
<push/>
</rule>
<rule pattern="\)[imsx]*">
<token type="LiteralStringRegex"/>
<pop depth="1"/>
</rule>
<rule>
<include state="string-intp"/>
</rule>
<rule pattern="[\\#()]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="[^\\#()]+">
<token type="LiteralStringRegex"/>
</rule>
</state>
<state name="in-attr">
<rule pattern="\[">
<token type="Operator"/>
<push/>
</rule>
<rule pattern="\]">
<token type="Operator"/>
<pop depth="1"/>
</rule>
<rule>
<include state="root"/>
</rule>
</state>
<state name="ab-intp-string">
<rule pattern="\\[&lt;]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="&lt;">
<token type="LiteralStringOther"/>
<push/>
</rule>
<rule pattern="&gt;">
<token type="LiteralStringOther"/>
<pop depth="1"/>
</rule>
<rule>
<include state="string-intp-escaped"/>
</rule>
<rule pattern="[\\#&lt;&gt;]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="[^\\#&lt;&gt;]+">
<token type="LiteralStringOther"/>
</rule>
</state>
<state name="in-intp">
<rule pattern="\{">
<token type="LiteralStringInterpol"/>
<push/>
</rule>
<rule pattern="\}">
<token type="LiteralStringInterpol"/>
<pop depth="1"/>
</rule>
<rule>
<include state="root"/>
</rule>
</state>
<state name="end-part">
<rule pattern=".+">
<token type="CommentPreproc"/>
<pop depth="1"/>
</rule>
</state>
<state name="root">
<rule pattern="#.*?$">
<token type="CommentSingle"/>
</rule>
<rule pattern="(instance_sizeof|pointerof|protected|abstract|require|private|include|unless|typeof|sizeof|return|extend|ensure|rescue|ifdef|super|break|begin|until|while|elsif|yield|next|when|else|then|case|with|end|asm|if|do|as|of)\b">
<token type="Keyword"/>
</rule>
<rule pattern="(false|true|nil)\b">
<token type="KeywordConstant"/>
</rule>
<rule pattern="(module|lib)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)">
<bygroups>
<token type="Keyword"/>
<token type="Text"/>
<token type="NameNamespace"/>
</bygroups>
</rule>
<rule pattern="(def|fun|macro)(\s+)((?:[a-zA-Z_]\w*::)*)">
<bygroups>
<token type="Keyword"/>
<token type="Text"/>
<token type="NameNamespace"/>
</bygroups>
<push state="funcname"/>
</rule>
<rule pattern="def(?=[*%&amp;^`~+-/\[&lt;&gt;=])">
<token type="Keyword"/>
<push state="funcname"/>
</rule>
<rule pattern="(class|struct|union|type|alias|enum)(\s+)((?:[a-zA-Z_]\w*::)*)">
<bygroups>
<token type="Keyword"/>
<token type="Text"/>
<token type="NameNamespace"/>
</bygroups>
<push state="classname"/>
</rule>
<rule pattern="(self|out|uninitialized)\b|(is_a|responds_to)\?">
<token type="KeywordPseudo"/>
</rule>
<rule pattern="(def_equals_and_hash|assert_responds_to|forward_missing_to|def_equals|property|def_hash|parallel|delegate|debugger|getter|record|setter|spawn|pp)\b">
<token type="NameBuiltinPseudo"/>
</rule>
<rule pattern="getter[!?]|property[!?]|__(DIR|FILE|LINE)__\b">
<token type="NameBuiltinPseudo"/>
</rule>
<rule pattern="(?&lt;!\.)(get_stack_top|StaticArray|Concurrent|with_color|Reference|Scheduler|read_line|Exception|at_exit|Pointer|Channel|Float64|sprintf|Float32|Process|Object|Struct|caller|UInt16|UInt32|UInt64|system|future|Number|printf|String|Symbol|Int32|Range|Slice|Regex|Mutex|sleep|Array|Class|raise|Tuple|Deque|delay|Float|Int16|print|abort|Value|UInt8|Int64|puts|Proc|File|Void|exit|fork|Bool|Char|gets|lazy|loop|main|rand|Enum|Int8|Time|Hash|Set|Box|Nil|Dir|Int|p)\b">
<token type="NameBuiltin"/>
</rule>
<rule pattern="(?&lt;!\w)(&lt;&lt;-?)([&#34;`\&#39;]?)([a-zA-Z_]\w*)(\2)(.*?\n)">
<token type="LiteralStringHeredoc"/>
</rule>
<rule pattern="(&lt;&lt;-?)(&#34;|\&#39;)()(\2)(.*?\n)">
<token type="LiteralStringHeredoc"/>
</rule>
<rule pattern="__END__">
<token type="CommentPreproc"/>
<push state="end-part"/>
</rule>
<rule pattern="(?:^|(?&lt;=[=&lt;&gt;~!:])|(?&lt;=(?:\s|;)when\s)|(?&lt;=(?:\s|;)or\s)|(?&lt;=(?:\s|;)and\s)|(?&lt;=\.index\s)|(?&lt;=\.scan\s)|(?&lt;=\.sub\s)|(?&lt;=\.sub!\s)|(?&lt;=\.gsub\s)|(?&lt;=\.gsub!\s)|(?&lt;=\.match\s)|(?&lt;=(?:\s|;)if\s)|(?&lt;=(?:\s|;)elsif\s)|(?&lt;=^when\s)|(?&lt;=^index\s)|(?&lt;=^scan\s)|(?&lt;=^sub\s)|(?&lt;=^gsub\s)|(?&lt;=^sub!\s)|(?&lt;=^gsub!\s)|(?&lt;=^match\s)|(?&lt;=^if\s)|(?&lt;=^elsif\s))(\s*)(/)">
<bygroups>
<token type="Text"/>
<token type="LiteralStringRegex"/>
</bygroups>
<push state="multiline-regex"/>
</rule>
<rule pattern="(?&lt;=\(|,|\[)/">
<token type="LiteralStringRegex"/>
<push state="multiline-regex"/>
</rule>
<rule pattern="(\s+)(/)(?![\s=])">
<bygroups>
<token type="Text"/>
<token type="LiteralStringRegex"/>
</bygroups>
<push state="multiline-regex"/>
</rule>
<rule pattern="(0o[0-7]+(?:_[0-7]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?">
<bygroups>
<token type="LiteralNumberOct"/>
<token type="Text"/>
<token type="Operator"/>
</bygroups>
</rule>
<rule pattern="(0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?">
<bygroups>
<token type="LiteralNumberHex"/>
<token type="Text"/>
<token type="Operator"/>
</bygroups>
</rule>
<rule pattern="(0b[01]+(?:_[01]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?">
<bygroups>
<token type="LiteralNumberBin"/>
<token type="Text"/>
<token type="Operator"/>
</bygroups>
</rule>
<rule pattern="((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)(?:e[+-]?[0-9]+)?(?:_?f[0-9]+)?)(\s*)([/?])?">
<bygroups>
<token type="LiteralNumberFloat"/>
<token type="Text"/>
<token type="Operator"/>
</bygroups>
</rule>
<rule pattern="((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)?(?:e[+-]?[0-9]+)(?:_?f[0-9]+)?)(\s*)([/?])?">
<bygroups>
<token type="LiteralNumberFloat"/>
<token type="Text"/>
<token type="Operator"/>
</bygroups>
</rule>
<rule pattern="((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)?(?:e[+-]?[0-9]+)?(?:_?f[0-9]+))(\s*)([/?])?">
<bygroups>
<token type="LiteralNumberFloat"/>
<token type="Text"/>
<token type="Operator"/>
</bygroups>
</rule>
<rule pattern="(0\b|[1-9][\d]*(?:_\d+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?">
<bygroups>
<token type="LiteralNumberInteger"/>
<token type="Text"/>
<token type="Operator"/>
</bygroups>
</rule>
<rule pattern="@@[a-zA-Z_]\w*">
<token type="NameVariableClass"/>
</rule>
<rule pattern="@[a-zA-Z_]\w*">
<token type="NameVariableInstance"/>
</rule>
<rule pattern="\$\w+">
<token type="NameVariableGlobal"/>
</rule>
<rule pattern="\$[!@&amp;`\&#39;+~=/\\,;.&lt;&gt;_*$?:&#34;^-]">
<token type="NameVariableGlobal"/>
</rule>
<rule pattern="\$-[0adFiIlpvw]">
<token type="NameVariableGlobal"/>
</rule>
<rule pattern="::">
<token type="Operator"/>
</rule>
<rule>
<include state="strings"/>
</rule>
<rule pattern="\?(\\[MC]-)*(\\([\\befnrtv#&#34;\&#39;]|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)(?!\w)">
<token type="LiteralStringChar"/>
</rule>
<rule pattern="[A-Z][A-Z_]+\b">
<token type="NameConstant"/>
</rule>
<rule pattern="\{%">
<token type="LiteralStringInterpol"/>
<push state="in-macro-control"/>
</rule>
<rule pattern="\{\{">
<token type="LiteralStringInterpol"/>
<push state="in-macro-expr"/>
</rule>
<rule pattern="(@\[)(\s*)([A-Z]\w*)">
<bygroups>
<token type="Operator"/>
<token type="Text"/>
<token type="NameDecorator"/>
</bygroups>
<push state="in-attr"/>
</rule>
<rule pattern="(\.|::)(\[\]\?|&lt;=&gt;|===|\[\]=|&gt;&gt;|&amp;&amp;|\*\*|\[\]|\|\||&gt;=|=~|!~|&lt;&lt;|&lt;=|!=|==|&lt;|/|=|-|\+|&gt;|\*|&amp;|%|\^|!|\||~)">
<bygroups>
<token type="Operator"/>
<token type="NameOperator"/>
</bygroups>
</rule>
<rule pattern="(\.|::)([a-zA-Z_]\w*[!?]?|[*%&amp;^`~+\-/\[&lt;&gt;=])">
<bygroups>
<token type="Operator"/>
<token type="Name"/>
</bygroups>
</rule>
<rule pattern="[a-zA-Z_]\w*(?:[!?](?!=))?">
<token type="Name"/>
</rule>
<rule pattern="(\[|\]\??|\*\*|&lt;=&gt;?|&gt;=|&lt;&lt;?|&gt;&gt;?|=~|===|!~|&amp;&amp;?|\|\||\.{1,3})">
<token type="Operator"/>
</rule>
<rule pattern="[-+/*%=&lt;&gt;&amp;!^|~]=?">
<token type="Operator"/>
</rule>
<rule pattern="[(){};,/?:\\]">
<token type="Punctuation"/>
</rule>
<rule pattern="\s+">
<token type="Text"/>
</rule>
</state>
<state name="multiline-regex">
<rule>
<include state="string-intp"/>
</rule>
<rule pattern="\\\\">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="\\/">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="[\\#]">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="[^\\/#]+">
<token type="LiteralStringRegex"/>
</rule>
<rule pattern="/[imsx]*">
<token type="LiteralStringRegex"/>
<pop depth="1"/>
</rule>
</state>
<state name="ab-string">
<rule pattern="\\[\\&lt;&gt;]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="&lt;">
<token type="LiteralStringOther"/>
<push/>
</rule>
<rule pattern="&gt;">
<token type="LiteralStringOther"/>
<pop depth="1"/>
</rule>
<rule pattern="[\\#&lt;&gt;]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="[^\\#&lt;&gt;]+">
<token type="LiteralStringOther"/>
</rule>
</state>
<state name="pa-string">
<rule pattern="\\[\\()]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="\(">
<token type="LiteralStringOther"/>
<push/>
</rule>
<rule pattern="\)">
<token type="LiteralStringOther"/>
<pop depth="1"/>
</rule>
<rule pattern="[\\#()]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="[^\\#()]+">
<token type="LiteralStringOther"/>
</rule>
</state>
<state name="strings">
<rule pattern="\:@{0,2}[a-zA-Z_]\w*[!?]?">
<token type="LiteralStringSymbol"/>
</rule>
<rule pattern="\:@{0,2}(\[\]\?|&lt;=&gt;|===|\[\]=|&gt;&gt;|&amp;&amp;|\*\*|\[\]|\|\||&gt;=|=~|!~|&lt;&lt;|&lt;=|!=|==|&lt;|/|=|-|\+|&gt;|\*|&amp;|%|\^|!|\||~)">
<token type="LiteralStringSymbol"/>
</rule>
<rule pattern=":&#39;(\\\\|\\&#39;|[^&#39;])*&#39;">
<token type="LiteralStringSymbol"/>
</rule>
<rule pattern="&#39;(\\\\|\\&#39;|[^&#39;]|\\[^&#39;\\]+)&#39;">
<token type="LiteralStringChar"/>
</rule>
<rule pattern=":&#34;">
<token type="LiteralStringSymbol"/>
<push state="simple-sym"/>
</rule>
<rule pattern="([a-zA-Z_]\w*)(:)(?!:)">
<bygroups>
<token type="LiteralStringSymbol"/>
<token type="Punctuation"/>
</bygroups>
</rule>
<rule pattern="&#34;">
<token type="LiteralStringDouble"/>
<push state="simple-string"/>
</rule>
<rule pattern="(?&lt;!\.)`">
<token type="LiteralStringBacktick"/>
<push state="simple-backtick"/>
</rule>
<rule pattern="%\{">
<token type="LiteralStringOther"/>
<push state="cb-intp-string"/>
</rule>
<rule pattern="%[wi]\{">
<token type="LiteralStringOther"/>
<push state="cb-string"/>
</rule>
<rule pattern="%r\{">
<token type="LiteralStringRegex"/>
<push state="cb-regex"/>
</rule>
<rule pattern="%\[">
<token type="LiteralStringOther"/>
<push state="sb-intp-string"/>
</rule>
<rule pattern="%[wi]\[">
<token type="LiteralStringOther"/>
<push state="sb-string"/>
</rule>
<rule pattern="%r\[">
<token type="LiteralStringRegex"/>
<push state="sb-regex"/>
</rule>
<rule pattern="%\(">
<token type="LiteralStringOther"/>
<push state="pa-intp-string"/>
</rule>
<rule pattern="%[wi]\(">
<token type="LiteralStringOther"/>
<push state="pa-string"/>
</rule>
<rule pattern="%r\(">
<token type="LiteralStringRegex"/>
<push state="pa-regex"/>
</rule>
<rule pattern="%&lt;">
<token type="LiteralStringOther"/>
<push state="ab-intp-string"/>
</rule>
<rule pattern="%[wi]&lt;">
<token type="LiteralStringOther"/>
<push state="ab-string"/>
</rule>
<rule pattern="%r&lt;">
<token type="LiteralStringRegex"/>
<push state="ab-regex"/>
</rule>
<rule pattern="(%r([\W_]))((?:\\\2|(?!\2).)*)(\2[imsx]*)">
<token type="LiteralString"/>
</rule>
<rule pattern="(%[wi]([\W_]))((?:\\\2|(?!\2).)*)(\2)">
<token type="LiteralString"/>
</rule>
<rule pattern="(?&lt;=[-+/*%=&lt;&gt;&amp;!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)">
<bygroups>
<token type="Text"/>
<token type="LiteralStringOther"/>
<token type="None"/>
</bygroups>
</rule>
<rule pattern="^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)">
<bygroups>
<token type="Text"/>
<token type="LiteralStringOther"/>
<token type="None"/>
</bygroups>
</rule>
<rule pattern="(%([\[{(&lt;]))((?:\\\2|(?!\2).)*)(\2)">
<token type="LiteralString"/>
</rule>
</state>
<state name="sb-string">
<rule pattern="\\[\\\[\]]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="\[">
<token type="LiteralStringOther"/>
<push/>
</rule>
<rule pattern="\]">
<token type="LiteralStringOther"/>
<pop depth="1"/>
</rule>
<rule pattern="[\\#\[\]]">
<token type="LiteralStringOther"/>
</rule>
<rule pattern="[^\\#\[\]]+">
<token type="LiteralStringOther"/>
</rule>
</state>
<state name="funcname">
<rule pattern="(?:([a-zA-Z_]\w*)(\.))?([a-zA-Z_]\w*[!?]?|\*\*?|[-+]@?|[/%&amp;|^`~]|\[\]=?|&lt;&lt;|&gt;&gt;|&lt;=?&gt;|&gt;=?|===?)">
<bygroups>
<token type="NameClass"/>
<token type="Operator"/>
<token type="NameFunction"/>
</bygroups>
<pop depth="1"/>
</rule>
<rule>
<pop depth="1"/>
</rule>
</state>
<state name="simple-sym">
<rule>
<include state="string-escaped"/>
</rule>
<rule pattern="[^\\&#34;#]+">
<token type="LiteralStringSymbol"/>
</rule>
<rule pattern="[\\#]">
<token type="LiteralStringSymbol"/>
</rule>
<rule pattern="&#34;">
<token type="LiteralStringSymbol"/>
<pop depth="1"/>
</rule>
</state>
</rules>
</lexer>
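States such as `pa-string` above keep delimiters balanced by pushing another copy of the state on `(` and popping one level on `)`, so `%(...)` literals may contain nested parentheses. A minimal Python sketch of that stack discipline, assuming a hypothetical helper that is handed the text immediately after an opening `%(` (this function is illustrative only, not part of the lexer):

```python
def scan_pa_string(text):
    """Return the index just past the ')' that balances an already-consumed
    '%(', honoring backslash escapes, or None if unterminated."""
    depth = 1  # one copy of the pa-string state is already on the stack
    i = 0
    while i < len(text):
        c = text[i]
        if c == "\\" and i + 1 < len(text):
            i += 2          # rule \\[\\()]: escaped char, no state change
            continue
        if c == "(":
            depth += 1      # rule \( : <push/> — enter the state again
        elif c == ")":
            depth -= 1      # rule \) : <pop depth="1"/>
            if depth == 0:
                return i + 1
        i += 1
    return None
```

The integer `depth` stands in for the lexer's state stack: each nested `(` corresponds to one extra pushed copy of `pa-string`, and only the `)` that pops the last copy ends the literal.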

lexers/csv.xml
<!--
Lexer for RFC-4180-compliant CSV, with the following extensions:
- UTF-8 encoding is accepted (the RFC requires 7-bit ASCII)
- The line terminator may be LF or CRLF (the RFC allows CRLF only)
Link to the RFC-4180 specification: https://tools.ietf.org/html/rfc4180
Extensions inspired by:
https://github.com/frictionlessdata/datapackage/issues/204#issuecomment-193242077
Future improvements:
- Identify non-quoted numbers as LiteralNumber
- Flag the trailing y in "x"y as an error; currently it is lexed as another
string literal.
-->
<lexer>
<config>
<name>CSV</name>
<alias>csv</alias>
<filename>*.csv</filename>
<mime_type>text/csv</mime_type>
</config>
<rules>
<state name="root">
<rule pattern="\r?\n">
<token type="Punctuation" />
</rule>
<rule pattern=",">
<token type="Punctuation" />
</rule>
<rule pattern="&quot;">
<token type="LiteralStringDouble" />
<push state="escaped" />
</rule>
<rule pattern="[^\r\n,]+">
<token type="LiteralString" />
</rule>
</state>
<state name="escaped">
<rule pattern="&quot;&quot;">
<token type="LiteralStringEscape"/>
</rule>
<rule pattern="&quot;">
<token type="LiteralStringDouble" />
<pop depth="1"/>
</rule>
<rule pattern="[^&quot;]+">
<token type="LiteralStringDouble" />
</rule>
</state>
</rules>
</lexer>
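The two-state machine defined above — a `root` state for separators and bare fields, and an `escaped` state entered on an opening double quote, where `""` is an escape and a lone `"` pops back — can be sketched in Python. The `RULES` table and `tokenize` helper below are illustrative inventions (not this repository's API); each entry mirrors one `<rule>` element, with the third field encoding the state to push, `"pop"`, or no action:

```python
import re

RULES = {
    "root": [
        (re.compile(r"\r?\n"), "Punctuation", None),
        (re.compile(r","), "Punctuation", None),
        (re.compile(r'"'), "LiteralStringDouble", "escaped"),
        (re.compile(r"[^\r\n,]+"), "LiteralString", None),
    ],
    "escaped": [
        (re.compile(r'""'), "LiteralStringEscape", None),
        (re.compile(r'"'), "LiteralStringDouble", "pop"),
        (re.compile(r'[^"]+'), "LiteralStringDouble", None),
    ],
}

def tokenize(text):
    """Run the rules of the state on top of the stack, first match wins."""
    stack = ["root"]
    pos = 0
    tokens = []
    while pos < len(text):
        for pattern, ttype, action in RULES[stack[-1]]:
            m = pattern.match(text, pos)
            if m:
                tokens.append((ttype, m.group()))
                pos = m.end()
                if action == "pop":
                    stack.pop()
                elif action:
                    stack.append(action)
                break
        else:
            pos += 1  # no rule matched; skip one character
    return tokens
```

Running it on `a,"b""c"` produces the same token stream the XML rules would: a bare field, a comma, then a quoted field whose doubled quote is emitted as `LiteralStringEscape`.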


@ -95,19 +95,22 @@
 <rule pattern="[:!#$%&amp;*+.\\/&lt;=&gt;?@^|~-]+">
 <token type="Operator"/>
 </rule>
-<rule pattern="\d+[eE][+-]?\d+">
+<rule pattern="\d+_*[eE][+-]?\d+">
 <token type="LiteralNumberFloat"/>
 </rule>
-<rule pattern="\d+\.\d+([eE][+-]?\d+)?">
+<rule pattern="\d+(_+[\d]+)*\.\d+(_+[\d]+)*([eE][+-]?\d+)?">
 <token type="LiteralNumberFloat"/>
 </rule>
-<rule pattern="0[oO][0-7]+">
+<rule pattern="0[oO](_*[0-7])+">
 <token type="LiteralNumberOct"/>
 </rule>
-<rule pattern="0[xX][\da-fA-F]+">
+<rule pattern="0[xX](_*[\da-fA-F])+">
 <token type="LiteralNumberHex"/>
 </rule>
-<rule pattern="\d+">
+<rule pattern="0[bB](_*[01])+">
+<token type="LiteralNumberBin"/>
+</rule>
+<rule pattern="\d+(_*[\d])*">
 <token type="LiteralNumberInteger"/>
 </rule>
 <rule pattern="&#39;">

lexers/heuristics.yml
# A collection of simple regexp-based rules that can be applied to content
# to disambiguate languages with the same file extension.
#
# There are two top-level keys: disambiguations and named_patterns.
#
# disambiguations - a list of disambiguation rules, one for each
# extension or group of extensions.
# extensions - an array of file extensions that this block applies to.
# rules - list of rules that are applied in order to the content
# of a file with a matching extension. Rules are evaluated
# until one of them matches. If none matches, no language
# is returned.
# language - Language to be returned if the rule matches.
# pattern - Ruby-compatible regular expression that makes the rule
# match. If no pattern is specified, the rule always matches.
# Pattern can be a string with a single regular expression
# or an array of strings that will be merged in a single
# regular expression (with union).
# and - An and block combines multiple rules; it matches only if all of
# them match.
# negative_pattern - Same as pattern, but checks for absence of matches.
# named_pattern - A pattern can be reused by specifying it in the
# named_patterns section and referencing it here by its
# key.
# named_patterns - Key-value map of reusable named patterns.
#
# Please keep this list alphabetized.
#
---
disambiguations:
- extensions: ['.1', '.2', '.3', '.4', '.5', '.6', '.7', '.8', '.9']
rules:
- language: man
and:
- named_pattern: mdoc-date
- named_pattern: mdoc-title
- named_pattern: mdoc-heading
- language: man
and:
- named_pattern: man-title
- named_pattern: man-heading
- language: Roff
pattern: '^\.(?:[A-Za-z]{2}(?:\s|$)|\\")'
- extensions: ['.1in', '.1m', '.1x', '.3in', '.3m', '.3p', '.3pm', '.3qt', '.3x', '.man', '.mdoc']
rules:
- language: man
and:
- named_pattern: mdoc-date
- named_pattern: mdoc-title
- named_pattern: mdoc-heading
- language: man
and:
- named_pattern: man-title
- named_pattern: man-heading
- language: Roff
- extensions: ['.al']
rules:
# AL pattern source from https://github.com/microsoft/AL/blob/master/grammar/alsyntax.tmlanguage - keyword.other.applicationobject.al
- language: AL
and:
- pattern: '\b(?i:(CODEUNIT|PAGE|PAGEEXTENSION|PAGECUSTOMIZATION|DOTNET|ENUM|ENUMEXTENSION|VALUE|QUERY|REPORT|TABLE|TABLEEXTENSION|XMLPORT|PROFILE|CONTROLADDIN|REPORTEXTENSION|INTERFACE|PERMISSIONSET|PERMISSIONSETEXTENSION|ENTITLEMENT))\b'
# Open-ended fallback to Perl AutoLoader
- language: Perl
- extensions: ['.app']
rules:
- language: Erlang
pattern: '^\{\s*(?:application|''application'')\s*,\s*(?:[a-z]+[\w@]*|''[^'']+'')\s*,\s*\[(?:.|[\r\n])*\]\s*\}\.[ \t]*$'
- extensions: ['.as']
rules:
- language: ActionScript
pattern: '^\s*(?:package(?:\s+[\w.]+)?\s+(?:\{|$)|import\s+[\w.*]+\s*;|(?=.*?(?:intrinsic|extends))(intrinsic\s+)?class\s+[\w<>.]+(?:\s+extends\s+[\w<>.]+)?|(?:(?:public|protected|private|static)\s+)*(?:(?:var|const|local)\s+\w+\s*:\s*[\w<>.]+(?:\s*=.*)?\s*;|function\s+\w+\s*\((?:\s*\w+\s*:\s*[\w<>.]+\s*(,\s*\w+\s*:\s*[\w<>.]+\s*)*)?\)))'
- extensions: ['.asc']
rules:
- language: Public Key
pattern: '^(----[- ]BEGIN|ssh-(rsa|dss)) '
- language: AsciiDoc
pattern: '^[=-]+\s|\{\{[A-Za-z]'
- language: AGS Script
pattern: '^(\/\/.+|((import|export)\s+)?(function|int|float|char)\s+((room|repeatedly|on|game)_)?([A-Za-z]+[A-Za-z_0-9]+)\s*[;\(])'
- extensions: ['.asm']
rules:
- language: Motorola 68K Assembly
named_pattern: m68k
- extensions: ['.asy']
rules:
- language: LTspice Symbol
pattern: '^SymbolType[ \t]'
- language: Asymptote
- extensions: ['.bas']
rules:
- language: FreeBasic
pattern: '^[ \t]*#(?i)(?:define|endif|endmacro|ifn?def|include|lang|macro)(?:$|\s)'
- language: BASIC
pattern: '\A\s*\d'
- language: VBA
and:
- named_pattern: vb-module
- named_pattern: vba
- language: Visual Basic 6.0
named_pattern: vb-module
- extensions: ['.bb']
rules:
- language: BlitzBasic
pattern: '(<^\s*; |End Function)'
- language: BitBake
pattern: '^(# |include|require|inherit)\b'
- language: Clojure
pattern: '\((def|defn|defmacro|let)\s'
- extensions: ['.bf']
rules:
- language: Beef
pattern: '(?-m)^\s*using\s+(System|Beefy)(\.(.*))?;\s*$'
- language: HyPhy
pattern:
- '(?-m)^\s*#include\s+".*";\s*$'
- '\sfprintf\s*\('
- language: Brainfuck
pattern: '(>\+>|>\+<)'
- extensions: ['.bi']
rules:
- language: FreeBasic
pattern: '^[ \t]*#(?i)(?:define|endif|endmacro|ifn?def|if|include|lang|macro)(?:$|\s)'
- extensions: ['.bs']
rules:
- language: Bikeshed
pattern: '^(?i:<pre\s+class)\s*=\s*(''|\"|\b)metadata\b\1[^>\r\n]*>'
- language: BrighterScript
pattern:
- (?i:^\s*(?=^sub\s)(?:sub\s*\w+\(.*?\))|(?::\s*sub\(.*?\))$)
- (?i:^\s*(end\ssub)$)
- (?i:^\s*(?=^function\s)(?:function\s*\w+\(.*?\)\s*as\s*\w*)|(?::\s*function\(.*?\)\s*as\s*\w*)$)
- (?i:^\s*(end\sfunction)$)
- language: Bluespec BH
pattern: '^package\s+[A-Za-z_][A-Za-z0-9_'']*(?:\s*\(|\s+where)'
- extensions: ['.builds']
rules:
- language: XML
pattern: '^(\s*)(?i:<Project|<Import|<Property|<?xml|xmlns)'
- extensions: ['.ch']
rules:
- language: xBase
pattern: '^\s*#\s*(?i:if|ifdef|ifndef|define|command|xcommand|translate|xtranslate|include|pragma|undef)\b'
- extensions: ['.cl']
rules:
- language: Common Lisp
pattern: '^\s*\((?i:defun|in-package|defpackage) '
- language: Cool
pattern: '^class'
- language: OpenCL
pattern: '\/\* |\/\/ |^\}'
- extensions: ['.cls']
rules:
- language: Visual Basic 6.0
and:
- named_pattern: vb-class
- pattern: '^\s*BEGIN(?:\r?\n|\r)\s*MultiUse\s*=.*(?:\r?\n|\r)\s*Persistable\s*='
- language: VBA
named_pattern: vb-class
- language: TeX
pattern: '^\s*\\(?:NeedsTeXFormat|ProvidesClass)\{'
- language: ObjectScript
pattern: '^Class\s'
- extensions: ['.cmp']
rules:
- language: Gerber Image
pattern: '^[DGMT][0-9]{2}\*(?:\r?\n|\r)'
- extensions: ['.cs']
rules:
- language: Smalltalk
pattern: '![\w\s]+methodsFor: '
- language: 'C#'
pattern: '^\s*(using\s+[A-Z][\s\w.]+;|namespace\s*[\w\.]+\s*(\{|;)|\/\/)'
- extensions: ['.csc']
rules:
- language: GSC
named_pattern: gsc
- extensions: ['.csl']
rules:
- language: XML
pattern: '(?i:^\s*(<\?xml|xmlns))'
- language: Kusto
pattern: '(^\|\s*(where|extend|project|limit|summarize))|(^\.\w+)'
- extensions: ['.d']
rules:
- language: D
# see http://dlang.org/spec/grammar
# ModuleDeclaration | ImportDeclaration | FuncDeclaration | unittest
pattern: '^module\s+[\w.]*\s*;|import\s+[\w\s,.:]*;|\w+\s+\w+\s*\(.*\)(?:\(.*\))?\s*\{[^}]*\}|unittest\s*(?:\(.*\))?\s*\{[^}]*\}'
- language: DTrace
# see http://dtrace.org/guide/chp-prog.html, http://dtrace.org/guide/chp-profile.html, http://dtrace.org/guide/chp-opt.html
pattern: '^(\w+:\w*:\w*:\w*|BEGIN|END|provider\s+|(tick|profile)-\w+\s+\{[^}]*\}|#pragma\s+D\s+(option|attributes|depends_on)\s|#pragma\s+ident\s)'
- language: Makefile
# path/target : dependency \
# target : \
# : dependency
# path/file.ext1 : some/path/../file.ext2
pattern: '([\/\\].*:\s+.*\s\\$|: \\$|^[ %]:|^[\w\s\/\\.]+\w+\.\w+\s*:\s+[\w\s\/\\.]+\w+\.\w+)'
- extensions: ['.dsp']
rules:
- language: Microsoft Developer Studio Project
pattern: '# Microsoft Developer Studio Generated Build File'
- language: Faust
pattern: '\bprocess\s*[(=]|\b(library|import)\s*\(\s*"|\bdeclare\s+(name|version|author|copyright|license)\s+"'
- extensions: ['.e']
rules:
- language: E
pattern:
- '^\s*(def|var)\s+(.+):='
- '^\s*(def|to)\s+(\w+)(\(.+\))?\s+\{'
- '^\s*(when)\s+(\(.+\))\s+->\s+\{'
- language: Eiffel
pattern:
- '^\s*\w+\s*(?:,\s*\w+)*[:]\s*\w+\s'
- '^\s*\w+\s*(?:\(\s*\w+[:][^)]+\))?(?:[:]\s*\w+)?(?:--.+\s+)*\s+(?:do|local)\s'
- '^\s*(?:across|deferred|elseif|ensure|feature|from|inherit|inspect|invariant|note|once|require|undefine|variant|when)\s*$'
- language: Euphoria
named_pattern: euphoria
- extensions: ['.ecl']
rules:
- language: ECLiPSe
pattern: '^[^#]+:-'
- language: ECL
pattern: ':='
- extensions: ['.es']
rules:
- language: Erlang
pattern: '^\s*(?:%%|main\s*\(.*?\)\s*->)'
- language: JavaScript
pattern: '\/\/|("|'')use strict\1|export\s+default\s|\/\*(?:.|[\r\n])*?\*\/'
- extensions: ['.ex']
rules:
- language: Elixir
pattern:
- '^\s*@moduledoc\s'
- '^\s*(?:cond|import|quote|unless)\s'
- '^\s*def(?:exception|impl|macro|module|protocol)[(\s]'
- language: Euphoria
named_pattern: euphoria
- extensions: ['.f']
rules:
- language: Forth
pattern: '^: '
- language: Filebench WML
pattern: 'flowop'
- language: Fortran
named_pattern: fortran
- extensions: ['.for']
rules:
- language: Forth
pattern: '^: '
- language: Fortran
named_pattern: fortran
- extensions: ['.fr']
rules:
- language: Forth
pattern: '^(: |also |new-device|previous )'
- language: Frege
pattern: '^\s*(import|module|package|data|type) '
- language: Text
- extensions: ['.frm']
rules:
- language: VBA
and:
- named_pattern: vb-form
- pattern: '^\s*Begin\s+\{[0-9A-Z\-]*\}\s?'
- language: Visual Basic 6.0
and:
- named_pattern: vb-form
- pattern: '^\s*Begin\s+VB\.Form\s+'
- extensions: ['.fs']
rules:
- language: Forth
pattern: '^(: |new-device)'
- language: 'F#'
pattern: '^\s*(#light|import|let|module|namespace|open|type)'
- language: GLSL
pattern: '^\s*(#version|precision|uniform|varying|vec[234])'
- language: Filterscript
pattern: '#include|#pragma\s+(rs|version)|__attribute__'
- extensions: ['.ftl']
rules:
- language: FreeMarker
pattern: '^(?:<|[a-zA-Z-][a-zA-Z0-9_-]+[ \t]+\w)|\$\{\w+[^\r\n]*?\}|^[ \t]*(?:<#--.*?-->|<#([a-z]+)(?=\s|>)[^>]*>.*?</#\1>|\[#--.*?--\]|\[#([a-z]+)(?=\s|\])[^\]]*\].*?\[#\2\])'
- language: Fluent
pattern: '^-?[a-zA-Z][a-zA-Z0-9_-]* *=|\{\$-?[a-zA-Z][-\w]*(?:\.[a-zA-Z][-\w]*)?\}'
- extensions: ['.g']
rules:
- language: GAP
pattern: '\s*(Declare|BindGlobal|KeyDependentOperation|Install(Method|GlobalFunction)|SetPackageInfo)'
- language: G-code
pattern: '^[MG][0-9]+(?:\r?\n|\r)'
- extensions: ['.gd']
rules:
- language: GAP
pattern: '\s*(Declare|BindGlobal|KeyDependentOperation)'
- language: GDScript
pattern: '\s*(extends|var|const|enum|func|class|signal|tool|yield|assert|onready)'
- extensions: ['.gml']
rules:
- language: XML
pattern: '(?i:^\s*(<\?xml|xmlns))'
- language: Graph Modeling Language
pattern: '(?i:^\s*(graph|node)\s+\[$)'
- language: Gerber Image
pattern: '^[DGMT][0-9]{2}\*$'
- language: Game Maker Language
- extensions: ['.gs']
rules:
- language: GLSL
pattern: '^#version\s+[0-9]+\b'
- language: Gosu
pattern: '^uses (java|gw)\.'
- language: Genie
pattern: '^\[indent=[0-9]+\]'
- extensions: ['.gsc']
rules:
- language: GSC
named_pattern: gsc
- extensions: ['.gsh']
rules:
- language: GSC
named_pattern: gsc
- extensions: ['.gts']
rules:
- language: Gerber Image
pattern: '^G0.'
- language: Glimmer TS
negative_pattern: '^G0.'
- extensions: ['.h']
rules:
- language: Objective-C
named_pattern: objectivec
- language: C++
named_pattern: cpp
- language: C
- extensions: ['.hh']
rules:
- language: Hack
pattern: '<\?hh'
- extensions: ['.html']
rules:
- language: Ecmarkup
pattern: '<emu-(?:alg|annex|biblio|clause|eqn|example|figure|gann|gmod|gprose|grammar|intro|not-ref|note|nt|prodref|production|rhs|table|t|xref)(?:$|\s|>)'
- language: HTML
- extensions: ['.i']
rules:
- language: Motorola 68K Assembly
named_pattern: m68k
- language: SWIG
pattern: '^[ \t]*%[a-z_]+\b|^%[{}]$'
- extensions: ['.ice']
rules:
- language: JSON
pattern: '\A\s*[{\[]'
- language: Slice
- extensions: ['.inc']
rules:
- language: Motorola 68K Assembly
named_pattern: m68k
- language: PHP
pattern: '^<\?(?:php)?'
- language: SourcePawn
pattern:
- '^public\s+(?:SharedPlugin(?:\s+|:)__pl_\w+\s*=(?:\s*\{)?|(?:void\s+)?__pl_\w+_SetNTVOptional\(\)(?:\s*\{)?)'
- '^methodmap\s+\w+\s+<\s+\w+'
- '^\s*MarkNativeAsOptional\s*\('
- language: NASL
pattern:
- '^\s*include\s*\(\s*(?:"|'')[\\/\w\-\.:\s]+\.(?:nasl|inc)\s*(?:"|'')\s*\)\s*;'
- '^\s*(?:global|local)_var\s+(?:\w+(?:\s*=\s*[\w\-"'']+)?\s*)(?:,\s*\w+(?:\s*=\s*[\w\-"'']+)?\s*)*+\s*;'
- '^\s*namespace\s+\w+\s*\{'
- '^\s*object\s+\w+\s*(?:extends\s+\w+(?:::\w+)?)?\s*\{'
- '^\s*(?:public\s+|private\s+|\s*)function\s+\w+\s*\([\w\s,]*\)\s*\{'
- language: POV-Ray SDL
pattern: '^\s*#(declare|local|macro|while)\s'
- language: Pascal
pattern:
- '(?i:^\s*\{\$(?:mode|ifdef|undef|define)[ ]+[a-z0-9_]+\})'
- '^\s*end[.;]\s*$'
- language: BitBake
pattern: '^inherit(\s+[\w.-]+)+\s*$'
- extensions: ['.json']
rules:
- language: OASv2-json
pattern: '"swagger":\s?"2.[0-9.]+"'
- language: OASv3-json
pattern: '"openapi":\s?"3.[0-9.]+"'
- language: JSON
- extensions: ['.l']
rules:
- language: Common Lisp
pattern: '\(def(un|macro)\s'
- language: Lex
pattern: '^(%[%{}]xs|<.*>)'
- language: Roff
pattern: '^\.[A-Za-z]{2}(\s|$)'
- language: PicoLisp
pattern: '^\((de|class|rel|code|data|must)\s'
- extensions: ['.lean']
rules:
- language: Lean
pattern: '^import [a-z]'
- language: Lean 4
pattern: '^import [A-Z]'
- extensions: ['.ls']
rules:
- language: LoomScript
pattern: '^\s*package\s*[\w\.\/\*\s]*\s*\{'
- language: LiveScript
- extensions: ['.lsp', '.lisp']
rules:
- language: Common Lisp
pattern: '^\s*\((?i:defun|in-package|defpackage) '
- language: NewLisp
pattern: '^\s*\(define '
- extensions: ['.m']
rules:
- language: Objective-C
named_pattern: objectivec
- language: Mercury
pattern: ':- module'
- language: MUF
pattern: '^: '
- language: M
pattern: '^\s*;'
- language: Mathematica
and:
- pattern: '\(\*'
- pattern: '\*\)$'
- language: MATLAB
pattern: '^\s*%'
- language: Limbo
pattern: '^\w+\s*:\s*module\s*\{'
- extensions: ['.m4']
rules:
- language: M4Sugar
pattern:
- 'AC_DEFUN|AC_PREREQ|AC_INIT'
- '^_?m4_'
- language: 'M4'
- extensions: ['.mask']
rules:
- language: Unity3D Asset
pattern: 'tag:unity3d.com'
- extensions: ['.mc']
rules:
- language: Win32 Message File
pattern: '(?i)^[ \t]*(?>\/\*\s*)?MessageId=|^\.$'
- language: M4
pattern: '^dnl|^divert\((?:-?\d+)?\)|^\w+\(`[^\r\n]*?''[),]'
- language: Monkey C
pattern: '\b(?:using|module|function|class|var)\s+\w'
- extensions: ['.md']
rules:
- language: Markdown
pattern:
- '(^[-A-Za-z0-9=#!\*\[|>])|<\/'
- '\A\z'
- language: GCC Machine Description
pattern: '^(;;|\(define_)'
- language: Markdown
- extensions: ['.ml']
rules:
- language: OCaml
pattern: '(^\s*module)|let rec |match\s+(\S+\s)+with'
- language: Standard ML
pattern: '=> |case\s+(\S+\s)+of'
- extensions: ['.mod']
rules:
- language: XML
pattern: '<!ENTITY '
- language: NMODL
pattern: '\b(NEURON|INITIAL|UNITS)\b'
- language: Modula-2
pattern: '^\s*(?i:MODULE|END) [\w\.]+;'
- language: [Linux Kernel Module, AMPL]
- extensions: ['.mojo']
rules:
- language: Mojo
pattern: '^\s*(alias|def|from|fn|import|struct|trait)\s'
- language: XML
pattern: '^\s*<\?xml'
- extensions: ['.ms']
rules:
- language: Roff
pattern: '^[.''][A-Za-z]{2}(\s|$)'
- language: Unix Assembly
and:
- negative_pattern: '/\*'
- pattern: '^\s*\.(?:include\s|globa?l\s|[A-Za-z][_A-Za-z0-9]*:)'
- language: MAXScript
- extensions: ['.n']
rules:
- language: Roff
pattern: '^[.'']'
- language: Nemerle
pattern: '^(module|namespace|using)\s'
- extensions: ['.ncl']
rules:
- language: XML
pattern: '^\s*<\?xml\s+version'
- language: Gerber Image
pattern: '^[DGMT][0-9]{2}\*(?:\r?\n|\r)'
- language: Text
pattern: 'THE_TITLE'
- extensions: ['.nl']
rules:
- language: NL
pattern: '^(b|g)[0-9]+ '
- language: NewLisp
- extensions: ['.nu']
rules:
- language: Nushell
pattern: '^\s*(import|export|module|def|let|let-env) '
- language: Nu
- extensions: ['.odin']
rules:
- language: Object Data Instance Notation
pattern: '(?:^|<)\s*[A-Za-z0-9_]+\s*=\s*<'
- language: Odin
pattern: 'package\s+\w+|\b(?:im|ex)port\s*"[\w:./]+"|\w+\s*::\s*(?:proc|struct)\s*\(|^\s*//\s'
- extensions: ['.p']
rules:
- language: Gnuplot
pattern:
- '^s?plot\b'
- '^set\s+(term|terminal|out|output|[xy]tics|[xy]label|[xy]range|style)\b'
- language: OpenEdge ABL
- extensions: ['.php']
rules:
- language: Hack
pattern: '<\?hh'
- language: PHP
pattern: '<\?[^h]'
- extensions: ['.pkl']
rules:
- language: Pkl
pattern:
- '^\s*(module|import|amends|extends|local|const|fixed|abstract|open|class|typealias|@\w+)\b'
- '^\s*[a-zA-Z0-9_$]+\s*(=|{|:)|^\s*`[^`]+`\s*(=|{|:)|for\s*\(|when\s*\('
- language: Pickle
- extensions: ['.pl']
rules:
- language: Prolog
pattern: '^[^#]*:-'
- language: Perl
and:
- negative_pattern: '^\s*use\s+v6\b'
- named_pattern: perl
- language: Raku
named_pattern: raku
- extensions: ['.plist']
rules:
- language: XML Property List
pattern: '^\s*(?:<\?xml\s|<!DOCTYPE\s+plist|<plist(?:\s+version\s*=\s*(["''])\d+(?:\.\d+)?\1)?\s*>\s*$)'
- language: OpenStep Property List
- extensions: ['.plt']
rules:
- language: Prolog
pattern: '^\s*:-'
- extensions: ['.pm']
rules:
- language: Perl
and:
- negative_pattern: '^\s*use\s+v6\b'
- named_pattern: perl
- language: Raku
named_pattern: raku
- language: X PixMap
pattern: '^\s*\/\* XPM \*\/'
- extensions: ['.pod']
rules:
- language: Pod 6
pattern: '^[\s&&[^\r\n]]*=(comment|begin pod|begin para|item\d+)'
- language: Pod
- extensions: ['.pp']
rules:
- language: Pascal
pattern: '^\s*end[.;]'
- language: Puppet
pattern: '^\s+\w+\s+=>\s'
- extensions: ['.pro']
rules:
- language: Proguard
pattern: '^-(include\b.*\.pro$|keep\b|keepclassmembers\b|keepattributes\b)'
- language: Prolog
pattern: '^[^\[#]+:-'
- language: INI
pattern: 'last_client='
- language: QMake
and:
- pattern: HEADERS
- pattern: SOURCES
- language: IDL
pattern: '^\s*(?i:function|pro|compile_opt) \w[ \w,:]*$'
- extensions: ['.properties']
rules:
- language: INI
and:
- named_pattern: key_equals_value
- pattern: '^[;\[]'
- language: Java Properties
and:
- named_pattern: key_equals_value
- pattern: '^[#!]'
- language: INI
named_pattern: key_equals_value
- language: Java Properties
pattern: '^[^#!][^:]*:'
- extensions: ['.q']
rules:
- language: q
pattern: '((?i:[A-Z.][\w.]*:\{)|^\\(cd?|d|l|p|ts?) )'
- language: HiveQL
pattern: '(?i:SELECT\s+[\w*,]+\s+FROM|(CREATE|ALTER|DROP)\s(DATABASE|SCHEMA|TABLE))'
- extensions: ['.qs']
rules:
- language: Q#
pattern: '^((\/{2,3})?\s*(namespace|operation)\b)'
- language: Qt Script
pattern: '(\w+\.prototype\.\w+|===|\bvar\b)'
- extensions: ['.r']
rules:
- language: Rebol
pattern: '(?i:\bRebol\b)'
- language: Rez
pattern: '(#include\s+["<](Types\.r|Carbon\/Carbon\.r)[">])|((resource|data|type)\s+''[A-Za-z0-9]{4}''\s+((\(.*\)\s+){0,1}){)'
- language: R
pattern: '<-|^\s*#'
- extensions: ['.re']
rules:
- language: Reason
pattern:
- '^\s*module\s+type\s'
- '^\s*(?:include|open)\s+\w+\s*;\s*$'
- '^\s*let\s+(?:module\s\w+\s*=\s*\{|\w+:\s+.*=.*;\s*$)'
- language: C++
pattern:
- '^\s*#(?:(?:if|ifdef|define|pragma)\s+\w|\s*include\s+<[^>]+>)'
- '^\s*template\s*<'
- extensions: ['.res']
rules:
- language: ReScript
pattern:
- '^\s*(let|module|type)\s+\w*\s+=\s+'
- '^\s*(?:include|open)\s+\w+\s*$'
- extensions: ['.rno']
rules:
- language: RUNOFF
pattern: '(?i:^\.!|^\f|\f$|^\.end lit(?:eral)?\b|^\.[a-zA-Z].*?;\.[a-zA-Z](?:[; \t])|\^\*[^\s*][^*]*\\\*(?=$|\s)|^\.c;[ \t]*\w+)'
- language: Roff
pattern: '^\.\\" '
- extensions: ['.rpy']
rules:
- language: Python
pattern: '^(import|from|class|def)\s'
- language: "Ren'Py"
- extensions: ['.rs']
rules:
- language: Rust
pattern: '^(use |fn |mod |pub |macro_rules|impl|#!?\[)'
- language: RenderScript
pattern: '#include|#pragma\s+(rs|version)|__attribute__'
- language: XML
pattern: '^\s*<\?xml'
- extensions: ['.s']
rules:
- language: Motorola 68K Assembly
named_pattern: m68k
- extensions: ['.sc']
rules:
- language: SuperCollider
pattern: '(?i:\^(this|super)\.|^\s*~\w+\s*=\.)'
- language: Scala
pattern: '(^\s*import (scala|java)\.|^\s*class\b)'
- extensions: ['.scd']
rules:
- language: SuperCollider
pattern: '(?i:\^(this|super)\.|^\s*(~\w+\s*=\.|SynthDef\b))'
- language: Markdown
# Markdown syntax for scdoc
pattern: '^#+\s+(NAME|SYNOPSIS|DESCRIPTION)'
- extensions: ['.sol']
rules:
- language: Solidity
pattern: '\bpragma\s+solidity\b|\b(?:abstract\s+)?contract\s+(?!\d)[a-zA-Z0-9$_]+(?:\s+is\s+(?:[a-zA-Z0-9$_][^\{]*?)?)?\s*\{'
- language: Gerber Image
pattern: '^[DGMT][0-9]{2}\*(?:\r?\n|\r)'
- extensions: ['.sql']
rules:
# Postgres
- language: PLpgSQL
pattern: '(?i:^\\i\b|AS\s+\$\$|LANGUAGE\s+''?plpgsql''?|BEGIN(\s+WORK)?\s*;)'
# IBM db2
- language: SQLPL
pattern: '(?i:ALTER\s+MODULE|MODE\s+DB2SQL|\bSYS(CAT|PROC)\.|ASSOCIATE\s+RESULT\s+SET|\bEND!\s*$)'
# Oracle
- language: PLSQL
pattern: '(?i:\$\$PLSQL_|XMLTYPE|systimestamp|\.nextval|CONNECT\s+BY|AUTHID\s+(DEFINER|CURRENT_USER)|constructor\W+function)'
# T-SQL
- language: TSQL
pattern: '(?i:^\s*GO\b|BEGIN(\s+TRY|\s+CATCH)|OUTPUT\s+INSERTED|DECLARE\s+@|\[dbo\])'
- language: SQL
- extensions: ['.srt']
rules:
- language: SubRip Text
pattern: '^(\d{2}:\d{2}:\d{2},\d{3})\s*(-->)\s*(\d{2}:\d{2}:\d{2},\d{3})$'
- extensions: ['.st']
rules:
- language: StringTemplate
pattern: '\$\w+[($]|(.)!\s*.+?\s*!\1|<!\s*.+?\s*!>|\[!\s*.+?\s*!\]|\{!\s*.+?\s*!\}'
- language: Smalltalk
pattern: '\A\s*[\[{(^"''\w#]|[a-zA-Z_]\w*\s*:=\s*[a-zA-Z_]\w*|class\s*>>\s*[a-zA-Z_]\w*|^[a-zA-Z_]\w*\s+[a-zA-Z_]\w*:|^Class\s*\{|if(?:True|False):\s*\['
- extensions: ['.star']
rules:
- language: STAR
pattern: '^loop_\s*$'
- language: Starlark
- extensions: ['.stl']
rules:
- language: STL
pattern: '\A\s*solid(?:$|\s)[\s\S]*^endsolid(?:$|\s)'
- extensions: ['.sw']
rules:
- language: Sway
pattern: '^\s*(?:(?:abi|dep|fn|impl|mod|pub|trait)\s|#\[)'
- language: XML
pattern: '^\s*<\?xml\s+version'
- extensions: ['.t']
rules:
- language: Perl
and:
- negative_pattern: '^\s*use\s+v6\b'
- named_pattern: perl
- language: Raku
pattern: '^\s*(?:use\s+v6\b|\bmodule\b|\bmy\s+class\b)'
- language: Turing
pattern: '^\s*%[ \t]+|^\s*var\s+\w+(\s*:\s*\w+)?\s*:=\s*\w+'
- extensions: ['.tag']
rules:
- language: Java Server Pages
pattern: '<%[@!=\s]?\s*(taglib|tag|include|attribute|variable)\s'
- extensions: ['.tlv']
rules:
- language: TL-Verilog
pattern: '^\\.{0,10}TLV_version'
- extensions: ['.toc']
rules:
- language: World of Warcraft Addon Data
pattern: '^## |@no-lib-strip@'
- language: TeX
pattern: '^\\(contentsline|defcounter|beamer|boolfalse)'
- extensions: ['.ts']
rules:
- language: XML
pattern: '<TS\b'
- language: TypeScript
- extensions: ['.tst']
rules:
- language: GAP
pattern: 'gap> '
# Heads up - we don't usually write heuristics like this (with no regex match)
- language: Scilab
- extensions: ['.tsx']
rules:
- language: TSX
pattern: '^\s*(import.+(from\s+|require\()[''"]react|\/\/\/\s*<reference\s)'
- language: XML
pattern: '(?i:^\s*<\?xml\s+version)'
- extensions: ['.txt']
rules:
# The following RegExp is simply a collapsed and simplified form of the
# VIM_MODELINE pattern in `./lib/linguist/strategy/modeline.rb`.
- language: Vim Help File
pattern: '(?:(?:^|[ \t])(?:vi|Vi(?=m))(?:m[<=>]?[0-9]+|m)?|[ \t]ex)(?=:(?=[ \t]*set?[ \t][^\r\n:]+:)|:(?![ \t]*set?[ \t]))(?:(?:[ \t]*:[ \t]*|[ \t])\w*(?:[ \t]*=(?:[^\\\s]|\\.)*)?)*[ \t:](?:filetype|ft|syntax)[ \t]*=(help)(?=$|\s|:)'
- language: Adblock Filter List
pattern: |-
(?x)\A
\[
(?<version>
(?:
[Aa]d[Bb]lock
(?:[ \t][Pp]lus)?
|
u[Bb]lock
(?:[ \t][Oo]rigin)?
|
[Aa]d[Gg]uard
)
(?:[ \t] \d+(?:\.\d+)*+)?
)
(?:
[ \t]?;[ \t]?
\g<version>
)*+
\]
# HACK: This is a contrived use of heuristics needed to address
# an unusual edge-case. See https://git.io/JULye for discussion.
- language: Text
- extensions: ['.typ']
rules:
- language: Typst
pattern: '^#(import|show|let|set)'
- language: XML
- extensions: ['.url']
rules:
- language: INI
pattern: '^\[InternetShortcut\](?:\r?\n|\r)(?>[^\s\[][^\r\n]*(?:\r?\n|\r))*URL='
- extensions: ['.v']
rules:
- language: Coq
pattern: '(?:^|\s)(?:Proof|Qed)\.(?:$|\s)|(?:^|\s)Require[ \t]+(Import|Export)\s'
- language: Verilog
pattern: '^[ \t]*module\s+[^\s()]+\s+\#?\(|^[ \t]*`(?:define|ifdef|ifndef|include|timescale)|^[ \t]*always[ \t]+@|^[ \t]*initial[ \t]+(begin|@)'
- language: V
pattern: '\$(?:if|else)[ \t]|^[ \t]*fn\s+[^\s()]+\(.*?\).*?\{|^[ \t]*for\s*\{'
- extensions: ['.vba']
rules:
- language: Vim Script
pattern: '^UseVimball'
- language: VBA
- extensions: ['.w']
rules:
- language: OpenEdge ABL
pattern: '&ANALYZE-SUSPEND _UIB-CODE-BLOCK _CUSTOM _DEFINITIONS'
- language: CWeb
pattern: '^@(<|\w+\.)'
- extensions: ['.x']
rules:
- language: DirectX 3D File
pattern: '^xof 030(2|3)(?:txt|bin|tzip|bzip)\b'
- language: RPC
pattern: '\b(program|version)\s+\w+\s*\{|\bunion\s+\w+\s+switch\s*\('
- language: Logos
pattern: '^%(end|ctor|hook|group)\b'
- language: Linker Script
pattern: 'OUTPUT_ARCH\(|OUTPUT_FORMAT\(|SECTIONS'
- extensions: ['.yaml', '.yml']
rules:
- language: MiniYAML
pattern: '^\t+.*?[^\s:].*?:'
negative_pattern: '---'
- language: OASv2-yaml
pattern: 'swagger:\s?''?"?2.[0-9.]+''?"?'
- language: OASv3-yaml
pattern: 'openapi:\s?''?"?3.[0-9.]+''?"?'
- language: YAML
- extensions: ['.yy']
rules:
- language: JSON
pattern: '\"modelName\"\:\s*\"GM'
- language: Yacc
named_patterns:
cpp:
- '^\s*#\s*include <(cstdint|string|vector|map|list|array|bitset|queue|stack|forward_list|unordered_map|unordered_set|(i|o|io)stream)>'
- '^\s*template\s*<'
- '^[ \t]*(try|constexpr)'
- '^[ \t]*catch\s*\('
- '^[ \t]*(class|(using[ \t]+)?namespace)\s+\w+'
- '^[ \t]*(private|public|protected):$'
- '__has_cpp_attribute|__cplusplus >'
- 'std::\w+'
euphoria:
- '^\s*namespace\s'
- '^\s*(?:public\s+)?include\s'
- '^\s*(?:(?:public|export|global)\s+)?(?:atom|constant|enum|function|integer|object|procedure|sequence|type)\s'
fortran: '^(?i:[c*][^abd-z]| (subroutine|program|end|data)\s|\s*!)'
gsc:
- '^\s*#\s*(?:using|insert|include|define|namespace)[ \t]+\w'
- '^\s*(?>(?:autoexec|private)\s+){0,2}function\s+(?>(?:autoexec|private)\s+){0,2}\w+\s*\('
- '\b(?:level|self)[ \t]+thread[ \t]+(?:\[\[[ \t]*(?>\w+\.)*\w+[ \t]*\]\]|\w+)[ \t]*\([^\r\n\)]*\)[ \t]*;'
- '^[ \t]*#[ \t]*(?:precache|using_animtree)[ \t]*\('
key_equals_value: '^[^#!;][^=]*='
m68k:
- '(?im)\bmoveq(?:\.l)?\s+#(?:\$-?[0-9a-f]{1,3}|%[0-1]{1,8}|-?[0-9]{1,3}),\s*d[0-7]\b'
- '(?im)^\s*move(?:\.[bwl])?\s+(?:sr|usp),\s*[^\s]+'
- '(?im)^\s*move\.[bwl]\s+.*\b[ad]\d'
- '(?im)^\s*movem\.[bwl]\b'
- '(?im)^\s*move[mp](?:\.[wl])?\b'
- '(?im)^\s*btst\b'
- '(?im)^\s*dbra\b'
man-heading: '^[.''][ \t]*SH +(?:[^"\s]+|"[^"\s]+)'
man-title: '^[.''][ \t]*TH +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)'
mdoc-date: '^[.''][ \t]*Dd +(?:[^"\s]+|"[^"]+")'
mdoc-heading: '^[.''][ \t]*Sh +(?:[^"\s]|"[^"]+")'
mdoc-title: '^[.''][ \t]*Dt +(?:[^"\s]+|"[^"]+") +"?(?:[1-9]|@[^\s@]+@)'
objectivec: '^\s*(@(interface|class|protocol|property|end|synchronised|selector|implementation)\b|#import\s+.+\.h[">])'
perl:
- '\buse\s+(?:strict\b|v?5\b)'
- '^\s*use\s+(?:constant|overload)\b'
- '^\s*(?:\*|(?:our\s*)?@)EXPORT\s*='
- '^\s*package\s+[^\W\d]\w*(?:::\w+)*\s*(?:[;{]|\sv?\d)'
- '[\s$][^\W\d]\w*(?::\w+)*->[a-zA-Z_\[({]'
raku: '^\s*(?:use\s+v6\b|\bmodule\b|\b(?:my\s+)?class\b)'
vb-class: '^[ ]*VERSION [0-9]\.[0-9] CLASS'
vb-form: '^[ ]*VERSION [0-9]\.[0-9]{2}'
vb-module: '^[ ]*Attribute VB_Name = '
vba:
- '\b(?:VBA|[vV]ba)(?:\b|[0-9A-Z_])'
# VBA7 new 64-bit features
- '^[ ]*(?:Public|Private)? Declare PtrSafe (?:Sub|Function)\b'
- '^[ ]*#If Win64\b'
- '^[ ]*(?:Dim|Const) [0-9a-zA-Z_]*[ ]*As Long(?:Ptr|Long)\b'
# Top module declarations unique to VBA
- '^[ ]*Option (?:Private Module|Compare (?:Database|Text|Binary))\b'
# General VBA libraries and objects
- '(?: |\()(?:Access|Excel|Outlook|PowerPoint|Visio|Word|VBIDE)\.\w'
- '\b(?:(?:Active)?VBProjects?|VBComponents?|Application\.(?:VBE|ScreenUpdating))\b'
# AutoCAD, Outlook, PowerPoint and Word objects
- '\b(?:ThisDrawing|AcadObject|Active(?:Explorer|Inspector|Window\.Presentation|Presentation|Document)|Selection\.(?:Find|Paragraphs))\b'
# Excel objects
- '\b(?:(?:This|Active)?Workbooks?|Worksheets?|Active(?:Sheet|Chart|Cell)|WorksheetFunction)\b'
- '\b(?:Range\(".*|Cells\([0-9a-zA-Z_]*, (?:[0-9a-zA-Z_]*|"[a-zA-Z]{1,3}"))\)'


@@ -3,6 +3,7 @@
<name>JSON</name>
<alias>json</alias>
<filename>*.json</filename>
<filename>*.jsonc</filename>
<filename>*.avsc</filename>
<mime_type>application/json</mime_type>
<dot_all>true</dot_all>

lexers/jsonnet.xml Normal file

@@ -0,0 +1,137 @@
<lexer>
<config>
<name>Jsonnet</name>
<alias>jsonnet</alias>
<filename>*.jsonnet</filename>
<filename>*.libsonnet</filename>
</config>
<rules>
<state name="_comments">
<rule pattern="(//|#).*\n"><token type="CommentSingle"/></rule>
<rule pattern="/\*\*([^/]|/(?!\*))*\*/"><token type="LiteralStringDoc"/></rule>
<rule pattern="/\*([^/]|/(?!\*))*\*/"><token type="Comment"/></rule>
</state>
<state name="root">
<rule><include state="_comments"/></rule>
<rule pattern="@&#x27;.*&#x27;"><token type="LiteralString"/></rule>
<rule pattern="@&quot;.*&quot;"><token type="LiteralString"/></rule>
<rule pattern="&#x27;"><token type="LiteralString"/><push state="singlestring"/></rule>
<rule pattern="&quot;"><token type="LiteralString"/><push state="doublestring"/></rule>
<rule pattern="\|\|\|(.|\n)*\|\|\|"><token type="LiteralString"/></rule>
<rule pattern="[+-]?[0-9]+(.[0-9])?"><token type="LiteralNumberFloat"/></rule>
<rule pattern="[!$~+\-&amp;|^=&lt;&gt;*/%]"><token type="Operator"/></rule>
<rule pattern="\{"><token type="Punctuation"/><push state="object"/></rule>
<rule pattern="\["><token type="Punctuation"/><push state="array"/></rule>
<rule pattern="local\b"><token type="Keyword"/><push state="local_name"/></rule>
<rule pattern="assert\b"><token type="Keyword"/><push state="assert"/></rule>
<rule pattern="(assert|else|error|false|for|if|import|importstr|in|null|tailstrict|then|self|super|true)\b"><token type="Keyword"/></rule>
<rule pattern="\s+"><token type="TextWhitespace"/></rule>
<rule pattern="function(?=\()"><token type="Keyword"/><push state="function_params"/></rule>
<rule pattern="std\.[^\W\d]\w*(?=\()"><token type="NameBuiltin"/><push state="function_args"/></rule>
<rule pattern="[^\W\d]\w*(?=\()"><token type="NameFunction"/><push state="function_args"/></rule>
<rule pattern="[^\W\d]\w*"><token type="NameVariable"/></rule>
<rule pattern="[\.()]"><token type="Punctuation"/></rule>
</state>
<state name="singlestring">
<rule pattern="[^&#x27;\\]"><token type="LiteralString"/></rule>
<rule pattern="\\."><token type="LiteralStringEscape"/></rule>
<rule pattern="&#x27;"><token type="LiteralString"/><pop depth="1"/></rule>
</state>
<state name="doublestring">
<rule pattern="[^&quot;\\]"><token type="LiteralString"/></rule>
<rule pattern="\\."><token type="LiteralStringEscape"/></rule>
<rule pattern="&quot;"><token type="LiteralString"/><pop depth="1"/></rule>
</state>
<state name="array">
<rule pattern=","><token type="Punctuation"/></rule>
<rule pattern="\]"><token type="Punctuation"/><pop depth="1"/></rule>
<rule><include state="root"/></rule>
</state>
<state name="local_name">
<rule pattern="[^\W\d]\w*(?=\()"><token type="NameFunction"/><push state="function_params"/></rule>
<rule pattern="[^\W\d]\w*"><token type="NameVariable"/></rule>
<rule pattern="\s+"><token type="TextWhitespace"/></rule>
<rule pattern="(?==)"><token type="TextWhitespace"/><push state="#pop" state="local_value"/></rule>
</state>
<state name="local_value">
<rule pattern="="><token type="Operator"/></rule>
<rule pattern=";"><token type="Punctuation"/><pop depth="1"/></rule>
<rule><include state="root"/></rule>
</state>
<state name="assert">
<rule pattern=":"><token type="Punctuation"/></rule>
<rule pattern=";"><token type="Punctuation"/><pop depth="1"/></rule>
<rule><include state="root"/></rule>
</state>
<state name="function_params">
<rule pattern="[^\W\d]\w*"><token type="NameVariable"/></rule>
<rule pattern="\("><token type="Punctuation"/></rule>
<rule pattern="\)"><token type="Punctuation"/><pop depth="1"/></rule>
<rule pattern=","><token type="Punctuation"/></rule>
<rule pattern="\s+"><token type="TextWhitespace"/></rule>
<rule pattern="="><token type="Operator"/><push state="function_param_default"/></rule>
</state>
<state name="function_args">
<rule pattern="\("><token type="Punctuation"/></rule>
<rule pattern="\)"><token type="Punctuation"/><pop depth="1"/></rule>
<rule pattern=","><token type="Punctuation"/></rule>
<rule pattern="\s+"><token type="TextWhitespace"/></rule>
<rule><include state="root"/></rule>
</state>
<state name="object">
<rule pattern="\s+"><token type="TextWhitespace"/></rule>
<rule pattern="local\b"><token type="Keyword"/><push state="object_local_name"/></rule>
<rule pattern="assert\b"><token type="Keyword"/><push state="object_assert"/></rule>
<rule pattern="\["><token type="Operator"/><push state="field_name_expr"/></rule>
<rule pattern="(?=[^\W\d]\w*)"><token type="Text"/><push state="field_name"/></rule>
<rule pattern="\}"><token type="Punctuation"/><pop depth="1"/></rule>
<rule pattern="&quot;"><token type="NameVariable"/><push state="double_field_name"/></rule>
<rule pattern="&#x27;"><token type="NameVariable"/><push state="single_field_name"/></rule>
<rule><include state="_comments"/></rule>
</state>
<state name="field_name">
<rule pattern="[^\W\d]\w*(?=\()"><token type="NameFunction"/><push state="field_separator" state="function_params"/></rule>
<rule pattern="[^\W\d]\w*"><token type="NameVariable"/><push state="field_separator"/></rule>
</state>
<state name="double_field_name">
<rule pattern="([^&quot;\\]|\\.)*&quot;"><token type="NameVariable"/><push state="field_separator"/></rule>
</state>
<state name="single_field_name">
<rule pattern="([^&#x27;\\]|\\.)*&#x27;"><token type="NameVariable"/><push state="field_separator"/></rule>
</state>
<state name="field_name_expr">
<rule pattern="\]"><token type="Operator"/><push state="field_separator"/></rule>
<rule><include state="root"/></rule>
</state>
<state name="function_param_default">
<rule pattern="(?=[,\)])"><token type="TextWhitespace"/><pop depth="1"/></rule>
<rule><include state="root"/></rule>
</state>
<state name="field_separator">
<rule pattern="\s+"><token type="TextWhitespace"/></rule>
<rule pattern="\+?::?:?"><token type="Punctuation"/><push state="#pop" state="#pop" state="field_value"/></rule>
<rule><include state="_comments"/></rule>
</state>
<state name="field_value">
<rule pattern=","><token type="Punctuation"/><pop depth="1"/></rule>
<rule pattern="\}"><token type="Punctuation"/><pop depth="2"/></rule>
<rule><include state="root"/></rule>
</state>
<state name="object_assert">
<rule pattern=":"><token type="Punctuation"/></rule>
<rule pattern=","><token type="Punctuation"/><pop depth="1"/></rule>
<rule><include state="root"/></rule>
</state>
<state name="object_local_name">
<rule pattern="[^\W\d]\w*"><token type="NameVariable"/><push state="#pop" state="object_local_value"/></rule>
<rule pattern="\s+"><token type="TextWhitespace"/></rule>
</state>
<state name="object_local_value">
<rule pattern="="><token type="Operator"/></rule>
<rule pattern=","><token type="Punctuation"/><pop depth="1"/></rule>
<rule pattern="\}"><token type="Punctuation"/><pop depth="2"/></rule>
<rule><include state="root"/></rule>
</state>
</rules>
</lexer>

lexers/markdown.xml Normal file

@@ -0,0 +1,55 @@
<lexer>
<config>
<name>Markdown</name>
<alias>markdown</alias>
<alias>md</alias>
<filename>*.md</filename>
<filename>*.markdown</filename>
<mime_type>text/x-markdown</mime_type>
</config>
<rules>
<state name="root">
<rule pattern="(^#[^#].+)(\n)"><bygroups><token type="GenericHeading"/><token type="Text"/></bygroups></rule>
<rule pattern="(^#{2,6}[^#].+)(\n)"><bygroups><token type="GenericSubheading"/><token type="Text"/></bygroups></rule>
<rule pattern="^(.+)(\n)(=+)(\n)"><bygroups><token type="GenericHeading"/><token type="Text"/><token type="GenericHeading"/><token type="Text"/></bygroups></rule>
<rule pattern="^(.+)(\n)(-+)(\n)"><bygroups><token type="GenericSubheading"/><token type="Text"/><token type="GenericSubheading"/><token type="Text"/></bygroups></rule>
<rule pattern="^(\s*)([*-] )(\[[ xX]\])( .+\n)"><bygroups><token type="TextWhitespace"/><token type="Keyword"/><token type="Keyword"/><usingself state="inline"/></bygroups></rule>
<rule pattern="^(\s*)([*-])(\s)(.+\n)"><bygroups><token type="TextWhitespace"/><token type="Keyword"/><token type="TextWhitespace"/><usingself state="inline"/></bygroups></rule>
<rule pattern="^(\s*)([0-9]+\.)( .+\n)"><bygroups><token type="TextWhitespace"/><token type="Keyword"/><usingself state="inline"/></bygroups></rule>
<rule pattern="^(\s*&gt;\s)(.+\n)"><bygroups><token type="Keyword"/><token type="GenericEmph"/></bygroups></rule>
<rule pattern="^(```\n)([\w\W]*?)(^```$)">
<bygroups>
<token type="LiteralStringBacktick"/>
<token type="Text"/>
<token type="LiteralStringBacktick"/>
</bygroups>
</rule>
<rule pattern="^(```)(\w+)(\n)([\w\W]*?)(^```$)">
<bygroups>
<token type="LiteralStringBacktick"/>
<token type="NameLabel"/>
<token type="TextWhitespace"/>
<UsingByGroup lexer="2" content="4"/>
<token type="LiteralStringBacktick"/>
</bygroups>
</rule>
<rule><include state="inline"/></rule>
</state>
<state name="inline">
<rule pattern="\\."><token type="Text"/></rule>
<rule pattern="([^`]?)(`[^`\n]+`)"><bygroups><token type="Text"/><token type="LiteralStringBacktick"/></bygroups></rule>
<rule pattern="([^\*]?)(\*\*[^* \n][^*\n]*\*\*)"><bygroups><token type="Text"/><token type="GenericStrong"/></bygroups></rule>
<rule pattern="([^_]?)(__[^_ \n][^_\n]*__)"><bygroups><token type="Text"/><token type="GenericStrong"/></bygroups></rule>
<rule pattern="([^\*]?)(\*[^* \n][^*\n]*\*)"><bygroups><token type="Text"/><token type="GenericEmph"/></bygroups></rule>
<rule pattern="([^_]?)(_[^_ \n][^_\n]*_)"><bygroups><token type="Text"/><token type="GenericEmph"/></bygroups></rule>
<rule pattern="([^~]?)(~~[^~ \n][^~\n]*~~)"><bygroups><token type="Text"/><token type="GenericDeleted"/></bygroups></rule>
<rule pattern="[@#][\w/:]+"><token type="NameEntity"/></rule>
<rule pattern="(!?\[)([^]]+)(\])(\()([^)]+)(\))"><bygroups><token type="Text"/><token type="NameTag"/><token type="Text"/><token type="Text"/><token type="NameAttribute"/><token type="Text"/></bygroups></rule>
<rule pattern="(\[)([^]]+)(\])(\[)([^]]*)(\])"><bygroups><token type="Text"/><token type="NameTag"/><token type="Text"/><token type="Text"/><token type="NameLabel"/><token type="Text"/></bygroups></rule>
<rule pattern="^(\s*\[)([^]]*)(\]:\s*)(.+)"><bygroups><token type="Text"/><token type="NameLabel"/><token type="Text"/><token type="NameAttribute"/></bygroups></rule>
<rule pattern="[^\\\s]+"><token type="Text"/></rule>
<rule pattern="."><token type="Text"/></rule>
</state>
</rules>
</lexer>


@@ -45,7 +45,7 @@
</emitters>
</usingbygroup>
</rule>
<rule pattern="(ACCESS|ADD|ADDRESSES|AGGREGATE|ALIGNED|ALL|ALTER|ANALYSIS|AND|ANY|ARITY|ARN|ARRANGEMENT|ARRAY|AS|ASC|ASSERT|ASSUME|AT|AUCTION|AUTHORITY|AVAILABILITY|AVRO|AWS|BATCH|BEGIN|BETWEEN|BIGINT|BILLED|BODY|BOOLEAN|BOTH|BPCHAR|BROKEN|BROKER|BROKERS|BY|BYTES|CARDINALITY|CASCADE|CASE|CAST|CERTIFICATE|CHAIN|CHAINS|CHAR|CHARACTER|CHARACTERISTICS|CHECK|CLIENT|CLOSE|CLUSTER|CLUSTERS|COALESCE|COLLATE|COLUMN|COLUMNS|COMMENT|COMMIT|COMMITTED|COMPACTION|COMPATIBILITY|COMPRESSION|COMPUTE|COMPUTECTL|CONFIG|CONFLUENT|CONNECTION|CONNECTIONS|CONSTRAINT|COPY|COUNT|COUNTER|CREATE|CREATECLUSTER|CREATEDB|CREATEROLE|CREATION|CROSS|CSV|CURRENT|CURSOR|DATABASE|DATABASES|DATUMS|DAY|DAYS|DEALLOCATE|DEBEZIUM|DEBUG|DEBUGGING|DEC|DECIMAL|DECLARE|DECODING|DECORRELATED|DEFAULT|DEFAULTS|DELETE|DELIMITED|DELIMITER|DELTA|DESC|DETAILS|DISCARD|DISK|DISTINCT|DOC|DOT|DOUBLE|DROP|EAGER|ELEMENT|ELSE|ENABLE|END|ENDPOINT|ENFORCED|ENVELOPE|ERROR|ERRORS|ESCAPE|ESTIMATE|EVERY|EXCEPT|EXECUTE|EXISTS|EXPECTED|EXPLAIN|EXPOSE|EXPRESSIONS|EXTERNAL|EXTRACT|FACTOR|FALSE|FAST|FEATURES|FETCH|FIELDS|FILE|FILTER|FIRST|FIXPOINT|FLOAT|FOLLOWING|FOR|FOREIGN|FORMAT|FORWARD|FROM|FULL|FULLNAME|FUNCTION|GENERATOR|GRANT|GREATEST|GROUP|GROUPS|HAVING|HEADER|HEADERS|HISTORY|HOLD|HOST|HOUR|HOURS|HUMANIZED|ID|IDENTIFIERS|IDS|IF|IGNORE|ILIKE|IMPLEMENTATIONS|IMPORTED|IN|INCLUDE|INDEX|INDEXES|INFO|INHERIT|INLINE|INNER|INPUT|INSERT|INSIGHTS|INSPECT|INT|INTEGER|INTERNAL|INTERSECT|INTERVAL|INTO|INTROSPECTION|IS|ISNULL|ISOLATION|JOIN|JOINS|JSON|KAFKA|KEY|KEYS|LAST|LATERAL|LATEST|LEADING|LEAST|LEFT|LEGACY|LETREC|LEVEL|LIKE|LIMIT|LINEAR|LIST|LOAD|LOCAL|LOCALLY|LOG|LOGICAL|LOGIN|LOWERING|MANAGED|MANUAL|MAP|MARKETING|MATERIALIZE|MATERIALIZED|MAX|MECHANISMS|MEMBERSHIP|MESSAGE|METADATA|MINUTE|MINUTES|MODE|MONTH|MONTHS|MUTUALLY|MYSQL|NAME|NAMES|NATURAL|NEGATIVE|NEW|NEXT|NO|NOCREATECLUSTER|NOCREATEDB|NOCREATEROLE|NODE|NOINHERIT|NOLOGIN|NON|NONE|NOSUPERUSER|NOT|NOTICE|NOTICES|NULL|NULLIF|NULLS|OBJECTS|OF|OFFSET|ON|ONLY|OPERATOR|OPTIMIZED|OPTI
MIZER|OPTIONS|OR|ORDER|ORDINALITY|OUTER|OVER|OWNED|OWNER|PARTITION|PARTITIONS|PASSWORD|PATH|PHYSICAL|PLAN|PLANS|PORT|POSITION|POSTGRES|PRECEDING|PRECISION|PREFIX|PREPARE|PRIMARY|PRIVATELINK|PRIVILEGES|PROGRESS|PROTOBUF|PROTOCOL|PUBLICATION|PUSHDOWN|QUERY|QUOTE|RAISE|RANGE|RATE|RAW|READ|REAL|REASSIGN|RECURSION|RECURSIVE|REDACTED|REFERENCE|REFERENCES|REFRESH|REGEX|REGION|REGISTRY|REHYDRATION|RENAME|REOPTIMIZE|REPEATABLE|REPLACE|REPLAN|REPLICA|REPLICAS|REPLICATION|RESET|RESPECT|RESTRICT|RETAIN|RETURN|RETURNING|REVOKE|RIGHT|ROLE|ROLES|ROLLBACK|ROTATE|ROUNDS|ROW|ROWS|SASL|SCALE|SCHEDULE|SCHEMA|SCHEMAS|SECOND|SECONDS|SECRET|SECRETS|SECURITY|SEED|SELECT|SEQUENCES|SERIALIZABLE|SERVICE|SESSION|SET|SHARD|SHOW|SINK|SINKS|SIZE|SMALLINT|SNAPSHOT|SOME|SOURCE|SOURCES|SSH|SSL|START|STDIN|STDOUT|STORAGE|STORAGECTL|STRATEGY|STRICT|STRING|STRONG|SUBSCRIBE|SUBSOURCE|SUBSOURCES|SUBSTRING|SUBTREE|SUPERUSER|SWAP|SYNTAX|SYSTEM|TABLE|TABLES|TAIL|TEMP|TEMPORARY|TEXT|THEN|TICK|TIES|TIME|TIMELINE|TIMEOUT|TIMESTAMP|TIMESTAMPTZ|TIMING|TO|TOKEN|TOPIC|TPCH|TRACE|TRAILING|TRANSACTION|TRANSACTIONAL|TRIM|TRUE|TUNNEL|TYPE|TYPES|UNBOUNDED|UNCOMMITTED|UNION|UNIQUE|UNKNOWN|UP|UPDATE|UPSERT|URL|USAGE|USER|USERNAME|USERS|USING|VALIDATE|VALUE|VALUES|VARCHAR|VARIADIC|VARYING|VERSION|VIEW|VIEWS|WARNING|WEBHOOK|WHEN|WHERE|WINDOW|WIRE|WITH|WITHIN|WITHOUT|WORK|WORKERS|WRITE|YEAR|YEARS|ZONE|ZONES)\b">
<rule pattern="(ACCESS|ADD|ADDRESSES|AGGREGATE|ALIGNED|ALL|ALTER|ANALYSIS|AND|ANY|ARITY|ARN|ARRANGEMENT|ARRAY|AS|ASC|ASSERT|ASSUME|AT|AUCTION|AUTHORITY|AVAILABILITY|AVRO|AWS|BATCH|BEGIN|BETWEEN|BIGINT|BILLED|BODY|BOOLEAN|BOTH|BPCHAR|BROKEN|BROKER|BROKERS|BY|BYTES|CARDINALITY|CASCADE|CASE|CAST|CERTIFICATE|CHAIN|CHAINS|CHAR|CHARACTER|CHARACTERISTICS|CHECK|CLASS|CLIENT|CLOCK|CLOSE|CLUSTER|CLUSTERS|COALESCE|COLLATE|COLUMN|COLUMNS|COMMENT|COMMIT|COMMITTED|COMPACTION|COMPATIBILITY|COMPRESSION|COMPUTE|COMPUTECTL|CONFIG|CONFLUENT|CONNECTION|CONNECTIONS|CONSTRAINT|CONTINUAL|COPY|COUNT|COUNTER|CREATE|CREATECLUSTER|CREATEDB|CREATEROLE|CREATION|CROSS|CSV|CURRENT|CURSOR|DATABASE|DATABASES|DATUMS|DAY|DAYS|DEALLOCATE|DEBEZIUM|DEBUG|DEBUGGING|DEC|DECIMAL|DECLARE|DECODING|DECORRELATED|DEFAULT|DEFAULTS|DELETE|DELIMITED|DELIMITER|DELTA|DESC|DETAILS|DISCARD|DISK|DISTINCT|DOC|DOT|DOUBLE|DROP|EAGER|ELEMENT|ELSE|ENABLE|END|ENDPOINT|ENFORCED|ENVELOPE|ERROR|ERRORS|ESCAPE|ESTIMATE|EVERY|EXCEPT|EXCLUDE|EXECUTE|EXISTS|EXPECTED|EXPLAIN|EXPOSE|EXPRESSIONS|EXTERNAL|EXTRACT|FACTOR|FALSE|FAST|FEATURES|FETCH|FIELDS|FILE|FILTER|FIRST|FIXPOINT|FLOAT|FOLLOWING|FOR|FOREIGN|FORMAT|FORWARD|FROM|FULL|FULLNAME|FUNCTION|FUSION|GENERATOR|GRANT|GREATEST|GROUP|GROUPS|HAVING|HEADER|HEADERS|HISTORY|HOLD|HOST|HOUR|HOURS|HUMANIZED|HYDRATION|ID|IDENTIFIERS|IDS|IF|IGNORE|ILIKE|IMPLEMENTATIONS|IMPORTED|IN|INCLUDE|INDEX|INDEXES|INFO|INHERIT|INLINE|INNER|INPUT|INSERT|INSIGHTS|INSPECT|INT|INTEGER|INTERNAL|INTERSECT|INTERVAL|INTO|INTROSPECTION|IS|ISNULL|ISOLATION|JOIN|JOINS|JSON|KAFKA|KEY|KEYS|LAST|LATERAL|LATEST|LEADING|LEAST|LEFT|LEGACY|LETREC|LEVEL|LIKE|LIMIT|LINEAR|LIST|LOAD|LOCAL|LOCALLY|LOG|LOGICAL|LOGIN|LOWERING|MANAGED|MANUAL|MAP|MARKETING|MATERIALIZE|MATERIALIZED|MAX|MECHANISMS|MEMBERSHIP|MESSAGE|METADATA|MINUTE|MINUTES|MODE|MONTH|MONTHS|MUTUALLY|MYSQL|NAME|NAMES|NATURAL|NEGATIVE|NEW|NEXT|NO|NOCREATECLUSTER|NOCREATEDB|NOCREATEROLE|NODE|NOINHERIT|NOLOGIN|NON|NONE|NOSUPERUSER|NOT|NOTICE|NOTICES|NULL|NULLIF|NULLS|OBJECTS|OF|OFFSET|ON|ONLY|OPERATOR|OPTIMIZED|OPTIMIZER|OPTIONS|OR|ORDER|ORDINALITY|OUTER|OVER|OWNED|OWNER|PARTITION|PARTITIONS|PASSWORD|PATH|PHYSICAL|PLAN|PLANS|PORT|POSITION|POSTGRES|PRECEDING|PRECISION|PREFIX|PREPARE|PRIMARY|PRIVATELINK|PRIVILEGES|PROGRESS|PROTOBUF|PROTOCOL|PUBLIC|PUBLICATION|PUSHDOWN|QUERY|QUOTE|RAISE|RANGE|RATE|RAW|READ|READY|REAL|REASSIGN|RECURSION|RECURSIVE|REDACTED|REDUCE|REFERENCE|REFERENCES|REFRESH|REGEX|REGION|REGISTRY|RENAME|REOPTIMIZE|REPEATABLE|REPLACE|REPLAN|REPLICA|REPLICAS|REPLICATION|RESET|RESPECT|RESTRICT|RETAIN|RETURN|RETURNING|REVOKE|RIGHT|ROLE|ROLES|ROLLBACK|ROTATE|ROUNDS|ROW|ROWS|SASL|SCALE|SCHEDULE|SCHEMA|SCHEMAS|SECOND|SECONDS|SECRET|SECRETS|SECURITY|SEED|SELECT|SEQUENCES|SERIALIZABLE|SERVICE|SESSION|SET|SHARD|SHOW|SINK|SINKS|SIZE|SMALLINT|SNAPSHOT|SOME|SOURCE|SOURCES|SSH|SSL|START|STDIN|STDOUT|STORAGE|STORAGECTL|STRATEGY|STRICT|STRING|STRONG|SUBSCRIBE|SUBSOURCE|SUBSOURCES|SUBSTRING|SUBTREE|SUPERUSER|SWAP|SYNTAX|SYSTEM|TABLE|TABLES|TAIL|TASK|TEMP|TEMPORARY|TEXT|THEN|TICK|TIES|TIME|TIMELINE|TIMEOUT|TIMESTAMP|TIMESTAMPTZ|TIMING|TO|TOKEN|TOPIC|TPCH|TRACE|TRAILING|TRANSACTION|TRANSACTIONAL|TRIM|TRUE|TUNNEL|TYPE|TYPES|UNBOUNDED|UNCOMMITTED|UNION|UNIQUE|UNKNOWN|UNNEST|UNTIL|UP|UPDATE|UPSERT|URL|USAGE|USER|USERNAME|USERS|USING|VALIDATE|VALUE|VALUES|VARCHAR|VARIADIC|VARYING|VERSION|VIEW|VIEWS|WAIT|WARNING|WEBHOOK|WHEN|WHERE|WINDOW|WIRE|WITH|WITHIN|WITHOUT|WORK|WORKERS|WORKLOAD|WRITE|YEAR|YEARS|YUGABYTE|ZONE|ZONES)\b">
<token type="Keyword" />
</rule>
<rule pattern="[+*/&lt;&gt;=~!@#%^&amp;|`?-]+">


@@ -1,182 +1,137 @@
<lexer>
<config>
<name>mcfunction</name>
<name>MCFunction</name>
<alias>mcfunction</alias>
<alias>mcf</alias>
<filename>*.mcfunction</filename>
<dot_all>true</dot_all>
<not_multiline>true</not_multiline>
<mime_type>text/mcfunction</mime_type>
</config>
<rules>
<state name="nbtobjectvalue">
<rule pattern="(&#34;(\\\\|\\&#34;|[^&#34;])*&#34;|[a-zA-Z0-9_]+)">
<token type="NameTag"/>
<push state="nbtobjectattribute"/>
</rule>
<rule pattern="\}">
<token type="Punctuation"/>
<pop depth="1"/>
</rule>
</state>
<state name="nbtarrayvalue">
<rule>
<include state="nbtvalue"/>
</rule>
<rule pattern=",">
<token type="Punctuation"/>
</rule>
<rule pattern="\]">
<token type="Punctuation"/>
<pop depth="1"/>
</rule>
</state>
<state name="nbtvalue">
<rule>
<include state="simplevalue"/>
</rule>
<rule pattern="\{">
<token type="Punctuation"/>
<push state="nbtobjectvalue"/>
</rule>
<rule pattern="\[">
<token type="Punctuation"/>
<push state="nbtarrayvalue"/>
</rule>
</state>
<state name="argumentvalue">
<rule>
<include state="simplevalue"/>
</rule>
<rule pattern=",">
<token type="Punctuation"/>
<pop depth="1"/>
</rule>
<rule pattern="[}\]]">
<token type="Punctuation"/>
<pop depth="2"/>
</rule>
</state>
<state name="argumentlist">
<rule pattern="(nbt)(={)">
<bygroups>
<token type="NameAttribute"/>
<token type="Punctuation"/>
</bygroups>
<push state="nbtobjectvalue"/>
</rule>
<rule pattern="([A-Za-z0-9/_!]+)(={)">
<bygroups>
<token type="NameAttribute"/>
<token type="Punctuation"/>
</bygroups>
<push state="argumentlist"/>
</rule>
<rule pattern="([A-Za-z0-9/_!]+)(=)">
<bygroups>
<token type="NameAttribute"/>
<token type="Punctuation"/>
</bygroups>
<push state="argumentvalue"/>
</rule>
<rule>
<include state="simplevalue"/>
</rule>
<rule pattern=",">
<token type="Punctuation"/>
</rule>
<rule pattern="[}\]]">
<token type="Punctuation"/>
<pop depth="1"/>
</rule>
</state>
<state name="root">
<rule pattern="#.*?\n">
<token type="CommentSingle"/>
</rule>
<rule pattern="/?(geteduclientinfo|clearspawnpoint|defaultgamemode|transferserver|toggledownfall|immutableworld|detectredstone|setidletimeout|playanimation|classroommode|spreadplayers|testforblocks|setmaxplayers|setworldspawn|testforblock|worldbuilder|createagent|worldborder|camerashake|advancement|raytracefog|locatebiome|tickingarea|replaceitem|attributes|spawnpoint|difficulty|experience|scoreboard|whitelist|structure|playsound|stopsound|forceload|spectate|gamerule|function|schedule|wsserver|teleport|position|save-off|particle|setblock|datapack|mobevent|transfer|gamemode|save-all|bossbar|enchant|trigger|collect|execute|weather|teammsg|tpagent|banlist|dropall|publish|tellraw|testfor|save-on|destroy|ability|locate|summon|remove|effect|reload|ban-ip|recipe|pardon|detect|music|clear|clone|event|mixer|debug|title|ride|stop|list|turn|data|team|kick|loot|tell|help|give|flog|fill|move|time|seed|kill|save|item|deop|code|tag|ban|msg|say|tp|me|op|xp|w|place)\b">
<token type="KeywordReserved"/>
</rule>
<rule pattern="(@p|@r|@a|@e|@s|@c|@v)">
<token type="KeywordConstant"/>
</rule>
<rule pattern="\[">
<token type="Punctuation"/>
<push state="argumentlist"/>
</rule>
<rule pattern="{">
<token type="Punctuation"/>
<push state="nbtobjectvalue"/>
</rule>
<rule pattern="~">
<token type="NameBuiltin"/>
</rule>
<rule pattern="([a-zA-Z_]+:)?[a-zA-Z_]+\b">
<token type="Text"/>
</rule>
<rule pattern="([a-z]+)(\.)([0-9]+)\b">
<bygroups>
<token type="Text"/>
<token type="Punctuation"/>
<token type="LiteralNumber"/>
</bygroups>
</rule>
<rule pattern="([&lt;&gt;=]|&lt;=|&gt;=)">
<token type="Punctuation"/>
</rule>
<rule>
<include state="simplevalue"/>
</rule>
<rule pattern="\s+">
<token type="TextWhitespace"/>
</rule>
<rule><include state="names"/></rule>
<rule><include state="comments"/></rule>
<rule><include state="literals"/></rule>
<rule><include state="whitespace"/></rule>
<rule><include state="property"/></rule>
<rule><include state="operators"/></rule>
<rule><include state="selectors"/></rule>
</state>
<state name="simplevalue">
<rule pattern="(true|false)">
<token type="KeywordConstant"/>
</rule>
<rule pattern="[01]b">
<token type="LiteralNumber"/>
</rule>
<rule pattern="-?(0|[1-9]\d*)(\.\d+[eE](\+|-)?\d+|[eE](\+|-)?\d+|\.\d+)">
<token type="LiteralNumberFloat"/>
</rule>
<rule pattern="(-?\d+)(\.\.)(-?\d+)">
<bygroups>
<token type="LiteralNumberInteger"/>
<token type="Punctuation"/>
<token type="LiteralNumberInteger"/>
</bygroups>
</rule>
<rule pattern="-?(0|[1-9]\d*)">
<token type="LiteralNumberInteger"/>
</rule>
<rule pattern="&#34;(\\\\|\\&#34;|[^&#34;])*&#34;">
<token type="LiteralStringDouble"/>
</rule>
<rule pattern="&#39;[^&#39;]+&#39;">
<token type="LiteralStringSingle"/>
</rule>
<rule pattern="([!#]?)(\w+)">
<bygroups>
<token type="Punctuation"/>
<token type="Text"/>
</bygroups>
</rule>
<state name="names">
<rule pattern="^(\s*)([a-z_]+)"><bygroups><token type="TextWhitespace"/><token type="NameBuiltin"/></bygroups></rule>
<rule pattern="(?&lt;=run)\s+[a-z_]+"><token type="NameBuiltin"/></rule>
<rule pattern="\b[0-9a-fA-F]+(?:-[0-9a-fA-F]+){4}\b"><token type="NameVariable"/></rule>
<rule><include state="resource-name"/></rule>
<rule pattern="[A-Za-z_][\w.#%$]+"><token type="KeywordConstant"/></rule>
<rule pattern="[#%$][\w.#%$]+"><token type="NameVariableMagic"/></rule>
</state>
<state name="nbtobjectattribute">
<rule>
<include state="nbtvalue"/>
</rule>
<rule pattern=":">
<token type="Punctuation"/>
</rule>
<rule pattern=",">
<token type="Punctuation"/>
<pop depth="1"/>
</rule>
<rule pattern="\}">
<token type="Punctuation"/>
<pop depth="2"/>
</rule>
<state name="resource-name">
<rule pattern="#?[a-z_][a-z_.-]*:[a-z0-9_./-]+"><token type="NameFunction"/></rule>
<rule pattern="#?[a-z0-9_\.\-]+\/[a-z0-9_\.\-\/]+"><token type="NameFunction"/></rule>
</state>
<state name="whitespace">
<rule pattern="\s+"><token type="TextWhitespace"/></rule>
</state>
<state name="comments">
<rule pattern="^\s*(#[&gt;!])"><token type="CommentMultiline"/><push state="comments.block" state="comments.block.emphasized"/></rule>
<rule pattern="#.*$"><token type="CommentSingle"/></rule>
</state>
<state name="comments.block">
<rule pattern="^\s*#[&gt;!]"><token type="CommentMultiline"/><push state="comments.block.emphasized"/></rule>
<rule pattern="^\s*#"><token type="CommentMultiline"/><push state="comments.block.normal"/></rule>
<rule><pop depth="1"/></rule>
</state>
<state name="comments.block.normal">
<rule><include state="comments.block.special"/></rule>
<rule pattern="\S+"><token type="CommentMultiline"/></rule>
<rule pattern="\n"><token type="Text"/><pop depth="1"/></rule>
<rule><include state="whitespace"/></rule>
</state>
<state name="comments.block.emphasized">
<rule><include state="comments.block.special"/></rule>
<rule pattern="\S+"><token type="LiteralStringDoc"/></rule>
<rule pattern="\n"><token type="Text"/><pop depth="1"/></rule>
<rule><include state="whitespace"/></rule>
</state>
<state name="comments.block.special">
<rule pattern="@\S+"><token type="NameDecorator"/></rule>
<rule><include state="resource-name"/></rule>
<rule pattern="[#%$][\w.#%$]+"><token type="NameVariableMagic"/></rule>
</state>
<state name="operators">
<rule pattern="[\-~%^?!+*&lt;&gt;\\/|&amp;=.]"><token type="Operator"/></rule>
</state>
<state name="literals">
<rule pattern="\.\."><token type="Literal"/></rule>
<rule pattern="(true|false)"><token type="KeywordPseudo"/></rule>
<rule pattern="[A-Za-z_]+"><token type="NameVariableClass"/></rule>
<rule pattern="[0-7]b"><token type="LiteralNumberByte"/></rule>
<rule pattern="[+-]?\d*\.?\d+([eE]?[+-]?\d+)?[df]?\b"><token type="LiteralNumberFloat"/></rule>
<rule pattern="[+-]?\d+\b"><token type="LiteralNumberInteger"/></rule>
<rule pattern="&quot;"><token type="LiteralStringDouble"/><push state="literals.string-double"/></rule>
<rule pattern="&#x27;"><token type="LiteralStringSingle"/><push state="literals.string-single"/></rule>
</state>
<state name="literals.string-double">
<rule pattern="\\."><token type="LiteralStringEscape"/></rule>
<rule pattern="[^\\&quot;\n]+"><token type="LiteralStringDouble"/></rule>
<rule pattern="&quot;"><token type="LiteralStringDouble"/><pop depth="1"/></rule>
</state>
<state name="literals.string-single">
<rule pattern="\\."><token type="LiteralStringEscape"/></rule>
<rule pattern="[^\\&#x27;\n]+"><token type="LiteralStringSingle"/></rule>
<rule pattern="&#x27;"><token type="LiteralStringSingle"/><pop depth="1"/></rule>
</state>
<state name="selectors">
<rule pattern="@[a-z]"><token type="NameVariable"/></rule>
</state>
<state name="property">
<rule pattern="\{"><token type="Punctuation"/><push state="property.curly" state="property.key"/></rule>
<rule pattern="\["><token type="Punctuation"/><push state="property.square" state="property.key"/></rule>
</state>
<state name="property.curly">
<rule><include state="whitespace"/></rule>
<rule><include state="property"/></rule>
<rule pattern="\}"><token type="Punctuation"/><pop depth="1"/></rule>
</state>
<state name="property.square">
<rule><include state="whitespace"/></rule>
<rule><include state="property"/></rule>
<rule pattern="\]"><token type="Punctuation"/><pop depth="1"/></rule>
<rule pattern=","><token type="Punctuation"/></rule>
</state>
<state name="property.key">
<rule><include state="whitespace"/></rule>
<rule pattern="#?[a-z_][a-z_\.\-]*\:[a-z0-9_\.\-/]+(?=\s*\=)"><token type="NameAttribute"/><push state="property.delimiter"/></rule>
<rule pattern="#?[a-z_][a-z0-9_\.\-/]+"><token type="NameAttribute"/><push state="property.delimiter"/></rule>
<rule pattern="[A-Za-z_\-\+]+"><token type="NameAttribute"/><push state="property.delimiter"/></rule>
<rule pattern="&quot;"><token type="NameAttribute"/><push state="property.delimiter"/></rule>
<rule pattern="&#x27;"><token type="NameAttribute"/><push state="property.delimiter"/></rule>
<rule pattern="-?\d+"><token type="LiteralNumberInteger"/><push state="property.delimiter"/></rule>
<rule><pop depth="1"/></rule>
</state>
<state name="property.key.string-double">
<rule pattern="\\."><token type="LiteralStringEscape"/></rule>
<rule pattern="[^\\&quot;\n]+"><token type="NameAttribute"/></rule>
<rule pattern="&quot;"><token type="NameAttribute"/><pop depth="1"/></rule>
</state>
<state name="property.key.string-single">
<rule pattern="\\."><token type="LiteralStringEscape"/></rule>
<rule pattern="[^\\&#x27;\n]+"><token type="NameAttribute"/></rule>
<rule pattern="&#x27;"><token type="NameAttribute"/><pop depth="1"/></rule>
</state>
<state name="property.delimiter">
<rule><include state="whitespace"/></rule>
<rule pattern="[:=]!?"><token type="Punctuation"/><push state="property.value"/></rule>
<rule pattern=","><token type="Punctuation"/></rule>
<rule><pop depth="1"/></rule>
</state>
<state name="property.value">
<rule><include state="whitespace"/></rule>
<rule pattern="#?[a-z_][a-z_\.\-]*\:[a-z0-9_\.\-/]+"><token type="NameTag"/></rule>
<rule pattern="#?[a-z_][a-z0-9_\.\-/]+"><token type="NameTag"/></rule>
<rule><include state="literals"/></rule>
<rule><include state="property"/></rule>
<rule><pop depth="1"/></rule>
</state>
</rules>
</lexer>
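The float rule in the `literals` state above accepts an optional exponent and an optional `d`/`f` type suffix. A quick sanity check with Python's `re` (only an approximation of the regex engine the highlighter actually uses):

```python
import re

# Float pattern copied from the "literals" state above.
FLOAT = re.compile(r"[+-]?\d*\.?\d+([eE]?[+-]?\d+)?[df]?\b")

for text in ("1.5f", "-0.25d", "3e8", "42"):
    m = FLOAT.match(text)
    print(text, "->", m.group(0) if m else None)
```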

lexers/moinwiki.xml Normal file

@@ -0,0 +1,33 @@
<lexer>
<config>
<name>MoinMoin/Trac Wiki markup</name>
<alias>trac-wiki</alias>
<alias>moin</alias>
<mime_type>text/x-trac-wiki</mime_type>
<case_insensitive>true</case_insensitive>
</config>
<rules>
<state name="root">
<rule pattern="^#.*$"><token type="Comment"/></rule>
<rule pattern="(!)(\S+)"><bygroups><token type="Keyword"/><token type="Text"/></bygroups></rule>
<rule pattern="^(=+)([^=]+)(=+)(\s*#.+)?$"><bygroups><token type="GenericHeading"/><usingself state="root"/><token type="GenericHeading"/><token type="LiteralString"/></bygroups></rule>
<rule pattern="(\{\{\{)(\n#!.+)?"><bygroups><token type="NameBuiltin"/><token type="NameNamespace"/></bygroups><push state="codeblock"/></rule>
<rule pattern="(\&#x27;\&#x27;\&#x27;?|\|\||`|__|~~|\^|,,|::)"><token type="Comment"/></rule>
<rule pattern="^( +)([.*-])( )"><bygroups><token type="Text"/><token type="NameBuiltin"/><token type="Text"/></bygroups></rule>
<rule pattern="^( +)([a-z]{1,5}\.)( )"><bygroups><token type="Text"/><token type="NameBuiltin"/><token type="Text"/></bygroups></rule>
<rule pattern="\[\[\w+.*?\]\]"><token type="Keyword"/></rule>
<rule pattern="(\[[^\s\]]+)(\s+[^\]]+?)?(\])"><bygroups><token type="Keyword"/><token type="LiteralString"/><token type="Keyword"/></bygroups></rule>
<rule pattern="^----+$"><token type="Keyword"/></rule>
<rule pattern="[^\n\&#x27;\[{!_~^,|]+"><token type="Text"/></rule>
<rule pattern="\n"><token type="Text"/></rule>
<rule pattern="."><token type="Text"/></rule>
</state>
<state name="codeblock">
<rule pattern="\}\}\}"><token type="NameBuiltin"/><pop depth="1"/></rule>
<rule pattern="\{\{\{"><token type="Text"/><push/></rule>
<rule pattern="[^{}]+"><token type="CommentPreproc"/></rule>
<rule pattern="."><token type="CommentPreproc"/></rule>
</state>
</rules>
</lexer>
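The `codeblock` state above pushes onto itself for each nested `{{{` and pops on `}}}`, so nesting is handled by state depth rather than by the regexes themselves. A minimal Python sketch of that depth bookkeeping (an illustration only, not the engine chroma/tartrazine actually use):

```python
def scan_codeblock(text):
    """Mimic the codeblock state: '{{{' pushes, '}}}' pops; scanning
    stops once the depth returns to zero."""
    depth, pos = 1, 0  # we enter the state with one block already open
    while pos < len(text) and depth > 0:
        if text.startswith("}}}", pos):
            depth -= 1
            pos += 3
        elif text.startswith("{{{", pos):
            depth += 1
            pos += 3
        else:
            pos += 1
    return depth

print(scan_codeblock("code {{{ nested }}} more }}}"))  # balanced -> 0
```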


@@ -106,7 +106,7 @@
</bygroups>
<push state="interpol"/>
</rule>
<rule pattern="(&amp;&amp;|&gt;=|&lt;=|\+\+|-&gt;|!=|\|\||//|==|@|!|\+|\?|&lt;|\.|&gt;|\*)">
<rule pattern="(&amp;&amp;|&gt;=|&lt;=|\+\+|-&gt;|!=|=|\|\||//|==|@|!|\+|\?|&lt;|\.|&gt;|\*)">
<token type="Operator"/>
</rule>
<rule pattern="[;:]">

lexers/nsis.xml Normal file

@@ -0,0 +1,59 @@
<lexer>
<config>
<name>NSIS</name>
<alias>nsis</alias>
<alias>nsi</alias>
<alias>nsh</alias>
<filename>*.nsi</filename>
<filename>*.nsh</filename>
<mime_type>text/x-nsis</mime_type>
<case_insensitive>true</case_insensitive>
<not_multiline>true</not_multiline>
</config>
<rules>
<state name="root">
<rule pattern="([;#].*)(\n)"><bygroups><token type="Comment"/><token type="TextWhitespace"/></bygroups></rule>
<rule pattern="&#x27;.*?&#x27;"><token type="LiteralStringSingle"/></rule>
<rule pattern="&quot;"><token type="LiteralStringDouble"/><push state="str_double"/></rule>
<rule pattern="`"><token type="LiteralStringBacktick"/><push state="str_backtick"/></rule>
<rule><include state="macro"/></rule>
<rule><include state="interpol"/></rule>
<rule><include state="basic"/></rule>
<rule pattern="\$\{[a-z_|][\w|]*\}"><token type="KeywordPseudo"/></rule>
<rule pattern="/[a-z_]\w*"><token type="NameAttribute"/></rule>
<rule pattern="\s+"><token type="TextWhitespace"/></rule>
<rule pattern="[\w.]+"><token type="Text"/></rule>
</state>
<state name="basic">
<rule pattern="(\n)(Function)(\s+)([._a-z][.\w]*)\b"><bygroups><token type="TextWhitespace"/><token type="Keyword"/><token type="TextWhitespace"/><token type="NameFunction"/></bygroups></rule>
<rule pattern="\b([_a-z]\w*)(::)([a-z][a-z0-9]*)\b"><bygroups><token type="KeywordNamespace"/><token type="Punctuation"/><token type="NameFunction"/></bygroups></rule>
<rule pattern="\b([_a-z]\w*)(:)"><bygroups><token type="NameLabel"/><token type="Punctuation"/></bygroups></rule>
<rule pattern="(\b[ULS]|\B)([!&lt;&gt;=]?=|\&lt;\&gt;?|\&gt;)\B"><token type="Operator"/></rule>
<rule pattern="[|+-]"><token type="Operator"/></rule>
<rule pattern="\\"><token type="Punctuation"/></rule>
<rule pattern="\b(Abort|Add(?:BrandingImage|Size)|Allow(?:RootDirInstall|SkipFiles)|AutoCloseWindow|BG(?:Font|Gradient)|BrandingText|BringToFront|Call(?:InstDLL)?|(?:Sub)?Caption|ChangeUI|CheckBitmap|ClearErrors|CompletedText|ComponentText|CopyFiles|CRCCheck|Create(?:Directory|Font|Shortcut)|Delete(?:INI(?:Sec|Str)|Reg(?:Key|Value))?|DetailPrint|DetailsButtonText|Dir(?:Show|Text|Var|Verify)|(?:Disabled|Enabled)Bitmap|EnableWindow|EnumReg(?:Key|Value)|Exch|Exec(?:Shell|Wait)?|ExpandEnvStrings|File(?:BufSize|Close|ErrorText|Open|Read(?:Byte)?|Seek|Write(?:Byte)?)?|Find(?:Close|First|Next|Window)|FlushINI|Function(?:End)?|Get(?:CurInstType|CurrentAddress|DlgItem|DLLVersion(?:Local)?|ErrorLevel|FileTime(?:Local)?|FullPathName|FunctionAddress|InstDirError|LabelAddress|TempFileName)|Goto|HideWindow|Icon|If(?:Abort|Errors|FileExists|RebootFlag|Silent)|InitPluginsDir|Install(?:ButtonText|Colors|Dir(?:RegKey)?)|Inst(?:ProgressFlags|Type(?:[GS]etText)?)|Int(?:CmpU?|Fmt|Op)|IsWindow|LangString(?:UP)?|License(?:BkColor|Data|ForceSelection|LangString|Text)|LoadLanguageFile|LockWindow|Log(?:Set|Text)|MessageBox|MiscButtonText|Name|Nop|OutFile|(?:Uninst)?Page(?:Ex(?:End)?)?|PluginDir|Pop|Push|Quit|Read(?:(?:Env|INI|Reg)Str|RegDWORD)|Reboot|(?:Un)?RegDLL|Rename|RequestExecutionLevel|ReserveFile|Return|RMDir|SearchPath|Section(?:Divider|End|(?:(?:Get|Set)(?:Flags|InstTypes|Size|Text))|Group(?:End)?|In)?|SendMessage|Set(?:AutoClose|BrandingImage|Compress(?:ionLevel|or(?:DictSize)?)?|CtlColors|CurInstType|DatablockOptimize|DateSave|Details(?:Print|View)|Error(?:s|Level)|FileAttributes|Font|OutPath|Overwrite|PluginUnload|RebootFlag|ShellVarContext|Silent|StaticBkColor)|Show(?:(?:I|Uni)nstDetails|Window)|Silent(?:Un)?Install|Sleep|SpaceTexts|Str(?:CmpS?|Cpy|Len)|SubSection(?:End)?|Uninstall(?:ButtonText|(?:Sub)?Caption|EXEName|Icon|Text)|UninstPage|Var|VI(?:AddVersionKey|ProductVersion)|WindowIcon|Write(?:INIStr|Reg(:?Bin|DWORD|(?:Expand)?Str)|Uninstaller)|XPStyle)\b"><token 
type="Keyword"/></rule>
<rule pattern="\b(CUR|END|(?:FILE_ATTRIBUTE_)?(?:ARCHIVE|HIDDEN|NORMAL|OFFLINE|READONLY|SYSTEM|TEMPORARY)|HK(CC|CR|CU|DD|LM|PD|U)|HKEY_(?:CLASSES_ROOT|CURRENT_(?:CONFIG|USER)|DYN_DATA|LOCAL_MACHINE|PERFORMANCE_DATA|USERS)|ID(?:ABORT|CANCEL|IGNORE|NO|OK|RETRY|YES)|MB_(?:ABORTRETRYIGNORE|DEFBUTTON[1-4]|ICON(?:EXCLAMATION|INFORMATION|QUESTION|STOP)|OK(?:CANCEL)?|RETRYCANCEL|RIGHT|SETFOREGROUND|TOPMOST|USERICON|YESNO(?:CANCEL)?)|SET|SHCTX|SW_(?:HIDE|SHOW(?:MAXIMIZED|MINIMIZED|NORMAL))|admin|all|auto|both|bottom|bzip2|checkbox|colored|current|false|force|hide|highest|if(?:diff|newer)|lastused|leave|left|listonly|lzma|nevershow|none|normal|off|on|pop|push|radiobuttons|right|show|silent|silentlog|smooth|textonly|top|true|try|user|zlib)\b"><token type="NameConstant"/></rule>
</state>
<state name="macro">
<rule pattern="\!(addincludedir(?:dir)?|addplugindir|appendfile|cd|define|delfilefile|echo(?:message)?|else|endif|error|execute|if(?:macro)?n?(?:def)?|include|insertmacro|macro(?:end)?|packhdr|search(?:parse|replace)|system|tempfilesymbol|undef|verbose|warning)\b"><token type="CommentPreproc"/></rule>
</state>
<state name="interpol">
<rule pattern="\$(R?[0-9])"><token type="NameBuiltinPseudo"/></rule>
<rule pattern="\$(ADMINTOOLS|APPDATA|CDBURN_AREA|COOKIES|COMMONFILES(?:32|64)|DESKTOP|DOCUMENTS|EXE(?:DIR|FILE|PATH)|FAVORITES|FONTS|HISTORY|HWNDPARENT|INTERNET_CACHE|LOCALAPPDATA|MUSIC|NETHOOD|PICTURES|PLUGINSDIR|PRINTHOOD|PROFILE|PROGRAMFILES(?:32|64)|QUICKLAUNCH|RECENT|RESOURCES(?:_LOCALIZED)?|SENDTO|SM(?:PROGRAMS|STARTUP)|STARTMENU|SYSDIR|TEMP(?:LATES)?|VIDEOS|WINDIR|\{NSISDIR\})"><token type="NameBuiltin"/></rule>
<rule pattern="\$(CMDLINE|INSTDIR|OUTDIR|LANGUAGE)"><token type="NameVariableGlobal"/></rule>
<rule pattern="\$[a-z_]\w*"><token type="NameVariable"/></rule>
</state>
<state name="str_double">
<rule pattern="&quot;"><token type="LiteralStringDouble"/><pop depth="1"/></rule>
<rule pattern="\$(\\[nrt&quot;]|\$)"><token type="LiteralStringEscape"/></rule>
<rule><include state="interpol"/></rule>
<rule pattern="[^&quot;]+"><token type="LiteralStringDouble"/></rule>
</state>
<state name="str_backtick">
<rule pattern="`"><token type="LiteralStringDouble"/><pop depth="1"/></rule>
<rule pattern="\$(\\[nrt&quot;]|\$)"><token type="LiteralStringEscape"/></rule>
<rule><include state="interpol"/></rule>
<rule pattern="[^`]+"><token type="LiteralStringDouble"/></rule>
</state>
</rules>
</lexer>
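The `interpol` state resolves `$...` forms by rule order: numbered registers like `$R0` first, a handful of globals next, then any user variable. A small Python approximation (rule order plus `re.I` stand in for the lexer's `case_insensitive` config; this is a sketch, not the real matcher):

```python
import re

# A few of the rules from the "interpol" state, tried in declaration order.
RULES = [
    (re.compile(r"\$(R?[0-9])", re.I), "NameBuiltinPseudo"),
    (re.compile(r"\$(CMDLINE|INSTDIR|OUTDIR|LANGUAGE)", re.I), "NameVariableGlobal"),
    (re.compile(r"\$[a-z_]\w*", re.I), "NameVariable"),
]

def classify(text):
    """Return the token type of the first rule matching `text` in full."""
    for pattern, token in RULES:
        if pattern.fullmatch(text):
            return token
    return None
```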


@@ -41,6 +41,14 @@
<rule pattern="\b(as|assert|begin|class|constraint|do|done|downto|else|end|exception|external|false|for|fun|function|functor|if|in|include|inherit|initializer|lazy|let|match|method|module|mutable|new|object|of|open|private|raise|rec|sig|struct|then|to|true|try|type|value|val|virtual|when|while|with)\b">
<token type="Keyword"/>
</rule>
<rule pattern="({([a-z_]*)\|)([\s\S]+?)(?=\|\2})(\|\2})">
<bygroups>
<token type="LiteralStringAffix"/>
<token type="Ignore"/>
<token type="LiteralString"/>
<token type="LiteralStringAffix"/>
</bygroups>
</rule>
<rule pattern="(~|\}|\|]|\||\{&lt;|\{|`|_|]|\[\||\[&gt;|\[&lt;|\[|\?\?|\?|&gt;\}|&gt;]|&gt;|=|&lt;-|&lt;|;;|;|:&gt;|:=|::|:|\.\.|\.|-&gt;|-\.|-|,|\+|\*|\)|\(|&amp;&amp;|&amp;|#|!=)">
<token type="Operator"/>
</rule>


@@ -51,6 +51,20 @@
<rule pattern = "\#[a-zA-Z_]+\b">
<token type = "NameDecorator"/>
</rule>
<rule pattern = "^\#\+\w+\s*$">
<token type = "NameAttribute"/>
</rule>
<rule pattern = "^(\#\+\w+)(\s+)(\!)?([A-Za-z0-9-_!]+)(?:(,)(\!)?([A-Za-z0-9-_!]+))*\s*$">
<bygroups>
<token type = "NameAttribute"/>
<token type = "TextWhitespace"/>
<token type = "Operator"/>
<token type = "Name"/>
<token type = "Punctuation"/>
<token type = "Operator"/>
<token type = "Name"/>
</bygroups>
</rule>
<rule pattern = "\@(\([a-zA-Z_]+\b\s*.*\)|\(?[a-zA-Z_]+\)?)">
<token type = "NameAttribute"/>
</rule>


@@ -55,4 +55,3 @@
</state>
</rules>
</lexer>

lexers/rst.xml Normal file

@@ -0,0 +1,75 @@
<lexer>
<config>
<name>reStructuredText</name>
<alias>restructuredtext</alias>
<alias>rst</alias>
<alias>rest</alias>
<filename>*.rst</filename>
<filename>*.rest</filename>
<mime_type>text/x-rst</mime_type>
<mime_type>text/prs.fallenstein.rst</mime_type>
</config>
<rules>
<state name="root">
<rule pattern="^(=+|-+|`+|:+|\.+|\&#x27;+|&quot;+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)(.+)(\n)(\1)(\n)"><bygroups><token type="GenericHeading"/><token type="Text"/><token type="GenericHeading"/><token type="Text"/><token type="GenericHeading"/><token type="Text"/></bygroups></rule>
<rule pattern="^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\&#x27;{3,}|&quot;{3,}|~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)"><bygroups><token type="GenericHeading"/><token type="Text"/><token type="GenericHeading"/><token type="Text"/></bygroups></rule>
<rule pattern="^(\s*)([-*+])( .+\n(?:\1 .+\n)*)"><bygroups><token type="Text"/><token type="LiteralNumber"/><usingself state="inline"/></bygroups></rule>
<rule pattern="^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1 .+\n)*)"><bygroups><token type="Text"/><token type="LiteralNumber"/><usingself state="inline"/></bygroups></rule>
<rule pattern="^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1 .+\n)*)"><bygroups><token type="Text"/><token type="LiteralNumber"/><usingself state="inline"/></bygroups></rule>
<rule pattern="^(\s*)([A-Z]+\.)( .+\n(?:\1 .+\n)+)"><bygroups><token type="Text"/><token type="LiteralNumber"/><usingself state="inline"/></bygroups></rule>
<rule pattern="^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)"><bygroups><token type="Text"/><token type="LiteralNumber"/><usingself state="inline"/></bygroups></rule>
<rule pattern="^(\s*)(\|)( .+\n(?:\| .+\n)*)"><bygroups><token type="Text"/><token type="Operator"/><usingself state="inline"/></bygroups></rule>
<rule pattern="^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*)?\n)+)">
<bygroups>
<token type="Punctuation"/>
<token type="Text"/>
<token type="OperatorWord"/>
<token type="Punctuation"/>
<token type="Text"/>
<token type="Keyword"/>
<token type="Text"/>
<token type="Text"/>
<UsingByGroup lexer="6" content="9,10,11"/>
</bygroups>
</rule>
<rule pattern="^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))">
<bygroups>
<token type="Punctuation"/>
<token type="Text"/>
<token type="OperatorWord"/>
<token type="Punctuation"/>
<token type="Text"/>
<usingself state="inline"/>
</bygroups>
</rule>
<rule pattern="^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$"><bygroups><token type="Punctuation"/><token type="Text"/><token type="NameTag"/><usingself state="inline"/></bygroups></rule>
<rule pattern="^( *\.\.)(\s*)(\[.+\])(.*?)$"><bygroups><token type="Punctuation"/><token type="Text"/><token type="NameTag"/><usingself state="inline"/></bygroups></rule>
<rule pattern="^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))"><bygroups><token type="Punctuation"/><token type="Text"/><token type="NameTag"/><token type="Text"/><token type="OperatorWord"/><token type="Punctuation"/><token type="Text"/><usingself state="inline"/></bygroups></rule>
<rule pattern="^ *\.\..*(\n( +.*\n|\n)+)?"><token type="Comment"/></rule>
<rule pattern="^( *)(:(?:\\\\|\\:|[^:\n])+:(?=\s))([ \t]*)"><bygroups><token type="Text"/><token type="NameClass"/><token type="Text"/></bygroups></rule>
<rule pattern="^(\S.*(?&lt;!::)\n)((?:(?: +.*)\n)+)"><bygroups><usingself state="inline"/><usingself state="inline"/></bygroups></rule>
<rule pattern="(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*)?\n)+)"><bygroups><token type="LiteralStringEscape"/><token type="Text"/><token type="LiteralString"/><token type="LiteralString"/><token type="Text"/><token type="LiteralString"/></bygroups></rule>
<rule><include state="inline"/></rule>
</state>
<state name="inline">
<rule pattern="\\."><token type="Text"/></rule>
<rule pattern="``"><token type="LiteralString"/><push state="literal"/></rule>
<rule pattern="(`.+?)(&lt;.+?&gt;)(`__?)"><bygroups><token type="LiteralString"/><token type="LiteralStringInterpol"/><token type="LiteralString"/></bygroups></rule>
<rule pattern="`.+?`__?"><token type="LiteralString"/></rule>
<rule pattern="(`.+?`)(:[a-zA-Z0-9:-]+?:)?"><bygroups><token type="NameVariable"/><token type="NameAttribute"/></bygroups></rule>
<rule pattern="(:[a-zA-Z0-9:-]+?:)(`.+?`)"><bygroups><token type="NameAttribute"/><token type="NameVariable"/></bygroups></rule>
<rule pattern="\*\*.+?\*\*"><token type="GenericStrong"/></rule>
<rule pattern="\*.+?\*"><token type="GenericEmph"/></rule>
<rule pattern="\[.*?\]_"><token type="LiteralString"/></rule>
<rule pattern="&lt;.+?&gt;"><token type="NameTag"/></rule>
<rule pattern="[^\\\n\[*`:]+"><token type="Text"/></rule>
<rule pattern="."><token type="Text"/></rule>
</state>
<state name="literal">
<rule pattern="[^`]+"><token type="LiteralString"/></rule>
<rule pattern="``((?=$)|(?=[-/:.,; \n\x00 &#x27;&quot;\)\]\}&gt;’”»!\?]))"><token type="LiteralString"/><pop depth="1"/></rule>
<rule pattern="`"><token type="LiteralString"/></rule>
</state>
</rules>
</lexer>
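The first rule of the `root` state recognizes over/underlined section titles; the backreference `\1` is what forces the underline to repeat the overline exactly. A quick check with Python's `re` (only an approximation of the regex engine the highlighter uses):

```python
import re

# Overline/underline section-title rule from the "root" state.
TITLE = re.compile(
    r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)(.+)(\n)(\1)(\n)'
)

m = TITLE.match("=====\nTitle\n=====\n")
print(m.group(3))  # -> Title
```

A mismatched underline (say `-----` under a `=====` overline) fails the `\1` backreference, so the rule does not fire and the line falls through to later rules.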


@@ -70,4 +70,3 @@
</state>
</rules>
</lexer>

lexers/snbt.xml Normal file

@@ -0,0 +1,57 @@
<lexer>
<config>
<name>SNBT</name>
<alias>snbt</alias>
<filename>*.snbt</filename>
<mime_type>text/snbt</mime_type>
</config>
<rules>
<state name="root">
<rule pattern="\{"><token type="Punctuation"/><push state="compound"/></rule>
<rule pattern="[^\{]+"><token type="Text"/></rule>
</state>
<state name="whitespace">
<rule pattern="\s+"><token type="TextWhitespace"/></rule>
</state>
<state name="operators">
<rule pattern="[,:;]"><token type="Punctuation"/></rule>
</state>
<state name="literals">
<rule pattern="(true|false)"><token type="KeywordConstant"/></rule>
<rule pattern="-?\d+[eE]-?\d+"><token type="LiteralNumberFloat"/></rule>
<rule pattern="-?\d*\.\d+[fFdD]?"><token type="LiteralNumberFloat"/></rule>
<rule pattern="-?\d+[bBsSlLfFdD]?"><token type="LiteralNumberInteger"/></rule>
<rule pattern="&quot;"><token type="LiteralStringDouble"/><push state="literals.string_double"/></rule>
<rule pattern="&#x27;"><token type="LiteralStringSingle"/><push state="literals.string_single"/></rule>
</state>
<state name="literals.string_double">
<rule pattern="\\."><token type="LiteralStringEscape"/></rule>
<rule pattern="[^\\&quot;\n]+"><token type="LiteralStringDouble"/></rule>
<rule pattern="&quot;"><token type="LiteralStringDouble"/><pop depth="1"/></rule>
</state>
<state name="literals.string_single">
<rule pattern="\\."><token type="LiteralStringEscape"/></rule>
<rule pattern="[^\\&#x27;\n]+"><token type="LiteralStringSingle"/></rule>
<rule pattern="&#x27;"><token type="LiteralStringSingle"/><pop depth="1"/></rule>
</state>
<state name="compound">
<rule pattern="[A-Z_a-z]+"><token type="NameAttribute"/></rule>
<rule><include state="operators"/></rule>
<rule><include state="whitespace"/></rule>
<rule><include state="literals"/></rule>
<rule pattern="\{"><token type="Punctuation"/><push/></rule>
<rule pattern="\["><token type="Punctuation"/><push state="list"/></rule>
<rule pattern="\}"><token type="Punctuation"/><pop depth="1"/></rule>
</state>
<state name="list">
<rule pattern="[A-Z_a-z]+"><token type="NameAttribute"/></rule>
<rule><include state="literals"/></rule>
<rule><include state="operators"/></rule>
<rule><include state="whitespace"/></rule>
<rule pattern="\["><token type="Punctuation"/><push/></rule>
<rule pattern="\{"><token type="Punctuation"/><push state="compound"/></rule>
<rule pattern="\]"><token type="Punctuation"/><pop depth="1"/></rule>
</state>
</rules>
</lexer>
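SNBT's numeric rules rely on ordering: exponent forms are tried first, decimals next, and plain digits with an optional type suffix (`b`, `s`, `l`, `f`, `d`) last. Sketched in Python for illustration (first full match wins, as in the lexer's rule list):

```python
import re

# Numeric rules copied from the "literals" state, in declaration order.
NUMERIC = [
    (re.compile(r"-?\d+[eE]-?\d+"), "LiteralNumberFloat"),
    (re.compile(r"-?\d*\.\d+[fFdD]?"), "LiteralNumberFloat"),
    (re.compile(r"-?\d+[bBsSlLfFdD]?"), "LiteralNumberInteger"),
]

def token_for(text):
    for pattern, token in NUMERIC:
        if pattern.fullmatch(text):
            return token
    return None
```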


@@ -157,8 +157,20 @@
<rule pattern="(continue|returns|storage|memory|delete|return|throw|break|catch|while|else|from|new|try|for|if|is|as|do|in|_)\b">
<token type="Keyword"/>
</rule>
<rule pattern="assembly\b">
<rule pattern="(assembly)(\s+\()(.+)(\)\s+{)">
<bygroups>
<token type="Keyword"/>
<token type="Text"/>
<token type="LiteralString"/>
<token type="Text"/>
</bygroups>
<push state="assembly"/>
</rule>
<rule pattern="(assembly)(\s+{)">
<bygroups>
<token type="Keyword"/>
<token type="Text"/>
</bygroups>
<push state="assembly"/>
</rule>
<rule pattern="(contract|interface|enum|event|struct)(\s+)([a-zA-Z_]\w*)">
@@ -235,7 +247,7 @@
<token type="Punctuation"/>
<pop depth="1"/>
</rule>
<rule pattern="[(),]">
<rule pattern="[(),.]">
<token type="Punctuation"/>
</rule>
<rule pattern=":=|=:">


@@ -40,4 +40,3 @@
</state>
</rules>
</lexer>


@@ -51,6 +51,22 @@
</rule>
</state>
<state name="tag">
<rule>
<include state="jsx"/>
</rule>
<rule pattern=",">
<token type="Punctuation"/>
</rule>
<rule pattern="&#34;(\\\\|\\&#34;|[^&#34;])*&#34;">
<token type="LiteralStringDouble"/>
</rule>
<rule pattern="&#39;(\\\\|\\&#39;|[^&#39;])*&#39;">
<token type="LiteralStringSingle"/>
</rule>
<rule pattern="`">
<token type="LiteralStringBacktick"/>
<push state="interp"/>
</rule>
<rule>
<include state="commentsandwhitespace"/>
</rule>
@@ -171,7 +187,7 @@
</rule>
<rule pattern="(?=/)">
<token type="Text"/>
<push state="#pop" state="badregex"/>
<push state="badregex"/>
</rule>
<rule>
<pop depth="1"/>

lexers/typst.xml Normal file

@@ -0,0 +1,107 @@
<lexer>
<config>
<name>Typst</name>
<alias>typst</alias>
<filename>*.typ</filename>
<mime_type>text/x-typst</mime_type>
</config>
<rules>
<state name="root">
<rule><include state="markup"/></rule>
</state>
<state name="into_code">
<rule pattern="(\#let|\#set|\#show)\b"><token type="KeywordDeclaration"/><push state="inline_code"/></rule>
<rule pattern="(\#import|\#include)\b"><token type="KeywordNamespace"/><push state="inline_code"/></rule>
<rule pattern="(\#if|\#for|\#while|\#export)\b"><token type="KeywordReserved"/><push state="inline_code"/></rule>
<rule pattern="#\{"><token type="Punctuation"/><push state="code"/></rule>
<rule pattern="#\("><token type="Punctuation"/><push state="code"/></rule>
<rule pattern="(#[a-zA-Z_][a-zA-Z0-9_-]*)(\[)"><bygroups><token type="NameFunction"/><token type="Punctuation"/></bygroups><push state="markup"/></rule>
<rule pattern="(#[a-zA-Z_][a-zA-Z0-9_-]*)(\()"><bygroups><token type="NameFunction"/><token type="Punctuation"/></bygroups><push state="code"/></rule>
<rule pattern="(\#true|\#false|\#none|\#auto)\b"><token type="KeywordConstant"/></rule>
<rule pattern="#[a-zA-Z_][a-zA-Z0-9_]*"><token type="NameVariable"/></rule>
<rule pattern="#0x[0-9a-fA-F]+"><token type="LiteralNumberHex"/></rule>
<rule pattern="#0b[01]+"><token type="LiteralNumberBin"/></rule>
<rule pattern="#0o[0-7]+"><token type="LiteralNumberOct"/></rule>
<rule pattern="#[0-9]+[\.e][0-9]+"><token type="LiteralNumberFloat"/></rule>
<rule pattern="#[0-9]+"><token type="LiteralNumberInteger"/></rule>
</state>
<state name="markup">
<rule><include state="comment"/></rule>
<rule pattern="^\s*=+.*$"><token type="GenericHeading"/></rule>
<rule pattern="[*][^*]*[*]"><token type="GenericStrong"/></rule>
<rule pattern="_[^_]*_"><token type="GenericEmph"/></rule>
<rule pattern="\$"><token type="Punctuation"/><push state="math"/></rule>
<rule pattern="`[^`]*`"><token type="LiteralStringBacktick"/></rule>
<rule pattern="^(\s*)(-)(\s+)"><bygroups><token type="TextWhitespace"/><token type="Punctuation"/><token type="TextWhitespace"/></bygroups></rule>
<rule pattern="^(\s*)(\+)(\s+)"><bygroups><token type="TextWhitespace"/><token type="Punctuation"/><token type="TextWhitespace"/></bygroups></rule>
<rule pattern="^(\s*)([0-9]+\.)"><bygroups><token type="TextWhitespace"/><token type="Punctuation"/></bygroups></rule>
<rule pattern="^(\s*)(/)(\s+)([^:]+)(:)"><bygroups><token type="TextWhitespace"/><token type="Punctuation"/><token type="TextWhitespace"/><token type="NameVariable"/><token type="Punctuation"/></bygroups></rule>
<rule pattern="&lt;[a-zA-Z_][a-zA-Z0-9_-]*&gt;"><token type="NameLabel"/></rule>
<rule pattern="@[a-zA-Z_][a-zA-Z0-9_-]*"><token type="NameLabel"/></rule>
<rule pattern="\\#"><token type="Text"/></rule>
<rule><include state="into_code"/></rule>
<rule pattern="```(?:.|\n)*?```"><token type="LiteralStringBacktick"/></rule>
<rule pattern="https?://[0-9a-zA-Z~/%#&amp;=\&#x27;,;.+?]*"><token type="GenericEmph"/></rule>
<rule pattern="(\-\-\-|\\|\~|\-\-|\.\.\.)\B"><token type="Punctuation"/></rule>
<rule pattern="\\\["><token type="Punctuation"/></rule>
<rule pattern="\\\]"><token type="Punctuation"/></rule>
<rule pattern="\["><token type="Punctuation"/><push/></rule>
<rule pattern="\]"><token type="Punctuation"/><pop depth="1"/></rule>
<rule pattern="[ \t]+\n?|\n"><token type="TextWhitespace"/></rule>
<rule pattern="((?![*_$`&lt;@\\#\] ]|https?://).)+"><token type="Text"/></rule>
</state>
<state name="math">
<rule><include state="comment"/></rule>
<rule pattern="(\\_|\\\^|\\\&amp;)"><token type="Text"/></rule>
<rule pattern="(_|\^|\&amp;|;)"><token type="Punctuation"/></rule>
<rule pattern="(\+|/|=|\[\||\|\]|\|\||\*|:=|::=|\.\.\.|&#x27;|\-|=:|!=|&gt;&gt;|&gt;=|&gt;&gt;&gt;|&lt;&lt;|&lt;=|&lt;&lt;&lt;|\-&gt;|\|\-&gt;|=&gt;|\|=&gt;|==&gt;|\-\-&gt;|\~\~&gt;|\~&gt;|&gt;\-&gt;|\-&gt;&gt;|&lt;\-|&lt;==|&lt;\-\-|&lt;\~\~|&lt;\~|&lt;\-&lt;|&lt;&lt;\-|&lt;\-&gt;|&lt;=&gt;|&lt;==&gt;|&lt;\-\-&gt;|&gt;|&lt;|\~|:|\|)"><token type="Operator"/></rule>
<rule pattern="\\"><token type="Punctuation"/></rule>
<rule pattern="\\\$"><token type="Punctuation"/></rule>
<rule pattern="\$"><token type="Punctuation"/><pop depth="1"/></rule>
<rule><include state="into_code"/></rule>
<rule pattern="([a-zA-Z][a-zA-Z0-9-]*)(\s*)(\()"><bygroups><token type="NameFunction"/><token type="TextWhitespace"/><token type="Punctuation"/></bygroups></rule>
<rule pattern="([a-zA-Z][a-zA-Z0-9-]*)(:)"><bygroups><token type="NameVariable"/><token type="Punctuation"/></bygroups></rule>
<rule pattern="([a-zA-Z][a-zA-Z0-9-]*)"><token type="NameVariable"/></rule>
<rule pattern="[0-9]+(\.[0-9]+)?"><token type="LiteralNumber"/></rule>
<rule pattern="\.{1,3}|\(|\)|,|\{|\}"><token type="Punctuation"/></rule>
<rule pattern="&quot;[^&quot;]*&quot;"><token type="LiteralStringDouble"/></rule>
<rule pattern="[ \t\n]+"><token type="TextWhitespace"/></rule>
</state>
<state name="comment">
<rule pattern="//.*$"><token type="CommentSingle"/></rule>
<rule pattern="/[*](.|\n)*?[*]/"><token type="CommentMultiline"/></rule>
</state>
<state name="code">
<rule><include state="comment"/></rule>
<rule pattern="\["><token type="Punctuation"/><push state="markup"/></rule>
<rule pattern="\(|\{"><token type="Punctuation"/><push state="code"/></rule>
<rule pattern="\)|\}"><token type="Punctuation"/><pop depth="1"/></rule>
<rule pattern="&quot;[^&quot;]*&quot;"><token type="LiteralStringDouble"/></rule>
<rule pattern=",|\.{1,2}"><token type="Punctuation"/></rule>
<rule pattern="="><token type="Operator"/></rule>
<rule pattern="(and|or|not)\b"><token type="OperatorWord"/></rule>
<rule pattern="=&gt;|&lt;=|==|!=|&gt;|&lt;|-=|\+=|\*=|/=|\+|-|\\|\*"><token type="Operator"/></rule>
<rule pattern="([a-zA-Z_][a-zA-Z0-9_-]*)(:)"><bygroups><token type="NameVariable"/><token type="Punctuation"/></bygroups></rule>
<rule pattern="([a-zA-Z_][a-zA-Z0-9_-]*)(\()"><bygroups><token type="NameFunction"/><token type="Punctuation"/></bygroups><push state="code"/></rule>
<rule pattern="(as|break|export|continue|else|for|if|in|return|while)\b"><token type="KeywordReserved"/></rule>
<rule pattern="(import|include)\b"><token type="KeywordNamespace"/></rule>
<rule pattern="(auto|none|true|false)\b"><token type="KeywordConstant"/></rule>
<rule pattern="([0-9.]+)(mm|pt|cm|in|em|fr|%)"><bygroups><token type="LiteralNumber"/><token type="KeywordReserved"/></bygroups></rule>
<rule pattern="0x[0-9a-fA-F]+"><token type="LiteralNumberHex"/></rule>
<rule pattern="0b[01]+"><token type="LiteralNumberBin"/></rule>
<rule pattern="0o[0-7]+"><token type="LiteralNumberOct"/></rule>
<rule pattern="[0-9]+[\.e][0-9]+"><token type="LiteralNumberFloat"/></rule>
<rule pattern="[0-9]+"><token type="LiteralNumberInteger"/></rule>
<rule pattern="(let|set|show)\b"><token type="KeywordDeclaration"/></rule>
<rule pattern="([a-zA-Z_][a-zA-Z0-9_-]*)"><token type="NameVariable"/></rule>
<rule pattern="[ \t\n]+"><token type="TextWhitespace"/></rule>
<rule pattern=":"><token type="Punctuation"/></rule>
</state>
<state name="inline_code">
<rule pattern=";\b"><token type="Punctuation"/><pop depth="1"/></rule>
<rule pattern="\n"><token type="TextWhitespace"/><pop depth="1"/></rule>
<rule><include state="code"/></rule>
</state>
</rules>
</lexer>
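The unit rule in the `code` state splits a length like `12.5pt` into its number and unit keyword via two capture groups. Tried with Python's `re` (an approximation; the lexer itself runs on a different regex engine):

```python
import re

# Length rule from the "code" state: a number followed by a unit keyword.
LENGTH = re.compile(r"([0-9.]+)(mm|pt|cm|in|em|fr|%)")

m = LENGTH.fullmatch("12.5pt")
print(m.groups())  # -> ('12.5', 'pt')
```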

lexers/webvtt.xml Normal file

@@ -0,0 +1,283 @@
<lexer>
<config>
<name>WebVTT</name>
<alias>vtt</alias>
<filename>*.vtt</filename>
<mime_type>text/vtt</mime_type>
</config>
<!--
The WebVTT spec refers to a WebVTT line terminator as either CRLF, CR or LF.
(https://www.w3.org/TR/webvtt1/#webvtt-line-terminator) However, with this
definition it is unclear whether CRLF is one line terminator (CRLF) or two
line terminators (CR and LF).
To work around this ambiguity, only CRLF and LF are treated as line terminators here.
To my knowledge only classic Mac OS used a bare CR as its line terminator, so the
lexer should still work for most files.
-->
<rules>
<!-- https://www.w3.org/TR/webvtt1/#webvtt-file-body -->
<state name="root">
<rule pattern="(\AWEBVTT)((?:[ \t][^\r\n]*)?(?:\r?\n){2,})">
<bygroups>
<token type="Keyword" />
<token type="Text" />
</bygroups>
</rule>
<rule pattern="(^REGION)([ \t]*$)">
<bygroups>
<token type="Keyword" />
<token type="Text" />
</bygroups>
<push state="region-settings-list" />
</rule>
<rule
pattern="(^STYLE)([ \t]*$)((?:(?!&#45;&#45;&gt;)[\s\S])*?)((?:\r?\n){2})">
<bygroups>
<token type="Keyword" />
<token type="Text" />
<using lexer="CSS" />
<token type="Text" />
</bygroups>
</rule>
<rule>
<include state="comment" />
</rule>
<rule
pattern="(?=((?![^\r\n]*&#45;&#45;&gt;)[^\r\n]*\r?\n)?(\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3}[ \t]+&#45;&#45;&gt;[ \t]+(\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3})"
>
<push state="cues" />
</rule>
</state>
<!-- https://www.w3.org/TR/webvtt1/#webvtt-region-settings-list -->
<state name="region-settings-list">
<rule pattern="(?: |\t|\r?\n(?!\r?\n))+">
<token type="Text" />
</rule>
<rule pattern="(?:\r?\n){2}">
<token type="Text" />
<pop depth="1" />
</rule>
<rule pattern="(id)(:)(?!&#45;&#45;&gt;)(\S+)">
<bygroups>
<token type="Keyword" />
<token type="Punctuation" />
<token type="Literal" />
</bygroups>
</rule>
<rule pattern="(width)(:)((?:[1-9]?\d|100)(?:\.\d+)?)(%)">
<bygroups>
<token type="Keyword" />
<token type="Punctuation" />
<token type="Literal" />
<token type="KeywordType" />
</bygroups>
</rule>
<rule pattern="(lines)(:)(\d+)">
<bygroups>
<token type="Keyword" />
<token type="Punctuation" />
<token type="Literal" />
</bygroups>
</rule>
<rule
pattern="(regionanchor|viewportanchor)(:)((?:[1-9]?\d|100)(?:\.\d+)?)(%)(,)((?:[1-9]?\d|100)(?:\.\d+)?)(%)">
<bygroups>
<token type="Keyword" />
<token type="Punctuation" />
<token type="Literal" />
<token type="KeywordType" />
<token type="Punctuation" />
<token type="Literal" />
<token type="KeywordType" />
</bygroups>
</rule>
<rule pattern="(scroll)(:)(up)">
<bygroups>
<token type="Keyword" />
<token type="Punctuation" />
<token type="KeywordConstant" />
</bygroups>
</rule>
</state>
<!-- https://www.w3.org/TR/webvtt1/#webvtt-comment-block -->
<state name="comment">
<rule
pattern="^NOTE( |\t|\r?\n)((?!&#45;&#45;&gt;)[\s\S])*?(?:(\r?\n){2}|\Z)">
<token type="Comment" />
</rule>
</state>
<!--
"Zero or more WebVTT cue blocks and WebVTT comment blocks separated from each other by one or more
WebVTT line terminators." (https://www.w3.org/TR/webvtt1/#file-structure)
-->
<state name="cues">
<rule
pattern="(?:((?!&#45;&#45;&gt;)[^\r\n]+)?(\r?\n))?((?:\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3})([ \t]+)(&#45;&#45;&gt;)([ \t]+)((?:\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3})([ \t]*)">
<bygroups>
<token type="Name" />
<token type="Text" />
<token type="LiteralDate" />
<token type="Text" />
<token type="Operator" />
<token type="Text" />
<token type="LiteralDate" />
<token type="Text" />
</bygroups>
<push state="cue-settings-list" />
</rule>
<rule>
<include state="comment" />
</rule>
</state>
<!-- https://www.w3.org/TR/webvtt1/#webvtt-cue-settings-list -->
<state name="cue-settings-list">
<rule pattern="[ \t]+">
<token type="Text" />
</rule>
<rule pattern="(vertical)(:)?(rl|lr)?">
<bygroups>
<token type="Keyword" />
<token type="Punctuation" />
<token type="KeywordConstant" />
</bygroups>
</rule>
<rule
pattern="(line)(:)?(?:(?:((?:[1-9]?\d|100)(?:\.\d+)?)(%)|(-?\d+))(?:(,)(start|center|end))?)?">
<bygroups>
<token type="Keyword" />
<token type="Punctuation" />
<token type="Literal" />
<token type="KeywordType" />
<token type="Literal" />
<token type="Punctuation" />
<token type="KeywordConstant" />
</bygroups>
</rule>
<rule
pattern="(position)(:)?(?:(?:((?:[1-9]?\d|100)(?:\.\d+)?)(%)|(-?\d+))(?:(,)(line-left|center|line-right))?)?">
<bygroups>
<token type="Keyword" />
<token type="Punctuation" />
<token type="Literal" />
<token type="KeywordType" />
<token type="Literal" />
<token type="Punctuation" />
<token type="KeywordConstant" />
</bygroups>
</rule>
<rule pattern="(size)(:)?(?:((?:[1-9]?\d|100)(?:\.\d+)?)(%))?">
<bygroups>
<token type="Keyword" />
<token type="Punctuation" />
<token type="Literal" />
<token type="KeywordType" />
</bygroups>
</rule>
<rule pattern="(align)(:)?(start|center|end|left|right)?">
<bygroups>
<token type="Keyword" />
<token type="Punctuation" />
<token type="KeywordConstant" />
</bygroups>
</rule>
<rule pattern="(region)(:)?((?![^\r\n]*&#45;&#45;&gt;(?=[ \t]+?))[^ \t\r\n]+)?">
<bygroups>
<token type="Keyword" />
<token type="Punctuation" />
<token type="Literal" />
</bygroups>
</rule>
<rule
pattern="(?=\r?\n)">
<push state="cue-payload" />
</rule>
</state>
<!-- https://www.w3.org/TR/webvtt1/#cue-payload -->
<state name="cue-payload">
<rule pattern="(\r?\n){2,}">
<token type="Text" />
<pop depth="2" />
</rule>
<rule pattern="[^&lt;&amp;]+?">
<token type="Text" />
</rule>
<rule pattern="&amp;(#\d+|#x[0-9A-Fa-f]+|[a-zA-Z0-9]+);">
<token type="Text" />
</rule>
<rule pattern="(?=&lt;)">
<token type="Text" />
<push state="cue-span-tag" />
</rule>
</state>
<state name="cue-span-tag">
<rule
pattern="&lt;(?=c|i|b|u|ruby|rt|v|lang|(?:\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3})">
<token type="Punctuation" />
<push state="cue-span-start-tag-name" />
</rule>
<rule pattern="(&lt;/)(c|i|b|u|ruby|rt|v|lang)">
<bygroups>
<token type="Punctuation" />
<token type="NameTag" />
</bygroups>
</rule>
<rule pattern="&gt;">
<token type="Punctuation" />
<pop depth="1" />
</rule>
</state>
<state name="cue-span-start-tag-name">
<rule pattern="(c|i|b|u|ruby|rt)|((?:\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3})">
<bygroups>
<token type="NameTag" />
<token type="LiteralDate" />
</bygroups>
<push state="cue-span-classes-without-annotations" />
</rule>
<rule pattern="v|lang">
<token type="NameTag" />
<push state="cue-span-classes-with-annotations" />
</rule>
</state>
<state name="cue-span-classes-without-annotations">
<rule>
<include state="cue-span-classes" />
</rule>
<rule pattern="(?=&gt;)">
<pop depth="2" />
</rule>
</state>
<state name="cue-span-classes-with-annotations">
<rule>
<include state="cue-span-classes" />
</rule>
<rule pattern="(?=[ \t])">
<push state="cue-span-start-tag-annotations" />
</rule>
</state>
<state name="cue-span-classes">
<rule pattern="(\.)([^ \t\n\r&amp;&lt;&gt;\.]+)">
<bygroups>
<token type="Punctuation" />
<token type="NameTag" />
</bygroups>
</rule>
</state>
<state name="cue-span-start-tag-annotations">
<rule
pattern="[ \t](?:[^\n\r&amp;&gt;]|&amp;(?:#\d+|#x[0-9A-Fa-f]+|[a-zA-Z0-9]+);)+">
<token type="Text" />
</rule>
<rule pattern="(?=&gt;)">
<token type="Text" />
<pop depth="3" />
</rule>
</state>
</rules>
</lexer>
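The cue-timing rules above all share one timestamp shape: optional two-digit hours, strict minutes and seconds (00–59), and three decimal digits. A quick Python translation of that pattern — an illustrative sanity check, not part of the lexer — shows what it accepts:

```python
import re

# Same timestamp shape as the lexer's cue-timing rules:
# optional "hh:", minutes 00-59, seconds 00-59, three decimals.
TIMESTAMP = re.compile(r"(?:\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3}")

print(bool(TIMESTAMP.fullmatch("00:01.000")))     # True
print(bool(TIMESTAMP.fullmatch("12:34:56.789")))  # True
print(bool(TIMESTAMP.fullmatch("00:61.000")))     # False (seconds over 59)
```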


@ -53,7 +53,7 @@
<bygroups>
<token type="Punctuation"/>
<token type="LiteralStringDoc"/>
<token type="TextWhitespace"/>
<token type="Ignore"/>
</bygroups>
</rule>
<rule pattern="(false|False|FALSE|true|True|TRUE|null|Off|off|yes|Yes|YES|OFF|On|ON|no|No|on|NO|n|N|Y|y)\b">

scripts/lexer_metadata.py Normal file

@ -0,0 +1,63 @@
# This script parses the metadata of all the lexers and generates
# a datafile with all the information so we don't have to instantiate
# all the lexers to get the information.
import glob
from collections import defaultdict

lexer_by_name = {}
lexer_by_mimetype = defaultdict(set)
lexer_by_filename = defaultdict(set)

for fname in glob.glob("lexers/*.xml"):
    aliases = set([])
    mimetypes = set([])
    filenames = set([])
    print(fname)
    with open(fname) as f:
        lexer_name = fname.split("/")[-1].split(".")[0]
        for line in f:
            if "</config" in line:
                break
            if "<filename>" in line:
                filenames.add(line.split(">")[1].split("<")[0].lower())
            if "<mime_type>" in line:
                mimetypes.add(line.split(">")[1].split("<")[0].lower())
            if "<alias>" in line:
                aliases.add(line.split(">")[1].split("<")[0].lower())
            if "<name>" in line:
                aliases.add(line.split(">")[1].split("<")[0].lower())
    for alias in aliases:
        if alias in lexer_by_name and alias != lexer_by_name[alias]:
            raise Exception(f"Alias {alias} already in use by {lexer_by_name[alias]}")
        lexer_by_name[alias] = lexer_name
    for mimetype in mimetypes:
        lexer_by_mimetype[mimetype] = lexer_name
    for filename in filenames:
        lexer_by_filename[filename].add(lexer_name)

with open("src/constants/lexers.cr", "w") as f:
    # Crystal doesn't come from an XML file
    lexer_by_name["crystal"] = "crystal"
    lexer_by_name["cr"] = "crystal"
    lexer_by_filename["*.cr"] = ["crystal"]
    lexer_by_mimetype["text/x-crystal"] = "crystal"
    f.write("module Tartrazine\n")
    f.write(" LEXERS_BY_NAME = {\n")
    for k in sorted(lexer_by_name.keys()):
        v = lexer_by_name[k]
        f.write(f'"{k}" => "{v}", \n')
    f.write("}\n")
    f.write(" LEXERS_BY_MIMETYPE = {\n")
    for k in sorted(lexer_by_mimetype.keys()):
        v = lexer_by_mimetype[k]
        f.write(f'"{k}" => "{v}", \n')
    f.write("}\n")
    f.write(" LEXERS_BY_FILENAME = {\n")
    for k in sorted(lexer_by_filename.keys()):
        v = lexer_by_filename[k]
        # Build the Crystal array literal first to avoid quoting
        # gymnastics inside the f-string.
        names = str(sorted(list(v))).replace("'", '"')
        f.write(f'"{k}" => {names}, \n')
    f.write("}\n")
    f.write("end\n")
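The tag scraping above relies on one trick: for a simple one-line `<tag>value</tag>`, the value is whatever sits between the first `>` and the next `<`, so no XML parser is needed. A standalone sketch of that helper (the function name is illustrative):

```python
# Extract the text content of a simple one-line XML tag the same way the
# metadata script does: split on ">" and "<" instead of parsing XML.
def tag_text(line: str) -> str:
    return line.split(">")[1].split("<")[0].lower()

print(tag_text("    <alias>VTT</alias>"))           # vtt
print(tag_text("<mime_type>text/vtt</mime_type>"))  # text/vtt
```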


@ -1,15 +1,30 @@
# Script to generate abbreviations for tokens. Parses all lexers
# and styles files to find all token names and generate a unique
# abbreviation for each one. The abbreviations are generated by
# taking the uppercase letters of the token name and converting
# them to lowercase. If the abbreviation is not unique, the script
# will print a warning and exit.
import sys
import string
import glob

# Run it as grep token lexers/* | python scripts/token_abbrevs.py

tokens = {"Highlight"}
abbrevs = {"Highlight": "hl"}

def abbr(line):
    return "".join(c for c in line if c in string.ascii_uppercase).lower()

abbrevs = {}
tokens = set([])
for line in sys.stdin:

def check_abbrevs():
    if len(abbrevs) != len(tokens):
        print("Warning: Abbreviations are not unique")
        print(len(abbrevs), len(tokens))
        sys.exit(1)

# Processes all files in lexers looking for token names
for fname in glob.glob("lexers/*.xml"):
    with open(fname) as f:
        for line in f:
            if "<token" not in line:
                continue
            line = line.strip()
@ -17,8 +32,24 @@ for line in sys.stdin:
            line = line.split('"')[1]
            abbrevs[line] = abbr(line)
            tokens.add(line)
check_abbrevs()
print("Abbreviations: {")
for k, v in abbrevs.items():
    print(f' "{k}" => "{v}",')
print("}")

# Processes all files in styles looking for token names too
for fname in glob.glob("styles/*.xml"):
    with open(fname) as f:
        for line in f:
            if "<entry" not in line:
                continue
            line = line.strip()
            line = line.split('type=', 1)[-1]
            line = line.split('"')[1]
            abbrevs[line] = abbr(line)
            tokens.add(line)

check_abbrevs()
with open("src/constants/token_abbrevs.cr", "w") as outf:
    outf.write("module Tartrazine\n")
    outf.write(" Abbreviations = {\n")
    for k in sorted(abbrevs.keys()):
        outf.write(f' "{k}" => "{abbrevs[k]}",\n')
    outf.write(" }\nend\n")
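The abbreviation rule the script's comment describes — keep the uppercase letters of the token name, lowercase them — can be checked in isolation:

```python
import string

# Same abbreviation rule as the script: keep only the uppercase letters
# of a token name and lowercase them.
def abbr(name: str) -> str:
    return "".join(c for c in name if c in string.ascii_uppercase).lower()

print(abbr("LiteralStringDouble"))  # lsd
print(abbr("NameFunction"))         # nf
print(abbr("Keyword"))              # k
```

Two distinct token names that share the same uppercase letters would collide, which is exactly what the `check_abbrevs` guard catches.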


@ -1,5 +1,5 @@
name: tartrazine
version: 0.1.1
version: 0.12.0
authors:
- Roberto Alsina <roberto.alsina@gmail.com>
@ -10,11 +10,18 @@ targets:
dependencies:
baked_file_system:
github: schovi/baked_file_system
github: ralsina/baked_file_system
branch: master
base58:
github: crystal-china/base58.cr
sixteen:
github: ralsina/sixteen
docopt:
github: chenkovsky/docopt.cr
stumpy_utils:
github: stumpycr/stumpy_utils
stumpy_png:
github: stumpycr/stumpy_png
crystal: ">= 1.13.0"

spec/css/manni.css Normal file

@ -0,0 +1 @@
.e {color: #aa0000;background-color: #ffaaaa;}.b {background-color: #f0f3f3;tab-size: 8;}.k {color: #006699;font-weight: 600;}.kp {}.kt {color: #007788;}.na {color: #330099;}.nb {color: #336666;}.nc {color: #00aa88;font-weight: 600;}.nc {color: #336600;}.nd {color: #9999ff;}.ne {color: #999999;font-weight: 600;}.ne {color: #cc0000;font-weight: 600;}.nf {color: #cc00ff;}.nl {color: #9999ff;}.nn {color: #00ccff;font-weight: 600;}.nt {color: #330099;font-weight: 600;}.nv {color: #003333;}.ls {color: #cc3300;}.lsd {font-style: italic;}.lse {color: #cc3300;font-weight: 600;}.lsi {color: #aa0000;}.lso {color: #cc3300;}.lsr {color: #33aaaa;}.lss {color: #ffcc33;}.ln {color: #ff6600;}.o {color: #555555;}.ow {color: #000000;font-weight: 600;}.c {color: #0099ff;font-style: italic;}.cs {font-weight: 600;}.cp {color: #009999;font-style: normal;}.gd {background-color: #ffcccc;border: 1px solid #cc0000;}.ge {font-style: italic;}.ge {color: #ff0000;}.gh {color: #003300;font-weight: 600;}.gi {background-color: #ccffcc;border: 1px solid #00cc00;}.go {color: #aaaaaa;}.gp {color: #000099;font-weight: 600;}.gs {font-weight: 600;}.gs {color: #003300;font-weight: 600;}.gt {color: #99cc66;}.gu {text-decoration: underline;}.tw {color: #bbbbbb;}.lh {}

spec/css/vesper.css Normal file

@ -0,0 +1 @@
.b {color: #b7b7b7;background-color: #101010;font-weight: 600;tab-size: 8;}.lh {color: #8eaaaa;background-color: #232323;}.t {color: #b7b7b7;}.e {color: #de6e6e;}.c {color: #333333;}.cp {color: #876c4f;}.cpf {color: #5f8787;}.k {color: #d69094;}.kt {color: #de6e6e;}.na {color: #8eaaaa;}.nb {color: #de6e6e;}.nbp {color: #de6e6e;}.nc {color: #8eaaaa;}.nc {color: #dab083;}.nd {color: #dab083;}.nf {color: #8eaaaa;}.nn {color: #8eaaaa;}.nt {color: #d69094;}.nv {color: #8eaaaa;}.nvi {color: #de6e6e;}.ln {color: #dab083;}.o {color: #60a592;}.ow {color: #d69094;}.l {color: #5f8787;}.ls {color: #5f8787;}.lsi {color: #876c4f;}.lsr {color: #60a592;}.lss {color: #dab083;}


@ -0,0 +1 @@
puts "Hello Crystal!"


@ -0,0 +1 @@
[{"type":"Text","value":"puts "},{"type":"LiteralString","value":"\"Hello Crystal!\""},{"type":"Text","value":"\n"}]


@ -0,0 +1,11 @@
from flask import Flask, request
app = Flask("{{name}}")
@app.route('/')
def handle():
return "Hello World from Flask!"
@app.route('/ping')
def ping():
return "OK"


@ -0,0 +1 @@
[{"type":"KeywordNamespace","value":"from"},{"type":"Text","value":" "},{"type":"NameNamespace","value":"flask"},{"type":"Text","value":" "},{"type":"KeywordNamespace","value":"import"},{"type":"Text","value":" "},{"type":"Name","value":"Flask"},{"type":"Punctuation","value":","},{"type":"Text","value":" "},{"type":"Name","value":"request"},{"type":"Text","value":"\n\n"},{"type":"Name","value":"app"},{"type":"Text","value":" "},{"type":"Operator","value":"="},{"type":"Text","value":" "},{"type":"Name","value":"Flask"},{"type":"Punctuation","value":"("},{"type":"LiteralStringDouble","value":"\""},{"type":"CommentPreproc","value":"{{"},{"type":"NameVariable","value":"name"},{"type":"CommentPreproc","value":"}}"},{"type":"LiteralStringDouble","value":"\")"},{"type":"Text","value":"\n\n"},{"type":"NameDecorator","value":"@app.route"},{"type":"Punctuation","value":"("},{"type":"LiteralStringSingle","value":"'/'"},{"type":"Punctuation","value":")"},{"type":"Text","value":"\n"},{"type":"Keyword","value":"def"},{"type":"Text","value":" "},{"type":"NameFunction","value":"handle"},{"type":"Punctuation","value":"():"},{"type":"Text","value":"\n "},{"type":"Keyword","value":"return"},{"type":"Text","value":" "},{"type":"LiteralStringDouble","value":"\"Hello World from Flask!\""},{"type":"Text","value":"\n\n"},{"type":"NameDecorator","value":"@app.route"},{"type":"Punctuation","value":"("},{"type":"LiteralStringSingle","value":"'/ping'"},{"type":"Punctuation","value":")"},{"type":"Text","value":"\n"},{"type":"Keyword","value":"def"},{"type":"Text","value":" "},{"type":"NameFunction","value":"ping"},{"type":"Punctuation","value":"():"},{"type":"Text","value":"\n "},{"type":"Keyword","value":"return"},{"type":"Text","value":" "},{"type":"LiteralStringDouble","value":"\"OK\""},{"type":"Text","value":"\n"}]


@ -1,8 +1,15 @@
require "./spec_helper"
require "digest/sha1"
# These are the testcases from Pygments
testcases = Dir.glob("#{__DIR__}/tests/**/*txt").sort
# These are custom testcases
examples = Dir.glob("#{__DIR__}/examples/**/*.*").reject(&.ends_with? ".json").sort!
# CSS Stylesheets
css_files = Dir.glob("#{__DIR__}/css/*.css")
# These lexers don't load because of parsing issues
failing_lexers = {
  "webgpu_shading_language",
@ -14,35 +21,39 @@ unicode_problems = {
  "#{__DIR__}/tests/java/test_string_literals.txt",
  "#{__DIR__}/tests/json/test_strings.txt",
  "#{__DIR__}/tests/systemd/example1.txt",
  "#{__DIR__}/tests/c++/test_unicode_identifiers.txt",
}
# These testcases fail because of differences in the way chroma and tartrazine tokenize
# but tartrazine is correct
bad_in_chroma = {
  "#{__DIR__}/tests/bash_session/test_comment_after_prompt.txt",
  "#{__DIR__}/tests/html/javascript_backtracking.txt",
  "#{__DIR__}/tests/java/test_default.txt",
  "#{__DIR__}/tests/java/test_multiline_string.txt",
  "#{__DIR__}/tests/java/test_numeric_literals.txt",
  "#{__DIR__}/tests/octave/test_multilinecomment.txt",
  "#{__DIR__}/tests/php/test_string_escaping_run.txt",
  "#{__DIR__}/tests/python_2/test_cls_builtin.txt",
  "#{__DIR__}/tests/bqn/test_syntax_roles.txt", # This one only fails in CI
}

known_bad = {
  "#{__DIR__}/tests/bash_session/fake_ps2_prompt.txt",
  "#{__DIR__}/tests/bash_session/prompt_in_output.txt",
  "#{__DIR__}/tests/bash_session/test_newline_in_echo_no_ps2.txt",
  "#{__DIR__}/tests/bash_session/test_newline_in_ls_ps2.txt",
  "#{__DIR__}/tests/bash_session/ps2_prompt.txt",
  "#{__DIR__}/tests/bash_session/test_newline_in_ls_no_ps2.txt",
  "#{__DIR__}/tests/bash_session/test_virtualenv.txt",
  "#{__DIR__}/tests/bash_session/test_newline_in_echo_no_ps2.txt",
  "#{__DIR__}/tests/bash_session/test_newline_in_echo_ps2.txt",
  "#{__DIR__}/tests/c/test_string_resembling_decl_end.txt",
  "#{__DIR__}/tests/html/css_backtracking.txt",
  "#{__DIR__}/tests/bash_session/test_newline_in_ls_no_ps2.txt",
  "#{__DIR__}/tests/bash_session/test_newline_in_ls_ps2.txt",
  "#{__DIR__}/tests/bash_session/test_virtualenv.txt",
  "#{__DIR__}/tests/mcfunction/commenting.txt",
  "#{__DIR__}/tests/mcfunction/coordinates.txt",
  "#{__DIR__}/tests/mcfunction/data.txt",
  "#{__DIR__}/tests/mcfunction/difficult_1.txt",
  "#{__DIR__}/tests/mcfunction/multiline.txt",
  "#{__DIR__}/tests/mcfunction/selectors.txt",
  "#{__DIR__}/tests/php/anonymous_class.txt",
  "#{__DIR__}/tests/html/javascript_unclosed.txt",
  "#{__DIR__}/tests/mcfunction/simple.txt",
}
# Tests that fail because of a limitation in PCRE2
@ -52,6 +63,14 @@ not_my_fault = {
describe Tartrazine do
  describe "Lexer" do
    examples.each do |example|
      it "parses #{example}".split("/")[-2...].join("/") do
        lexer = Tartrazine.lexer(name: File.basename(File.dirname(example)).downcase)
        text = File.read(example)
        expected = Array(Tartrazine::Token).from_json(File.read("#{example}.json"))
        Tartrazine::RegexLexer.collapse_tokens(lexer.tokenizer(text).to_a).should eq expected
      end
    end
    testcases.each do |testcase|
      if known_bad.includes?(testcase)
        pending "parses #{testcase}".split("/")[-2...].join("/") do
@ -70,12 +89,67 @@ describe Tartrazine do
      end
    end
  end

  describe "formatter" do
    css_files.each do |css_file|
      it "generates #{css_file}" do
        css = File.read(css_file)
        theme = Tartrazine.theme(File.basename(css_file, ".css"))
        formatter = Tartrazine::Html.new(theme: theme)
        formatter.style_defs.strip.should eq css.strip
      end
    end
  end

  describe "to_html" do
    it "should do basic highlighting" do
      html = Tartrazine.to_html("puts 'Hello, World!'", "ruby", standalone: false)
      html.should eq(
        "<pre class=\"b\" ><code class=\"b\"><span class=\"nb\">puts</span><span class=\"t\"> </span><span class=\"lss\">&#39;Hello, World!&#39;</span></code></pre>"
      )
    end
  end

  describe "to_ansi" do
    it "should do basic highlighting" do
      ansi = Tartrazine.to_ansi("puts 'Hello, World!'", "ruby")
      if ENV.fetch("CI", nil)
        # In Github Actions there is no terminal so these don't
        # really work
        ansi.should eq(
          "puts 'Hello, World!'"
        )
      else
        ansi.should eq(
          "\e[38;2;171;70;66mputs\e[0m\e[38;2;216;216;216m \e[0m\e[38;2;161;181;108m'Hello, World!'\e[0m"
        )
      end
    end
  end

  describe "to_svg" do
    it "should do basic highlighting" do
      svg = Tartrazine.to_svg("puts 'Hello, World!'", "ruby", standalone: false)
      svg.should eq(
        "<text x=\"0\" y=\"19\" xml:space=\"preserve\"><tspan fill=\"#ab4642\">puts</tspan><tspan fill=\"#d8d8d8\"> </tspan><tspan fill=\"#a1b56c\">&#39;Hello, World!&#39;</tspan></text>"
      )
    end
  end

  describe "to_png" do
    it "should do basic highlighting" do
      png = Digest::SHA1.hexdigest(Tartrazine.to_png("puts 'Hello, World!'", "ruby"))
      png.should eq(
        "62d419dcd263fffffc265a0f04c156dc2530c362"
      )
    end
  end
end
# Helper that creates lexer and tokenizes
def tokenize(lexer_name, text)
  lexer = Tartrazine.lexer(lexer_name)
  lexer.tokenize(text)
  tokenizer = Tartrazine.lexer(lexer_name).tokenizer(text)
  Tartrazine::RegexLexer.collapse_tokens(tokenizer.to_a)
end
# Helper that tokenizes using chroma to validate the lexer
@ -87,5 +161,5 @@ def chroma_tokenize(lexer_name, text)
    ["-f", "json", "-l", lexer_name],
    input: input, output: output
  )
  Tartrazine::Lexer.collapse_tokens(Array(Tartrazine::Token).from_json(output.to_s))
  Tartrazine::RegexLexer.collapse_tokens(Array(Tartrazine::Token).from_json(output.to_s))
end
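Both helpers funnel their results through collapse_tokens, which merges adjacent tokens of the same type before the comparison so that tokenizers that split runs differently still compare equal. A rough Python model of that behavior — an illustration, not the Crystal implementation:

```python
# Merge runs of adjacent tokens that share a type, concatenating their
# values; mirrors what a collapse_tokens helper does for test comparisons.
def collapse_tokens(tokens):
    out = []
    for ttype, value in tokens:
        if out and out[-1][0] == ttype:
            out[-1] = (ttype, out[-1][1] + value)
        else:
            out.append((ttype, value))
    return out

print(collapse_tokens([("Text", "a"), ("Text", "b"), ("Keyword", "if")]))
# [('Text', 'ab'), ('Keyword', 'if')]
```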


@ -1,16 +0,0 @@
---input---
@[FOO::Bar::Baz(opt: "xx")]
---tokens---
'@[' Operator
'FOO::Bar::Baz' Name.Decorator
'(' Punctuation
'opt' Literal.String.Symbol
':' Punctuation
' ' Text.Whitespace
'"' Literal.String.Double
'xx' Literal.String.Double
'"' Literal.String.Double
')' Punctuation
']' Operator
'\n' Text.Whitespace


@ -1,11 +0,0 @@
---input---
[5][5]?
---tokens---
'[' Operator
'5' Literal.Number.Integer
']' Operator
'[' Operator
'5' Literal.Number.Integer
']?' Operator
'\n' Text.Whitespace


@ -1,25 +0,0 @@
---input---
'a'
'я'
'\u{1234}'
'
'
'abc'
---tokens---
"'a'" Literal.String.Char
'\n' Text.Whitespace
"'я'" Literal.String.Char
'\n' Text.Whitespace
"'\\u{1234}'" Literal.String.Char
'\n' Text.Whitespace
"'\n'" Literal.String.Char
'\n' Text.Whitespace
"'" Error
'abc' Name
"'" Error
'\n' Text.Whitespace


@ -1,14 +0,0 @@
---input---
HTTP
HTTP::Server.new
---tokens---
'HTTP' Name.Constant
'\n' Text.Whitespace
'HTTP' Name
'::' Operator
'Server' Name
'.' Operator
'new' Name
'\n' Text.Whitespace


@ -1,27 +0,0 @@
---input---
%()
%[]
%{}
%<>
%||
---tokens---
'%(' Literal.String.Other
')' Literal.String.Other
'\n' Text.Whitespace
'%[' Literal.String.Other
']' Literal.String.Other
'\n' Text.Whitespace
'%{' Literal.String.Other
'}' Literal.String.Other
'\n' Text.Whitespace
'%<' Literal.String.Other
'>' Literal.String.Other
'\n' Text.Whitespace
'%|' Literal.String.Other
'|' Literal.String.Other
'\n' Text.Whitespace


@ -1,19 +0,0 @@
---input---
str.gsub(%r{\\\\}, "/")
---tokens---
'str' Name
'.' Operator
'gsub' Name
'(' Punctuation
'%r{' Literal.String.Regex
'\\\\' Literal.String.Regex
'\\\\' Literal.String.Regex
'}' Literal.String.Regex
',' Punctuation
' ' Text.Whitespace
'"' Literal.String.Double
'/' Literal.String.Double
'"' Literal.String.Double
')' Punctuation
'\n' Text.Whitespace


@ -1,9 +0,0 @@
---input---
"\#{a + b}"
---tokens---
'"' Literal.String.Double
'\\#' Literal.String.Escape
'{a + b}' Literal.String.Double
'"' Literal.String.Double
'\n' Text.Whitespace


@ -1,56 +0,0 @@
---input---
"A#{ (3..5).group_by { |x| x/2}.map do |k,v| "#{k}" end.join }" + "Z"
---tokens---
'"' Literal.String.Double
'A' Literal.String.Double
'#{' Literal.String.Interpol
' ' Text.Whitespace
'(' Punctuation
'3' Literal.Number.Integer
'..' Operator
'5' Literal.Number.Integer
')' Punctuation
'.' Operator
'group_by' Name
' ' Text.Whitespace
'{' Literal.String.Interpol
' ' Text.Whitespace
'|' Operator
'x' Name
'|' Operator
' ' Text.Whitespace
'x' Name
'/' Operator
'2' Literal.Number.Integer
'}' Literal.String.Interpol
'.' Operator
'map' Name
' ' Text.Whitespace
'do' Keyword
' ' Text.Whitespace
'|' Operator
'k' Name
',' Punctuation
'v' Name
'|' Operator
' ' Text.Whitespace
'"' Literal.String.Double
'#{' Literal.String.Interpol
'k' Name
'}' Literal.String.Interpol
'"' Literal.String.Double
' ' Text.Whitespace
'end' Keyword
'.' Operator
'join' Name
' ' Text.Whitespace
'}' Literal.String.Interpol
'"' Literal.String.Double
' ' Text.Whitespace
'+' Operator
' ' Text.Whitespace
'"' Literal.String.Double
'Z' Literal.String.Double
'"' Literal.String.Double
'\n' Text.Whitespace


@ -1,58 +0,0 @@
---input---
@[Link("some")]
lib LibSome
@[CallConvention("X86_StdCall")]
fun foo="some.foo"(thing : Void*) : LibC::Int
end
---tokens---
'@[' Operator
'Link' Name.Decorator
'(' Punctuation
'"' Literal.String.Double
'some' Literal.String.Double
'"' Literal.String.Double
')' Punctuation
']' Operator
'\n' Text.Whitespace
'lib' Keyword
' ' Text.Whitespace
'LibSome' Name.Namespace
'\n' Text.Whitespace
'@[' Operator
'CallConvention' Name.Decorator
'(' Punctuation
'"' Literal.String.Double
'X86_StdCall' Literal.String.Double
'"' Literal.String.Double
')' Punctuation
']' Operator
'\n' Text.Whitespace
'fun' Keyword
' ' Text.Whitespace
'foo' Name.Function
'=' Operator
'"' Literal.String.Double
'some.foo' Literal.String.Double
'"' Literal.String.Double
'(' Punctuation
'thing' Name
' ' Text.Whitespace
':' Punctuation
' ' Text.Whitespace
'Void' Name
'*' Operator
')' Punctuation
' ' Text.Whitespace
':' Punctuation
' ' Text.Whitespace
'LibC' Name
'::' Operator
'Int' Name
'\n' Text.Whitespace
'end' Keyword
'\n' Text.Whitespace


@ -1,76 +0,0 @@
---input---
def<=>(other : self) : Int
{%for field in %w(first_name middle_name last_name)%}
cmp={{field.id}}<=>other.{{field.id}}
return cmp if cmp!=0
{%end%}
0
end
---tokens---
'def' Keyword
'<=>' Name.Function
'(' Punctuation
'other' Name
' ' Text.Whitespace
':' Punctuation
' ' Text.Whitespace
'self' Keyword
')' Punctuation
' ' Text.Whitespace
':' Punctuation
' ' Text.Whitespace
'Int' Name
'\n' Text.Whitespace
'{%' Literal.String.Interpol
'for' Keyword
' ' Text.Whitespace
'field' Name
' ' Text.Whitespace
'in' Keyword
' ' Text.Whitespace
'%w(' Literal.String.Other
'first_name middle_name last_name' Literal.String.Other
')' Literal.String.Other
'%}' Literal.String.Interpol
'\n' Text.Whitespace
'cmp' Name
'=' Operator
'{{' Literal.String.Interpol
'field' Name
'.' Operator
'id' Name
'}}' Literal.String.Interpol
'<=>' Operator
'other' Name
'.' Operator
'{{' Literal.String.Interpol
'field' Name
'.' Operator
'id' Name
'}}' Literal.String.Interpol
'\n' Text.Whitespace
'return' Keyword
' ' Text.Whitespace
'cmp' Name
' ' Text.Whitespace
'if' Keyword
' ' Text.Whitespace
'cmp' Name
'!=' Operator
'0' Literal.Number.Integer
'\n' Text.Whitespace
'{%' Literal.String.Interpol
'end' Keyword
'%}' Literal.String.Interpol
'\n' Text.Whitespace
'0' Literal.Number.Integer
'\n' Text.Whitespace
'end' Keyword
'\n' Text.Whitespace


@ -1,84 +0,0 @@
---input---
# Integers
0
1
1_000_000
1u8
11231231231121312i64
# Floats
0.0
1.0_f32
1_f32
0f64
1e+4
1e111
1_234.567_890
# Error
01
0b2
0x129g2
0o12358
---tokens---
'# Integers' Comment.Single
'\n' Text.Whitespace
'0' Literal.Number.Integer
'\n' Text.Whitespace
'1' Literal.Number.Integer
'\n' Text.Whitespace
'1_000_000' Literal.Number.Integer
'\n' Text.Whitespace
'1u8' Literal.Number.Integer
'\n' Text.Whitespace
'11231231231121312i64' Literal.Number.Integer
'\n\n' Text.Whitespace
'# Floats' Comment.Single
'\n' Text.Whitespace
'0.0' Literal.Number.Float
'\n' Text.Whitespace
'1.0_f32' Literal.Number.Float
'\n' Text.Whitespace
'1_f32' Literal.Number.Float
'\n' Text.Whitespace
'0f64' Literal.Number.Float
'\n' Text.Whitespace
'1e+4' Literal.Number.Float
'\n' Text.Whitespace
'1e111' Literal.Number.Float
'\n' Text.Whitespace
'1_234.567_890' Literal.Number.Float
'\n\n' Text.Whitespace
'# Error' Comment.Single
'\n' Text.Whitespace
'0' Error
'1' Literal.Number.Integer
'\n' Text.Whitespace
'0' Error
'b2' Name
'\n' Text.Whitespace
'0' Error
'x129g2' Name
'\n' Text.Whitespace
'0' Error
'o12358' Name
'\n' Text.Whitespace


@ -1,18 +0,0 @@
---input---
([] of Int32).[]?(5)
---tokens---
'(' Punctuation
'[' Operator
']' Operator
' ' Text.Whitespace
'of' Keyword
' ' Text.Whitespace
'Int32' Name
')' Punctuation
'.' Operator
'[]?' Name.Operator
'(' Punctuation
'5' Literal.Number.Integer
')' Punctuation
'\n' Text.Whitespace


@ -1,41 +0,0 @@
---input---
%(hello ("world"))
%[hello ["world"]]
%{hello "world"}
%<hello <"world">>
%|hello "world"|
---tokens---
'%(' Literal.String.Other
'hello ' Literal.String.Other
'(' Literal.String.Other
'"world"' Literal.String.Other
')' Literal.String.Other
')' Literal.String.Other
'\n' Text.Whitespace
'%[' Literal.String.Other
'hello ' Literal.String.Other
'[' Literal.String.Other
'"world"' Literal.String.Other
']' Literal.String.Other
']' Literal.String.Other
'\n' Text.Whitespace
'%{' Literal.String.Other
'hello "world"' Literal.String.Other
'}' Literal.String.Other
'\n' Text.Whitespace
'%<' Literal.String.Other
'hello ' Literal.String.Other
'<' Literal.String.Other
'"world"' Literal.String.Other
'>' Literal.String.Other
'>' Literal.String.Other
'\n' Text.Whitespace
'%|' Literal.String.Other
'hello "world"' Literal.String.Other
'|' Literal.String.Other
'\n' Text.Whitespace


@ -1,31 +0,0 @@
---input---
%Q(hello \n #{name})
%q(hello \n #{name})
%w(foo\nbar baz)
---tokens---
'%Q(' Literal.String.Other
'hello ' Literal.String.Other
'\\n' Literal.String.Escape
' ' Literal.String.Other
'#{' Literal.String.Interpol
'name' Name
'}' Literal.String.Interpol
')' Literal.String.Other
'\n' Text.Whitespace
'%q(' Literal.String.Other
'hello ' Literal.String.Other
'\\' Literal.String.Other
'n ' Literal.String.Other
'#' Literal.String.Other
'{name}' Literal.String.Other
')' Literal.String.Other
'\n' Text.Whitespace
'%w(' Literal.String.Other
'foo' Literal.String.Other
'\\' Literal.String.Other
'nbar baz' Literal.String.Other
')' Literal.String.Other
'\n' Text.Whitespace


@ -1,20 +0,0 @@
---input---
record Cls do
def_equals s
end
---tokens---
'record' Name.Builtin.Pseudo
' ' Text.Whitespace
'Cls' Name
' ' Text.Whitespace
'do' Keyword
'\n' Text.Whitespace
'def_equals' Name.Builtin.Pseudo
' ' Text.Whitespace
's' Name
'\n' Text.Whitespace
'end' Keyword
'\n' Text.Whitespace


@ -1,50 +0,0 @@
---input---
def f(x : T, line = __LINE__) forall T
if x.is_a?(String)
pp! x
end
end
---tokens---
'def' Keyword
' ' Text.Whitespace
'f' Name.Function
'(' Punctuation
'x' Name
' ' Text.Whitespace
':' Punctuation
' ' Text.Whitespace
'T' Name
',' Punctuation
' ' Text.Whitespace
'line' Name
' ' Text.Whitespace
'=' Operator
' ' Text.Whitespace
'__LINE__' Keyword.Pseudo
')' Punctuation
' ' Text.Whitespace
'forall' Keyword.Pseudo
' ' Text.Whitespace
'T' Name
'\n' Text.Whitespace
'if' Keyword
' ' Text.Whitespace
'x' Name
'.is_a?' Keyword.Pseudo
'(' Punctuation
'String' Name
')' Punctuation
'\n' Text.Whitespace
'pp!' Name.Builtin.Pseudo
' ' Text.Whitespace
'x' Name
'\n' Text.Whitespace
'end' Keyword
'\n' Text.Whitespace
'end' Keyword
'\n' Text.Whitespace


@ -1,8 +0,0 @@
---input---
1...3
---tokens---
'1' Literal.Number.Integer
'...' Operator
'3' Literal.Number.Integer
'\n' Text.Whitespace


@ -1,10 +0,0 @@
---input---
1 .. 3
---tokens---
'1' Literal.Number.Integer
' ' Text.Whitespace
'..' Operator
' ' Text.Whitespace
'3' Literal.Number.Integer
'\n' Text.Whitespace


@ -1,58 +0,0 @@
---input---
"a\nz"
"a\az"
"a\xffz"
"a\u1234z"
"a\000z"
"a\u{0}z"
"a\u{10AfF9}z"
---tokens---
'"' Literal.String.Double
'a' Literal.String.Double
'\\n' Literal.String.Escape
'z' Literal.String.Double
'"' Literal.String.Double
'\n' Text.Whitespace
'"' Literal.String.Double
'a' Literal.String.Double
'\\a' Literal.String.Escape
'z' Literal.String.Double
'"' Literal.String.Double
'\n' Text.Whitespace
'"' Literal.String.Double
'a' Literal.String.Double
'\\xff' Literal.String.Escape
'z' Literal.String.Double
'"' Literal.String.Double
'\n' Text.Whitespace
'"' Literal.String.Double
'a' Literal.String.Double
'\\u1234' Literal.String.Escape
'z' Literal.String.Double
'"' Literal.String.Double
'\n' Text.Whitespace
'"' Literal.String.Double
'a' Literal.String.Double
'\\000' Literal.String.Escape
'z' Literal.String.Double
'"' Literal.String.Double
'\n' Text.Whitespace
'"' Literal.String.Double
'a' Literal.String.Double
'\\u{0}' Literal.String.Escape
'z' Literal.String.Double
'"' Literal.String.Double
'\n' Text.Whitespace
'"' Literal.String.Double
'a' Literal.String.Double
'\\u{10AfF9}' Literal.String.Escape
'z' Literal.String.Double
'"' Literal.String.Double
'\n' Text.Whitespace


@ -1,20 +0,0 @@
---input---
:sym_bol
:あ
:question?
:"symbol"
---tokens---
':sym_bol' Literal.String.Symbol
'\n' Text.Whitespace
':あ' Literal.String.Symbol
'\n' Text.Whitespace
':question?' Literal.String.Symbol
'\n' Text.Whitespace
':"' Literal.String.Symbol
'symbol' Literal.String.Symbol
'"' Literal.String.Symbol
'\n' Text.Whitespace


@ -1,5 +1,4 @@
require "./actions"
require "./constants"
require "./formatter"
require "./rules"
require "./styles"
@ -9,12 +8,33 @@ require "./tartrazine"
# perform a list of actions. These actions can emit tokens
# or change the state machine.
module Tartrazine
class Action
property type : String
property xml : XML::Node
enum ActionType
Bygroups
Combined
Include
Pop
Push
Token
Using
Usingbygroup
Usingself
end
struct Action
property actions : Array(Action) = [] of Action
def initialize(@type : String, @xml : XML::Node?)
@content_index : Array(Int32) = [] of Int32
@depth : Int32 = 0
@lexer_index : Int32 = 0
@lexer_name : String = ""
@states : Array(String) = [] of String
@states_to_push : Array(String) = [] of String
@token_type : String = ""
@type : ActionType = ActionType::Token
def initialize(t : String, xml : XML::Node?)
@type = ActionType.parse(t.capitalize)
# Some actions may have actions in them, like this:
# <bygroups>
# <token type="GenericPrompt"/>
@ -24,48 +44,56 @@ module Tartrazine
#
# The token actions match with the first 2 groups in the regex
# the using action matches the 3rd and shunts it to another lexer
@xml.children.each do |node|
xml.children.each do |node|
next unless node.element?
@actions << Action.new(node.name, node)
end
# Prefetch the attributes we need from the XML and keep them
case @type
when ActionType::Token
@token_type = xml["type"]
when ActionType::Push
@states_to_push = xml.attributes.select { |attrib|
attrib.name == "state"
}.map &.content
when ActionType::Pop
@depth = xml["depth"].to_i
when ActionType::Using
@lexer_name = xml["lexer"].downcase
when ActionType::Combined
@states = xml.attributes.select { |attrib|
attrib.name == "state"
}.map &.content
when ActionType::Usingbygroup
@lexer_index = xml["lexer"].to_i
@content_index = xml["content"].split(",").map(&.to_i)
end
end
# ameba:disable Metrics/CyclomaticComplexity
def emit(match : Regex::MatchData?, lexer : Lexer, match_group = 0) : Array(Token)
case type
when "token"
raise Exception.new "Can't have a token without a match" if match.nil?
[Token.new(type: xml["type"], value: match[match_group])]
when "push"
states_to_push = xml.attributes.select { |attrib|
attrib.name == "state"
}.map &.content
if states_to_push.empty?
# Push without a state means push the current state
states_to_push = [lexer.state_stack.last]
end
states_to_push.each do |state|
if state == "#pop"
def emit(match : MatchData, tokenizer : Tokenizer, match_group = 0) : Array(Token)
case @type
when ActionType::Token
raise Exception.new "Can't have a token without a match" if match.empty?
[Token.new(type: @token_type, value: String.new(match[match_group].value))]
when ActionType::Push
to_push = @states_to_push.empty? ? [tokenizer.state_stack.last] : @states_to_push
to_push.each do |state|
if state == "#pop" && tokenizer.state_stack.size > 1
# Pop the state
Log.trace { "Popping state" }
lexer.state_stack.pop
tokenizer.state_stack.pop
else
# Really push
lexer.state_stack << state
Log.trace { "Pushed #{lexer.state_stack}" }
tokenizer.state_stack << state
end
end
[] of Token
when "pop"
depth = xml["depth"].to_i
Log.trace { "Popping #{depth} states" }
if lexer.state_stack.size <= depth
Log.trace { "Can't pop #{depth} states, only have #{lexer.state_stack.size}" }
else
lexer.state_stack.pop(depth)
end
when ActionType::Pop
to_pop = [@depth, tokenizer.state_stack.size - 1].min
tokenizer.state_stack.pop(to_pop)
[] of Token
when "bygroups"
when ActionType::Bygroups
# FIXME: handle
# ><bygroups>
# <token type="Punctuation"/>
@ -80,38 +108,50 @@ module Tartrazine
# the action is skipped.
result = [] of Token
@actions.each_with_index do |e, i|
next if match[i + 1]?.nil?
result += e.emit(match, lexer, i + 1)
begin
next if match[i + 1].size == 0
rescue IndexError
# FIXME: This should not actually happen
# No match for this group
next
end
result += e.emit(match, tokenizer, i + 1)
end
result
when "using"
when ActionType::Using
# Shunt to another lexer entirely
return [] of Token if match.nil?
lexer_name = xml["lexer"].downcase
Log.trace { "to tokenize: #{match[match_group]}" }
Tartrazine.lexer(lexer_name).tokenize(match[match_group], usingself: true)
when "usingself"
return [] of Token if match.empty?
Tartrazine.lexer(@lexer_name).tokenizer(
String.new(match[match_group].value),
secondary: true).to_a
when ActionType::Usingself
# Shunt to another copy of this lexer
return [] of Token if match.nil?
new_lexer = Lexer.from_xml(lexer.xml)
Log.trace { "to tokenize: #{match[match_group]}" }
new_lexer.tokenize(match[match_group], usingself: true)
when "combined"
# Combine two states into one anonymous state
states = xml.attributes.select { |attrib|
attrib.name == "state"
}.map &.content
new_state = states.map { |name|
lexer.states[name]
return [] of Token if match.empty?
tokenizer.lexer.tokenizer(
String.new(match[match_group].value),
secondary: true).to_a
when ActionType::Combined
# Combine two or more states into one anonymous state
new_state = @states.map { |name|
tokenizer.lexer.states[name]
}.reduce { |state1, state2|
state1 + state2
}
lexer.states[new_state.name] = new_state
lexer.state_stack << new_state.name
tokenizer.lexer.states[new_state.name] = new_state
tokenizer.state_stack << new_state.name
[] of Token
when ActionType::Usingbygroup
# Shunt to content-specified lexer
return [] of Token if match.empty?
content = ""
@content_index.each do |i|
content += String.new(match[i].value)
end
Tartrazine.lexer(String.new(match[@lexer_index].value)).tokenizer(
content,
secondary: true).to_a
else
raise Exception.new("Unknown action type: #{type}: #{xml}")
raise Exception.new("Unknown action type: #{@type}")
end
end
end

src/bytes_regex.cr Normal file

@ -0,0 +1,73 @@
module BytesRegex
extend self
class Regex
def initialize(pattern : String, multiline = false, dotall = false, ignorecase = false, anchored = false)
flags = LibPCRE2::UTF | LibPCRE2::UCP | LibPCRE2::NO_UTF_CHECK
flags |= LibPCRE2::MULTILINE if multiline
flags |= LibPCRE2::DOTALL if dotall
flags |= LibPCRE2::CASELESS if ignorecase
flags |= LibPCRE2::ANCHORED if anchored
if @re = LibPCRE2.compile(
pattern,
pattern.bytesize,
flags,
out errorcode,
out erroroffset,
nil)
else
msg = String.new(256) do |buffer|
bytesize = LibPCRE2.get_error_message(errorcode, buffer, 256)
{bytesize, 0}
end
raise Exception.new "Error #{msg} compiling regex at offset #{erroroffset}"
end
@match_data = LibPCRE2.match_data_create_from_pattern(@re, nil)
end
def finalize
LibPCRE2.match_data_free(@match_data)
LibPCRE2.code_free(@re)
end
def match(str : Bytes, pos = 0) : Array(Match)
rc = LibPCRE2.match(
@re,
str,
str.size,
pos,
LibPCRE2::NO_UTF_CHECK,
@match_data,
nil)
if rc > 0
ovector = LibPCRE2.get_ovector_pointer(@match_data)
(0...rc).map do |i|
m_start = ovector[2 * i]
m_end = ovector[2 * i + 1]
if m_start == m_end
m_value = Bytes.new(0)
else
m_value = str[m_start...m_end]
end
Match.new(m_value, m_start, m_end - m_start)
end
else
[] of Match
end
end
end
struct Match
property value : Bytes
property start : UInt64
property size : UInt64
def initialize(@value : Bytes, @start : UInt64, @size : UInt64)
end
end
end
# pattern = "foo"
# str = "foo bar"
# re = BytesRegex::Regex.new(pattern)
# p! String.new(re.match(str.to_slice)[0].value)

src/constants/lexers.cr Normal file

File diff suppressed because it is too large

@ -1,92 +1,100 @@
module Tartrazine
Abbreviations = {
"Background" => "b",
"Text" => "t",
"CodeLine" => "cl",
"Comment" => "c",
"CommentHashbang" => "ch",
"CommentMultiline" => "cm",
"CommentPreproc" => "cp",
"CommentPreprocFile" => "cpf",
"CommentSingle" => "cs",
"CommentSpecial" => "cs",
"NameVariable" => "nv",
"Keyword" => "k",
"NameFunction" => "nf",
"Punctuation" => "p",
"Operator" => "o",
"LiteralNumberInteger" => "lni",
"NameBuiltin" => "nb",
"Name" => "n",
"OperatorWord" => "ow",
"LiteralStringSingle" => "lss",
"Literal" => "l",
"NameClass" => "nc",
"CommentMultiline" => "cm",
"LiteralStringRegex" => "lsr",
"KeywordDeclaration" => "kd",
"KeywordConstant" => "kc",
"NameOther" => "no",
"LiteralNumberFloat" => "lnf",
"LiteralNumberHex" => "lnh",
"LiteralStringDouble" => "lsd",
"KeywordType" => "kt",
"NameNamespace" => "nn",
"NameAttribute" => "na",
"KeywordReserved" => "kr",
"CommentPreproc" => "cp",
"KeywordNamespace" => "kn",
"NameConstant" => "nc",
"NameLabel" => "nl",
"LiteralString" => "ls",
"LiteralStringChar" => "lsc",
"TextWhitespace" => "tw",
"LiteralStringEscape" => "lse",
"LiteralNumber" => "ln",
"Other" => "o",
"LiteralStringBoolean" => "lsb",
"NameProperty" => "np",
"Comment" => "c",
"NameTag" => "nt",
"LiteralStringOther" => "lso",
"NameVariableGlobal" => "nvg",
"NameBuiltinPseudo" => "nbp",
"LiteralNumberBin" => "lnb",
"KeywordPseudo" => "kp",
"CommentPreprocFile" => "cpf",
"LiteralStringAffix" => "lsa",
"LiteralStringDelimiter" => "lsd",
"LiteralNumberOct" => "lno",
"Error" => "e",
"Generic" => "g",
"LiteralNumberIntegerLong" => "lnil",
"NameDecorator" => "nd",
"LiteralStringInterpol" => "lsi",
"LiteralStringBacktick" => "lsb",
"GenericPrompt" => "gp",
"GenericOutput" => "go",
"LiteralStringName" => "lsn",
"LiteralStringHeredoc" => "lsh",
"LiteralStringSymbol" => "lss",
"NameVariableInstance" => "nvi",
"LiteralOther" => "lo",
"NameVariableClass" => "nvc",
"NameOperator" => "no",
"None" => "n",
"LiteralStringDoc" => "lsd",
"NameException" => "ne",
"GenericSubheading" => "gs",
"GenericStrong" => "gs",
"GenericDeleted" => "gd",
"GenericInserted" => "gi",
"GenericHeading" => "gh",
"NameEntity" => "ne",
"NamePseudo" => "np",
"CommentHashbang" => "ch",
"TextPunctuation" => "tp",
"NameVariableAnonymous" => "nva",
"NameVariableMagic" => "nvm",
"NameFunctionMagic" => "nfm",
"GenericEmph" => "ge",
"GenericUnderline" => "gu",
"LiteralStringAtom" => "lsa",
"LiteralDate" => "ld",
"GenericError" => "ge",
"TextSymbol" => "ts",
"GenericHeading" => "gh",
"GenericInserted" => "gi",
"GenericOutput" => "go",
"GenericPrompt" => "gp",
"GenericStrong" => "gs",
"GenericSubheading" => "gs",
"GenericTraceback" => "gt",
"GenericUnderline" => "gu",
"Highlight" => "hl",
"Keyword" => "k",
"KeywordConstant" => "kc",
"KeywordDeclaration" => "kd",
"KeywordNamespace" => "kn",
"KeywordPseudo" => "kp",
"KeywordReserved" => "kr",
"KeywordType" => "kt",
"LineHighlight" => "lh",
"LineNumbers" => "ln",
"LineNumbersTable" => "lnt",
"LineTable" => "lt",
"LineTableTD" => "lttd",
"Literal" => "l",
"LiteralDate" => "ld",
"LiteralNumber" => "ln",
"LiteralNumberBin" => "lnb",
"LiteralNumberFloat" => "lnf",
"LiteralNumberHex" => "lnh",
"LiteralNumberInteger" => "lni",
"LiteralNumberIntegerLong" => "lnil",
"LiteralNumberOct" => "lno",
"LiteralOther" => "lo",
"LiteralString" => "ls",
"LiteralStringAffix" => "lsa",
"LiteralStringAtom" => "lsa",
"LiteralStringBacktick" => "lsb",
"LiteralStringBoolean" => "lsb",
"LiteralStringChar" => "lsc",
"LiteralStringDelimiter" => "lsd",
"LiteralStringDoc" => "lsd",
"LiteralStringDouble" => "lsd",
"LiteralStringEscape" => "lse",
"LiteralStringHeredoc" => "lsh",
"LiteralStringInterpol" => "lsi",
"LiteralStringName" => "lsn",
"LiteralStringOther" => "lso",
"LiteralStringRegex" => "lsr",
"LiteralStringSingle" => "lss",
"LiteralStringSymbol" => "lss",
"Name" => "n",
"NameAttribute" => "na",
"NameBuiltin" => "nb",
"NameBuiltinPseudo" => "nbp",
"NameClass" => "nc",
"NameConstant" => "nc",
"NameDecorator" => "nd",
"NameEntity" => "ne",
"NameException" => "ne",
"NameFunction" => "nf",
"NameFunctionMagic" => "nfm",
"NameKeyword" => "nk",
"NameLabel" => "nl",
"NameNamespace" => "nn",
"NameOperator" => "no",
"NameOther" => "no",
"NameProperty" => "np",
"NamePseudo" => "np",
"NameTag" => "nt",
"NameVariable" => "nv",
"NameVariableAnonymous" => "nva",
"NameVariableClass" => "nvc",
"NameVariableGlobal" => "nvg",
"NameVariableInstance" => "nvi",
"NameVariableMagic" => "nvm",
"None" => "n",
"Operator" => "o",
"OperatorWord" => "ow",
"Other" => "o",
"Punctuation" => "p",
"Text" => "t",
"TextPunctuation" => "tp",
"TextSymbol" => "ts",
"TextWhitespace" => "tw",
}
end


@ -1,5 +1,4 @@
require "./actions"
require "./constants"
require "./formatter"
require "./rules"
require "./styles"
@ -10,102 +9,27 @@ module Tartrazine
# This is the base class for all formatters.
abstract class Formatter
property name : String = ""
property theme : Theme = Tartrazine.theme("default-dark")
def format(text : String, lexer : Lexer, theme : Theme) : String
# Format the text using the given lexer.
def format(text : String, lexer : Lexer, io : IO) : Nil
raise Exception.new("Not implemented")
end
def get_style_defs(theme : Theme) : String
def format(text : String, lexer : Lexer) : String
outp = String::Builder.new("")
format(text, lexer, outp)
outp.to_s
end
# Return the styles, if the formatter supports it.
def style_defs : String
raise Exception.new("Not implemented")
end
end
class Ansi < Formatter
def format(text : String, lexer : Lexer, theme : Theme) : String
output = String.build do |outp|
lexer.tokenize(text).each do |token|
outp << self.colorize(token[:value], token[:type], theme)
end
end
output
end
def colorize(text : String, token : String, theme : Theme) : String
style = theme.styles.fetch(token, nil)
return text if style.nil?
if theme.styles.has_key?(token)
s = theme.styles[token]
else
# Themes don't contain information for each specific
# token type. However, they may contain information
# for a parent style. Worst case, we go to the root
# (Background) style.
s = theme.styles[theme.style_parents(token).reverse.find { |parent|
theme.styles.has_key?(parent)
}]
end
colorized = text.colorize
s.color.try { |c| colorized = colorized.fore(c.colorize) }
# Intentionally not setting background color
colorized.mode(:bold) if s.bold
colorized.mode(:italic) if s.italic
colorized.mode(:underline) if s.underline
colorized.to_s
end
end
class Html < Formatter
def format(text : String, lexer : Lexer, theme : Theme) : String
output = String.build do |outp|
outp << "<html><head><style>"
outp << get_style_defs(theme)
outp << "</style></head><body>"
outp << "<pre class=\"#{get_css_class("Background", theme)}\"><code class=\"#{get_css_class("Background", theme)}\">"
lexer.tokenize(text).each do |token|
fragment = "<span class=\"#{get_css_class(token[:type], theme)}\">#{token[:value]}</span>"
outp << fragment
end
outp << "</code></pre></body></html>"
end
output
end
# ameba:disable Metrics/CyclomaticComplexity
def get_style_defs(theme : Theme) : String
output = String.build do |outp|
theme.styles.each do |token, style|
outp << ".#{get_css_class(token, theme)} {"
# These are set or nil
outp << "color: #{style.color.try &.hex};" if style.color
outp << "background-color: #{style.background.try &.hex};" if style.background
outp << "border: 1px solid #{style.border.try &.hex};" if style.border
# These are true/false/nil
outp << "border: none;" if style.border == false
outp << "font-weight: bold;" if style.bold
outp << "font-weight: 400;" if style.bold == false
outp << "font-style: italic;" if style.italic
outp << "font-style: normal;" if style.italic == false
outp << "text-decoration: underline;" if style.underline
outp << "text-decoration: none;" if style.underline == false
outp << "}"
end
end
output
end
# Given a token type, return the CSS class to use.
def get_css_class(token, theme)
return Abbreviations[token] if theme.styles.has_key?(token)
# Themes don't contain information for each specific
# token type. However, they may contain information
# for a parent style. Worst case, we go to the root
# (Background) style.
Abbreviations[theme.style_parents(token).reverse.find { |parent|
theme.styles.has_key?(parent)
}]
# Is this line in the highlighted ranges?
def highlighted?(line : Int) : Bool
highlight_lines.any?(&.includes?(line))
end
end
end
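The split in the base class above, a streaming overload that writes to an `IO` plus a `String`-returning wrapper built on `String::Builder`, means every formatter gets both call styles for free. A minimal sketch (the `Json` formatter and `Tartrazine.lexer` appear elsewhere in this changeset; the file name is illustrative):

```crystal
lexer = Tartrazine.lexer(name: "crystal")
formatter = Tartrazine::Json.new

# Streaming overload: tokens go straight to the IO, nothing is
# accumulated in memory.
File.open("tokens.json", "w") do |f|
  formatter.format(%(a = 1), lexer, f)
end

# Convenience overload: same output returned as a String.
json = formatter.format(%(a = 1), lexer)
```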

src/formatters/ansi.cr Normal file

@ -0,0 +1,57 @@
require "../formatter"
module Tartrazine
def self.to_ansi(text : String, language : String,
theme : String = "default-dark",
line_numbers : Bool = false) : String
Tartrazine::Ansi.new(
theme: Tartrazine.theme(theme),
line_numbers: line_numbers
).format(text, Tartrazine.lexer(name: language))
end
class Ansi < Formatter
property? line_numbers : Bool = false
def initialize(@theme : Theme = Tartrazine.theme("default-dark"), @line_numbers : Bool = false)
end
private def line_label(i : Int32) : String
"#{i + 1}".rjust(4).ljust(5)
end
def format(text : String, lexer : BaseLexer, outp : IO) : Nil
tokenizer = lexer.tokenizer(text)
i = 0
outp << line_label(i) if line_numbers?
tokenizer.each do |token|
outp << colorize(token[:value], token[:type])
if token[:value].includes?("\n")
i += 1
outp << line_label(i) if line_numbers?
end
end
end
def colorize(text : String, token : String) : String
if theme.styles.has_key?(token)
s = theme.styles[token]
else
# Themes don't contain information for each specific
# token type. However, they may contain information
# for a parent style. Worst case, we go to the root
# (Background) style.
s = theme.styles[theme.style_parents(token).reverse.find { |parent|
theme.styles.has_key?(parent)
}]
end
colorized = text.colorize
s.color.try { |col| colorized = colorized.fore(col.colorize) }
# Intentionally not setting background color
colorized.mode(:bold) if s.bold
colorized.mode(:italic) if s.italic
colorized.mode(:underline) if s.underline
colorized.to_s
end
end
end
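The module-level `Tartrazine.to_ansi` helper defined at the top of this file makes one-shot terminal rendering a single call. A minimal sketch, assuming the shard is required as `"tartrazine"`; `theme` and `line_numbers` mirror the `Ansi` constructor arguments:

```crystal
require "tartrazine"

# One-shot ANSI rendering: the helper builds the formatter and
# resolves the lexer by name internally.
puts Tartrazine.to_ansi(%(puts "hello"), "crystal",
  theme: "default-dark",
  line_numbers: true)
```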

src/formatters/html.cr Normal file

@ -0,0 +1,149 @@
require "../constants/token_abbrevs.cr"
require "../formatter"
require "html"
module Tartrazine
def self.to_html(text : String, language : String,
theme : String = "default-dark",
standalone : Bool = true,
line_numbers : Bool = false) : String
Tartrazine::Html.new(
theme: Tartrazine.theme(theme),
standalone: standalone,
line_numbers: line_numbers
).format(text, Tartrazine.lexer(name: language))
end
class Html < Formatter
# property line_number_in_table : Bool = false
# property with_classes : Bool = true
property class_prefix : String = ""
property highlight_lines : Array(Range(Int32, Int32)) = [] of Range(Int32, Int32)
property line_number_id_prefix : String = "line-"
property line_number_start : Int32 = 1
property tab_width = 8
property? line_numbers : Bool = false
property? linkable_line_numbers : Bool = true
property? standalone : Bool = false
property? surrounding_pre : Bool = true
property? wrap_long_lines : Bool = false
property weight_of_bold : Int32 = 600
property template : String = <<-TEMPLATE
<!DOCTYPE html><html><head><style>
{{style_defs}}
</style></head><body>
{{body}}
</body></html>
TEMPLATE
property theme : Theme
def initialize(@theme : Theme = Tartrazine.theme("default-dark"), *,
@highlight_lines = [] of Range(Int32, Int32),
@class_prefix : String = "",
@line_number_id_prefix = "line-",
@line_number_start = 1,
@tab_width = 8,
@line_numbers : Bool = false,
@linkable_line_numbers : Bool = true,
@standalone : Bool = false,
@surrounding_pre : Bool = true,
@wrap_long_lines : Bool = false,
@weight_of_bold : Int32 = 600,
@template : String = @template)
end
def format(text : String, lexer : Lexer) : String
outp = String::Builder.new("")
format(text, lexer, outp)
outp.to_s
end
def format(text : String, lexer : BaseLexer, io : IO) : Nil
pre, post = wrap_standalone
io << pre if standalone?
format_text(text, lexer, io)
io << post if standalone?
end
# Wrap text into a full HTML document, including the CSS for the theme
def wrap_standalone
output = String.build do |outp|
if @template.includes? "{{style_defs}}"
outp << @template.split("{{style_defs}}")[0]
outp << style_defs
outp << @template.split("{{style_defs}}")[1].split("{{body}}")[0]
else
outp << @template.split("{{body}}")[0]
end
end
{output.to_s, @template.split("{{body}}")[1]}
end
private def line_label(i : Int32) : String
line_label = "#{i + 1}".rjust(4).ljust(5)
line_class = highlighted?(i + 1) ? "class=\"#{get_css_class("LineHighlight")}\"" : ""
line_id = linkable_line_numbers? ? "id=\"#{line_number_id_prefix}#{i + 1}\"" : ""
"<span #{line_id} #{line_class} style=\"user-select: none;\">#{line_label} </span>"
end
def format_text(text : String, lexer : BaseLexer, outp : IO)
tokenizer = lexer.tokenizer(text)
i = 0
if surrounding_pre?
pre_style = wrap_long_lines? ? "style=\"white-space: pre-wrap; word-break: break-word;\"" : ""
outp << "<pre class=\"#{get_css_class("Background")}\" #{pre_style}>"
end
outp << "<code class=\"#{get_css_class("Background")}\">"
outp << line_label(i) if line_numbers?
tokenizer.each do |token|
outp << "<span class=\"#{get_css_class(token[:type])}\">#{HTML.escape(token[:value])}</span>"
if token[:value].ends_with? "\n"
i += 1
outp << line_label(i) if line_numbers?
end
end
outp << "</code></pre>"
end
# ameba:disable Metrics/CyclomaticComplexity
def style_defs : String
output = String.build do |outp|
theme.styles.each do |token, style|
outp << ".#{get_css_class(token)} {"
# These are set or nil
outp << "color: ##{style.color.try &.hex};" if style.color
outp << "background-color: ##{style.background.try &.hex};" if style.background
outp << "border: 1px solid ##{style.border.try &.hex};" if style.border
# These are true/false/nil
outp << "border: none;" if style.border == false
outp << "font-weight: #{@weight_of_bold};" if style.bold
outp << "font-style: italic;" if style.italic
outp << "font-style: normal;" if style.italic == false
outp << "text-decoration: underline;" if style.underline
outp << "text-decoration: none;" if style.underline == false
outp << "tab-size: #{tab_width};" if token == "Background"
outp << "}"
end
end
output
end
# Given a token type, return the CSS class to use.
def get_css_class(token : String) : String
if !theme.styles.has_key? token
# Themes don't contain information for each specific
# token type. However, they may contain information
# for a parent style. Worst case, we go to the root
# (Background) style.
parent = theme.style_parents(token).reverse.find { |dad|
theme.styles.has_key?(dad)
}
theme.styles[token] = theme.styles[parent]
end
class_prefix + Abbreviations[token]
end
end
end
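Because `wrap_standalone` splits on the `{{style_defs}}` and `{{body}}` placeholders, a caller can supply a custom page template for standalone output. A sketch (the surrounding markup in the template is illustrative):

```crystal
# Custom standalone template: {{style_defs}} is replaced by the theme
# CSS, {{body}} by the highlighted code.
formatter = Tartrazine::Html.new(
  theme: Tartrazine.theme("default-dark"),
  standalone: true,
  template: <<-TPL
    <!DOCTYPE html><html><head><style>{{style_defs}}</style></head>
    <body><h1>Listing</h1>{{body}}</body></html>
    TPL
)
puts formatter.format(%(x = 1), Tartrazine.lexer(name: "crystal"))
```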

src/formatters/json.cr Normal file

@ -0,0 +1,18 @@
require "../formatter"
module Tartrazine
class Json < Formatter
property name = "json"
def format(text : String, lexer : BaseLexer) : String
outp = String::Builder.new("")
format(text, lexer, outp)
outp.to_s
end
def format(text : String, lexer : BaseLexer, io : IO) : Nil
tokenizer = lexer.tokenizer(text)
io << Tartrazine::RegexLexer.collapse_tokens(tokenizer.to_a).to_json
end
end
end

src/formatters/png.cr Normal file

@ -0,0 +1,117 @@
require "../formatter"
require "compress/gzip"
require "digest/sha1"
require "stumpy_png"
require "stumpy_utils"
module Tartrazine
def self.to_png(text : String, language : String,
theme : String = "default-dark",
line_numbers : Bool = false) : String
buf = IO::Memory.new
Tartrazine::Png.new(
theme: Tartrazine.theme(theme),
line_numbers: line_numbers
).format(text, Tartrazine.lexer(name: language), buf)
buf.to_s
end
class FontFiles
extend BakedFileSystem
bake_folder "../../fonts", __DIR__
end
class Png < Formatter
include StumpyPNG
property? line_numbers : Bool = false
@font_regular : PCFParser::Font
@font_bold : PCFParser::Font
@font_oblique : PCFParser::Font
@font_bold_oblique : PCFParser::Font
@font_width = 15
@font_height = 24
def initialize(@theme : Theme = Tartrazine.theme("default-dark"), @line_numbers : Bool = false)
@font_regular = load_font("/courier-regular.pcf.gz")
@font_bold = load_font("/courier-bold.pcf.gz")
@font_oblique = load_font("/courier-oblique.pcf.gz")
@font_bold_oblique = load_font("/courier-bold-oblique.pcf.gz")
end
private def load_font(name : String) : PCFParser::Font
compressed = FontFiles.get(name)
uncompressed = Compress::Gzip::Reader.open(compressed) do |gzip|
gzip.gets_to_end
end
PCFParser::Font.new(IO::Memory.new uncompressed)
end
private def line_label(i : Int32) : String
"#{i + 1}".rjust(4).ljust(5)
end
def format(text : String, lexer : BaseLexer, outp : IO) : Nil
# Create canvas of correct size
lines = text.split("\n")
canvas_height = lines.size * @font_height
canvas_width = lines.max_of(&.size)
canvas_width += 5 if line_numbers?
canvas_width *= @font_width
bg_color = RGBA.from_hex("##{theme.styles["Background"].background.try &.hex}")
canvas = Canvas.new(canvas_width, canvas_height, bg_color)
tokenizer = lexer.tokenizer(text)
x = 0
y = @font_height
i = 0
if line_numbers?
canvas.text(x, y, line_label(i), @font_regular, RGBA.from_hex("##{theme.styles["Background"].color.try &.hex}"))
x += 5 * @font_width
end
tokenizer.each do |token|
font, color = token_style(token[:type])
# These fonts are very limited
t = token[:value].gsub(/[^[:ascii:]]/, "?")
canvas.text(x, y, t.rstrip("\n"), font, color)
if token[:value].includes?("\n")
x = 0
y += @font_height
i += 1
if line_numbers?
canvas.text(x, y, line_label(i), @font_regular, RGBA.from_hex("##{theme.styles["Background"].color.try &.hex}"))
x += 4 * @font_width
end
end
x += token[:value].size * @font_width
end
StumpyPNG.write(canvas, outp)
end
def token_style(token : String) : {PCFParser::Font, RGBA}
if theme.styles.has_key?(token)
s = theme.styles[token]
else
# Themes don't contain information for each specific
# token type. However, they may contain information
# for a parent style. Worst case, we go to the root
# (Background) style.
s = theme.styles[theme.style_parents(token).reverse.find { |parent|
theme.styles.has_key?(parent)
}]
end
color = RGBA.from_hex("##{theme.styles["Background"].color.try &.hex}")
color = RGBA.from_hex("##{s.color.try &.hex}") if s.color
return {@font_bold_oblique, color} if s.bold && s.italic
return {@font_bold, color} if s.bold
return {@font_oblique, color} if s.italic
return {@font_regular, color}
end
end
end

src/formatters/svg.cr Normal file

@ -0,0 +1,129 @@
require "../constants/token_abbrevs.cr"
require "../formatter"
require "html"
module Tartrazine
def self.to_svg(text : String, language : String,
theme : String = "default-dark",
standalone : Bool = true,
line_numbers : Bool = false) : String
Tartrazine::Svg.new(
theme: Tartrazine.theme(theme),
standalone: standalone,
line_numbers: line_numbers
).format(text, Tartrazine.lexer(name: language))
end
class Svg < Formatter
property highlight_lines : Array(Range(Int32, Int32)) = [] of Range(Int32, Int32)
property line_number_id_prefix : String = "line-"
property line_number_start : Int32 = 1
property tab_width = 8
property? line_numbers : Bool = false
property? linkable_line_numbers : Bool = true
property? standalone : Bool = false
property weight_of_bold : Int32 = 600
property fs : Int32
property ystep : Int32
property theme : Theme
def initialize(@theme : Theme = Tartrazine.theme("default-dark"), *,
@highlight_lines = [] of Range(Int32, Int32),
@class_prefix : String = "",
@line_number_id_prefix = "line-",
@line_number_start = 1,
@tab_width = 8,
@line_numbers : Bool = false,
@linkable_line_numbers : Bool = true,
@standalone : Bool = false,
@weight_of_bold : Int32 = 600,
@font_family : String = "monospace",
@font_size : String = "14px")
if font_size.ends_with? "px"
@fs = font_size[0...-2].to_i
else
@fs = font_size.to_i
end
@ystep = @fs + 5
end
def format(text : String, lexer : BaseLexer, io : IO) : Nil
pre, post = wrap_standalone
io << pre if standalone?
format_text(text, lexer, io)
io << post if standalone?
end
# Wrap text into a full HTML document, including the CSS for the theme
def wrap_standalone
output = String.build do |outp|
outp << %(<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN" "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
<svg xmlns="http://www.w3.org/2000/svg">
<g font-family="#{self.@font_family}" font-size="#{self.@font_size}">)
end
{output.to_s, "</g></svg>"}
end
private def line_label(i : Int32, x : Int32, y : Int32) : String
line_label = "#{i + 1}".rjust(4).ljust(5)
line_style = highlighted?(i + 1) ? "font-weight=\"#{@weight_of_bold}\"" : ""
line_id = linkable_line_numbers? ? "id=\"#{line_number_id_prefix}#{i + 1}\"" : ""
%(<text #{line_style} #{line_id} x="#{4*ystep}" y="#{y}" text-anchor="end">#{line_label}</text>)
end
def format_text(text : String, lexer : BaseLexer, outp : IO)
x = 0
y = ystep
i = 0
line_x = x
line_x += 5 * ystep if line_numbers?
tokenizer = lexer.tokenizer(text)
outp << line_label(i, x, y) if line_numbers?
outp << %(<text x="#{line_x}" y="#{y}" xml:space="preserve">)
tokenizer.each do |token|
if token[:value].ends_with? "\n"
outp << "<tspan #{get_style(token[:type])}>#{HTML.escape(token[:value][0...-1])}</tspan>"
outp << "</text>"
x = 0
y += ystep
i += 1
outp << line_label(i, x, y) if line_numbers?
outp << %(<text x="#{line_x}" y="#{y}" xml:space="preserve">)
else
outp << "<tspan#{get_style(token[:type])}>#{HTML.escape(token[:value])}</tspan>"
x += token[:value].size * ystep
end
end
outp << "</text>"
end
# Given a token type, return the style.
def get_style(token : String) : String
if !theme.styles.has_key? token
# Themes don't contain information for each specific
# token type. However, they may contain information
# for a parent style. Worst case, we go to the root
# (Background) style.
parent = theme.style_parents(token).reverse.find { |dad|
theme.styles.has_key?(dad)
}
theme.styles[token] = theme.styles[parent]
end
output = String.build do |outp|
style = theme.styles[token]
outp << " fill=\"##{style.color.try &.hex}\"" if style.color
# No support for background color or border in SVG
outp << " font-weight=\"#{@weight_of_bold}\"" if style.bold
outp << " font-weight=\"normal\"" if style.bold == false
outp << " font-style=\"italic\"" if style.italic
outp << " font-style=\"normal\"" if style.italic == false
outp << " text-decoration=\"underline\"" if style.underline
outp << " text-decoration=\"none\"" if style.underline == false
end
output
end
end
end

src/heuristics.cr Normal file

@ -0,0 +1,81 @@
require "yaml"
# Use linguist's heuristics to disambiguate between languages
# This is *shamelessly* stolen from https://github.com/github-linguist/linguist
# and ported to Crystal. Deepest thanks to the authors of Linguist
# for licensing it liberally.
#
# Consider this code (c) 2017 GitHub, Inc. even if I wrote it.
module Linguist
class Heuristic
include YAML::Serializable
property disambiguations : Array(Disambiguation)
property named_patterns : Hash(String, String | Array(String))
# Run the heuristics on the given filename and content
def run(filename, content)
ext = File.extname filename
disambiguation = disambiguations.find do |item|
item.extensions.includes? ext
end
disambiguation.try &.run(content, named_patterns)
end
end
class Disambiguation
include YAML::Serializable
property extensions : Array(String)
property rules : Array(LangRule)
def run(content, named_patterns)
rules.each do |rule|
if rule.match(content, named_patterns)
return rule.language
end
end
nil
end
end
class LangRule
include YAML::Serializable
property pattern : (String | Array(String))?
property negative_pattern : (String | Array(String))?
property named_pattern : String?
property and : Array(LangRule)?
property language : String | Array(String)?
# ameba:disable Metrics/CyclomaticComplexity
def match(content, named_patterns)
# This rule matches without conditions
return true if !pattern && !negative_pattern && !named_pattern && !and
if pattern
p_arr = [] of String
p_arr << pattern.as(String) if pattern.is_a? String
p_arr = pattern.as(Array(String)) if pattern.is_a? Array(String)
return true if p_arr.any? { |pat| ::Regex.new(pat).matches?(content) }
end
if negative_pattern
p_arr = [] of String
p_arr << negative_pattern.as(String) if negative_pattern.is_a? String
p_arr = negative_pattern.as(Array(String)) if negative_pattern.is_a? Array(String)
return true if p_arr.none? { |pat| ::Regex.new(pat).matches?(content) }
end
if named_pattern
p_arr = [] of String
if named_patterns[named_pattern].is_a? String
p_arr << named_patterns[named_pattern].as(String)
else
p_arr = named_patterns[named_pattern].as(Array(String))
end
result = p_arr.any? { |pat| ::Regex.new(pat).matches?(content) }
end
if and
result = and.as(Array(LangRule)).all?(&.match(content, named_patterns))
end
result
end
end
end
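For reference, a minimal heuristics document in the shape these classes deserialize: `disambiguations` keyed by extension, `rules` tried in order, and `named_patterns` shared across rules. The patterns below are illustrative, not linguist's actual rules:

```yaml
disambiguations:
  - extensions: [".h"]
    rules:
      - language: "C++"
        named_pattern: "cpp"
      - language: "C"      # fallback rule with no conditions
named_patterns:
  cpp: ['\btemplate\b', '\bclass\s+\w+']
```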

src/lexer.cr Normal file

@ -0,0 +1,433 @@
require "./constants/lexers"
require "./heuristics"
require "baked_file_system"
require "crystal/syntax_highlighter"
module Tartrazine
class LexerFiles
extend BakedFileSystem
macro bake_selected_lexers
{% for lexer in env("TT_LEXERS").split "," %}
bake_file {{ lexer }}+".xml", {{ read_file "#{__DIR__}/../lexers/" + lexer + ".xml" }}
{% end %}
end
{% if flag?(:nolexers) %}
bake_selected_lexers
{% else %}
bake_folder "../lexers", __DIR__
{% end %}
end
# Get the lexer object for a language name
def self.lexer(name : String? = nil, filename : String? = nil, mimetype : String? = nil) : BaseLexer
return lexer_by_name(name) if name && name != "autodetect"
return lexer_by_filename(filename) if filename
return lexer_by_mimetype(mimetype) if mimetype
RegexLexer.from_xml(LexerFiles.get("/#{LEXERS_BY_NAME["plaintext"]}.xml").gets_to_end)
end
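`Tartrazine.lexer` tries the explicit name first, then the filename, then the mimetype, and finally falls back to plaintext. A stubbed Ruby sketch of that dispatch order (the string labels stand in for the real lexer objects loaded from XML):

```ruby
# Dispatch order of Tartrazine.lexer, with stub lookups instead of XML loading.
def pick_lexer(name: nil, filename: nil, mimetype: nil)
  return "by-name:#{name}"         if name && name != "autodetect"
  return "by-filename:#{filename}" if filename
  return "by-mimetype:#{mimetype}" if mimetype
  "plaintext" # final fallback, as in the code above
end
```

Note that `name: "autodetect"` is treated the same as no name at all, which is what lets the CLI default of `-l autodetect` fall through to filename detection.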
private def self.lexer_by_mimetype(mimetype : String) : BaseLexer
lexer_file_name = LEXERS_BY_MIMETYPE.fetch(mimetype, nil)
raise Exception.new("Unknown mimetype: #{mimetype}") if lexer_file_name.nil?
RegexLexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
end
private def self.lexer_by_name(name : String) : BaseLexer
return CrystalLexer.new if name == "crystal"
lexer_file_name = LEXERS_BY_NAME.fetch(name.downcase, nil)
return create_delegating_lexer(name) if lexer_file_name.nil? && name.includes? "+"
raise Exception.new("Unknown lexer: #{name}") if lexer_file_name.nil?
RegexLexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
rescue ex : BakedFileSystem::NoSuchFileError
raise Exception.new("Unknown lexer: #{name}")
end
private def self.lexer_by_filename(filename : String) : BaseLexer
if filename.ends_with?(".cr")
return CrystalLexer.new
end
candidates = Set(String).new
LEXERS_BY_FILENAME.each do |k, v|
candidates += v.to_set if File.match?(k, File.basename(filename))
end
case candidates.size
when 0
lexer_file_name = LEXERS_BY_NAME["plaintext"]
when 1
lexer_file_name = candidates.first
else
lexer_file_name = self.lexer_by_content(filename)
begin
return self.lexer(lexer_file_name)
rescue ex : Exception
raise Exception.new("Multiple lexers match the filename: #{candidates.to_a.join(", ")}, heuristics suggest #{lexer_file_name} but there is no matching lexer.")
end
end
RegexLexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
end
private def self.lexer_by_content(fname : String) : String?
h = Linguist::Heuristic.from_yaml(LexerFiles.get("/heuristics.yml").gets_to_end)
result = h.run(fname, File.read(fname))
case result
when Nil
raise Exception.new "No lexer found for #{fname}"
when String
result.as(String)
when Array(String)
result.first
end
end
private def self.create_delegating_lexer(name : String) : BaseLexer
language, root = name.split("+", 2)
language_lexer = lexer(language)
root_lexer = lexer(root)
DelegatingLexer.new(language_lexer, root_lexer)
end
# Return a list of all lexers
def self.lexers : Array(String)
file_map = LexerFiles.files.map(&.path)
LEXERS_BY_NAME.keys.select { |k| file_map.includes?("/#{k}.xml") }.sort!
end
# A token, the output of the tokenizer
alias Token = NamedTuple(type: String, value: String)
abstract class BaseTokenizer
end
class Tokenizer < BaseTokenizer
include Iterator(Token)
property lexer : BaseLexer
property text : Bytes
property pos : Int32 = 0
@dq = Deque(Token).new
property state_stack = ["root"]
def initialize(@lexer : BaseLexer, text : String, secondary = false)
# Respect the `ensure_nl` config option
if text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary
text += "\n"
end
@text = text.to_slice
end
def next : Iterator::Stop | Token
if @dq.size > 0
return @dq.shift
end
if pos == @text.size
return stop
end
matched = false
while @pos < @text.size
@lexer.states[@state_stack.last].rules.each do |rule|
matched, new_pos, new_tokens = rule.match(@text, @pos, self)
if matched
@pos = new_pos
split_tokens(new_tokens).each { |token| @dq << token }
break
end
end
if !matched
if @text[@pos] == 10u8
@dq << {type: "Text", value: "\n"}
@state_stack = ["root"]
else
@dq << {type: "Error", value: String.new(@text[@pos..@pos])}
end
@pos += 1
break
end
end
self.next
end
# If a token contains a newline, split it into two tokens
def split_tokens(tokens : Array(Token)) : Array(Token)
split_tokens = [] of Token
tokens.each do |token|
if token[:value].includes?("\n")
values = token[:value].split("\n")
values.each_with_index do |value, index|
value += "\n" if index < values.size - 1
split_tokens << {type: token[:type], value: value}
end
else
split_tokens << token
end
end
split_tokens
end
end
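The newline-splitting helper transliterates almost directly to Ruby; the one wrinkle is that Ruby's `split` drops trailing empty strings unless given a negative limit, while Crystal's keeps them:

```ruby
# Split any token containing "\n" into one token per line,
# keeping the newline at the end of each non-final piece.
def split_tokens(tokens)
  tokens.flat_map do |token|
    next [token] unless token[:value].include?("\n")
    values = token[:value].split("\n", -1) # -1 keeps trailing empty pieces,
                                           # matching Crystal's split behavior
    values.each_with_index.map do |value, index|
      value += "\n" if index < values.size - 1
      {type: token[:type], value: value}
    end
  end
end
```

Empty-valued leftovers (from values that end in a newline) are harmless here; they are dropped later by `collapse_tokens`.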
alias BaseLexer = Lexer
abstract class Lexer
property config = {
name: "",
priority: 0.0,
case_insensitive: false,
dot_all: false,
not_multiline: false,
ensure_nl: false,
}
property states = {} of String => State
def tokenizer(text : String, secondary = false) : BaseTokenizer
Tokenizer.new(self, text, secondary)
end
end
# This implements a lexer for Pygments RegexLexers as expressed
# in Chroma's XML serialization.
#
# For explanations on what actions and states do
# the Pygments documentation is a good place to start.
# https://pygments.org/docs/lexerdevelopment/
class RegexLexer < BaseLexer
# Collapse consecutive tokens of the same type for easier comparison
# and smaller output
def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token)
result = [] of Tartrazine::Token
tokens = tokens.reject { |token| token[:value] == "" }
tokens.each do |token|
if result.empty?
result << token
next
end
last = result.last
if last[:type] == token[:type]
new_token = {type: last[:type], value: last[:value] + token[:value]}
result.pop
result << new_token
else
result << token
end
end
result
end
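`collapse_tokens` is pure data massaging, so a Ruby transliteration of the same logic behaves identically:

```ruby
# Drop empty tokens, then merge runs of same-typed tokens into one.
def collapse_tokens(tokens)
  result = []
  tokens.reject { |t| t[:value] == "" }.each do |token|
    if !result.empty? && result.last[:type] == token[:type]
      last = result.pop
      result << {type: last[:type], value: last[:value] + token[:value]}
    else
      result << token
    end
  end
  result
end
```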
def self.from_xml(xml : String) : Lexer
l = RegexLexer.new
lexer = XML.parse(xml).first_element_child
if lexer
config = lexer.children.find { |node|
node.name == "config"
}
if config
l.config = {
name: xml_to_s(config, name) || "",
priority: xml_to_f(config, priority) || 0.0,
not_multiline: xml_to_s(config, not_multiline) == "true",
dot_all: xml_to_s(config, dot_all) == "true",
case_insensitive: xml_to_s(config, case_insensitive) == "true",
ensure_nl: xml_to_s(config, ensure_nl) == "true",
}
end
rules = lexer.children.find { |node|
node.name == "rules"
}
if rules
# Rules contains states 🤷
rules.children.select { |node|
node.name == "state"
}.each do |state_node|
state = State.new
state.name = state_node["name"]
if l.states.has_key?(state.name)
raise Exception.new("Duplicate state: #{state.name}")
else
l.states[state.name] = state
end
# And states contain rules 🤷
state_node.children.select { |node|
node.name == "rule"
}.each do |rule_node|
case rule_node["pattern"]?
when nil
if rule_node.first_element_child.try &.name == "include"
rule = IncludeStateRule.new(rule_node)
else
rule = UnconditionalRule.new(rule_node)
end
else
rule = Rule.new(rule_node,
multiline: !l.config[:not_multiline],
dotall: l.config[:dot_all],
ignorecase: l.config[:case_insensitive])
end
state.rules << rule
end
end
end
end
l
end
end
# A lexer that takes two lexers as arguments: a root lexer
# and a language lexer. Everything is first lexed using the
# language lexer; afterwards all `Other` tokens are lexed
# using the root lexer.
#
# This is useful for things like template languages, where
# you have Jinja + HTML or Jinja + CSS and so on.
class DelegatingLexer < Lexer
property language_lexer : BaseLexer
property root_lexer : BaseLexer
def initialize(@language_lexer : BaseLexer, @root_lexer : BaseLexer)
end
def tokenizer(text : String, secondary = false) : DelegatingTokenizer
DelegatingTokenizer.new(self, text, secondary)
end
end
# This Tokenizer works with a DelegatingLexer. It first tokenizes
# using the language lexer, and "Other" tokens are tokenized using
# the root lexer.
class DelegatingTokenizer < BaseTokenizer
include Iterator(Token)
@dq = Deque(Token).new
@language_tokenizer : BaseTokenizer
def initialize(@lexer : DelegatingLexer, text : String, secondary = false)
# Respect the `ensure_nl` config option
if text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary
text += "\n"
end
@language_tokenizer = @lexer.language_lexer.tokenizer(text, true)
end
def next : Iterator::Stop | Token
if @dq.size > 0
return @dq.shift
end
token = @language_tokenizer.next
if token.is_a? Iterator::Stop
return stop
elsif token.as(Token).[:type] == "Other"
root_tokenizer = @lexer.root_lexer.tokenizer(token.as(Token).[:value], true)
root_tokenizer.each do |root_token|
@dq << root_token
end
else
@dq << token.as(Token)
end
self.next
end
end
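The delegation scheme boils down to: tokenize with the language lexer, then re-tokenize every "Other" token with the root lexer. A toy Ruby sketch with lexers modeled as callables (the real classes stream tokens through a deque rather than materializing arrays):

```ruby
# Re-tokenize every "Other" token with the root lexer; pass the rest through.
def delegate(tokens, root_lexer)
  tokens.flat_map do |token|
    token[:type] == "Other" ? root_lexer.call(token[:value]) : [token]
  end
end
```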
# A Lexer state. A state has a name and a list of rules.
# The state machine has a state stack containing references
# to states to decide which rules to apply.
struct State
property name : String = ""
property rules = [] of BaseRule
def +(other : State)
new_state = State.new
new_state.name = Random.base58(8)
new_state.rules = rules + other.rules
new_state
end
end
class CustomCrystalHighlighter < Crystal::SyntaxHighlighter
@tokens = [] of Token
def highlight(text)
super
rescue ex : Crystal::SyntaxException
# Fallback to Ruby highlighting
@tokens = Tartrazine.lexer("ruby").tokenizer(text).to_a
end
def render_delimiter(&block)
@tokens << {type: "LiteralString", value: block.call.to_s}
end
def render_interpolation(&block)
@tokens << {type: "LiteralStringInterpol", value: "\#{"}
@tokens << {type: "Text", value: block.call.to_s}
@tokens << {type: "LiteralStringInterpol", value: "}"}
end
def render_string_array(&block)
@tokens << {type: "LiteralString", value: block.call.to_s}
end
# ameba:disable Metrics/CyclomaticComplexity
def render(type : TokenType, value : String)
case type
when .comment?
@tokens << {type: "Comment", value: value}
when .number?
@tokens << {type: "LiteralNumber", value: value}
when .char?
@tokens << {type: "LiteralStringChar", value: value}
when .symbol?
@tokens << {type: "LiteralStringSymbol", value: value}
when .const?
@tokens << {type: "NameConstant", value: value}
when .string?
@tokens << {type: "LiteralString", value: value}
when .ident?
@tokens << {type: "NameVariable", value: value}
when .keyword?, .self?
@tokens << {type: "NameKeyword", value: value}
when .primitive_literal?
@tokens << {type: "Literal", value: value}
when .operator?
@tokens << {type: "Operator", value: value}
when Crystal::SyntaxHighlighter::TokenType::DELIMITED_TOKEN, Crystal::SyntaxHighlighter::TokenType::DELIMITER_START, Crystal::SyntaxHighlighter::TokenType::DELIMITER_END
@tokens << {type: "LiteralString", value: value}
else
@tokens << {type: "Text", value: value}
end
end
end
class CrystalTokenizer < Tartrazine::BaseTokenizer
include Iterator(Token)
@hl = CustomCrystalHighlighter.new
@lexer : BaseLexer
@iter : Iterator(Token)
# delegate next, to: @iter
def initialize(@lexer : BaseLexer, text : String, secondary = false)
# Respect the `ensure_nl` config option
if text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary
text += "\n"
end
# Just do the tokenizing
@hl.highlight(text)
@iter = @hl.@tokens.each
end
def next : Iterator::Stop | Token
@iter.next
end
end
class CrystalLexer < BaseLexer
def tokenizer(text : String, secondary = false) : BaseTokenizer
CrystalTokenizer.new(self, text, secondary)
end
end
end

@@ -1,5 +1,122 @@
require "./**"
require "docopt"
require "./tartrazine"
lexer = Tartrazine.lexer("crystal")
theme = Tartrazine.theme(ARGV[1])
puts Tartrazine::Html.new.format(File.read(ARGV[0]), lexer, theme)
HELP = <<-HELP
tartrazine: a syntax highlighting tool
You can use the CLI to generate HTML, terminal, JSON, SVG or PNG output
from a source file using different themes.
Keep in mind that not all formatters support all features.
Usage:
tartrazine (-h, --help)
tartrazine FILE -f html [-t theme][--standalone [--template file]]
[--line-numbers][-l lexer][-o output]
tartrazine -f html -t theme --css
tartrazine FILE -f terminal [-t theme][-l lexer][--line-numbers]
[-o output]
tartrazine FILE -f svg [-t theme][--standalone][--line-numbers]
[-l lexer][-o output]
tartrazine FILE -f png [-t theme][--line-numbers]
[-l lexer][-o output]
tartrazine FILE -f json [-o output]
tartrazine --list-themes
tartrazine --list-lexers
tartrazine --list-formatters
tartrazine --version
Options:
-f <formatter> Format to use (html, terminal, json, svg, png)
-t <theme> Theme to use, see --list-themes [default: default-dark]
-l <lexer> Lexer (language) to use, see --list-lexers. Use more than
one lexer with "+" (e.g. jinja+yaml) [default: autodetect]
-o <output> Output file. Default is stdout.
--standalone Generate a standalone HTML file, which includes
all style information. If not given, it will generate just
an HTML fragment ready to include in your own page.
--css Generate a CSS file for the theme called <theme>.css
--template <file> Use a custom template for the HTML output [default: none]
--line-numbers Include line numbers in the output
-h, --help Show this screen
-v, --version Show version number
HELP
options = Docopt.docopt(HELP, ARGV)
# Handle version manually
if options["--version"]
puts "tartrazine #{Tartrazine::VERSION}"
exit 0
end
if options["--list-themes"]
puts Tartrazine.themes.join("\n")
exit 0
end
if options["--list-lexers"]
puts Tartrazine.lexers.join("\n")
exit 0
end
if options["--list-formatters"]
puts "html\njson\nterminal\nsvg\npng"
exit 0
end
theme = Tartrazine.theme(options["-t"].as(String))
template = options["--template"].as(String)
if template != "none" # Otherwise we will use the default template
template = File.open(template).gets_to_end
else
template = nil
end
if options["-f"]
formatter = options["-f"].as(String)
case formatter
when "html"
formatter = Tartrazine::Html.new
formatter.standalone = options["--standalone"] != nil
formatter.line_numbers = options["--line-numbers"] != nil
formatter.theme = theme
formatter.template = template if template
when "terminal"
formatter = Tartrazine::Ansi.new
formatter.line_numbers = options["--line-numbers"] != nil
formatter.theme = theme
when "json"
formatter = Tartrazine::Json.new
when "svg"
formatter = Tartrazine::Svg.new
formatter.standalone = options["--standalone"] != nil
formatter.line_numbers = options["--line-numbers"] != nil
formatter.theme = theme
when "png"
formatter = Tartrazine::Png.new
formatter.line_numbers = options["--line-numbers"] != nil
formatter.theme = theme
else
puts "Invalid formatter: #{formatter}"
exit 1
end
if formatter.is_a?(Tartrazine::Html) && options["--css"]
File.open("#{options["-t"].as(String)}.css", "w") do |outf|
outf << formatter.style_defs
end
exit 0
end
lexer = Tartrazine.lexer(name: options["-l"].as(String), filename: options["FILE"].as(String))
input = File.open(options["FILE"].as(String)).gets_to_end
if options["-o"].nil?
outf = STDOUT
else
outf = File.open(options["-o"].as(String), "w")
end
formatter.format(input, lexer, outf)
outf.close
end

@@ -1,9 +1,9 @@
require "./actions"
require "./constants"
require "./bytes_regex"
require "./formatter"
require "./lexer"
require "./rules"
require "./styles"
require "./tartrazine"
# These are lexer rules. They match with the text being parsed
# and perform actions, either emitting tokens or changing the
@@ -11,37 +11,14 @@ require "./tartrazine"
module Tartrazine
# This rule matches via a regex pattern
class Rule
property pattern : Regex = Re2.new ""
property actions : Array(Action) = [] of Action
property xml : String = "foo"
alias Regex = BytesRegex::Regex
alias Match = BytesRegex::Match
alias MatchData = Array(Match)
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
match = pattern.match(text, pos)
# We don't match if the match doesn't move the cursor
# because that causes infinite loops
return false, pos, [] of Token if match.nil? || match.end == 0
# Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
tokens = [] of Token
# Emit the tokens
actions.each do |action|
# Emit the token
tokens += action.emit(match, lexer)
end
Log.trace { "#{xml}, #{match.end}, #{tokens}" }
return true, match.end, tokens
end
abstract struct BaseRule
abstract def match(text : Bytes, pos : Int32, tokenizer : Tokenizer) : Tuple(Bool, Int32, Array(Token))
def initialize(node : XML::Node, multiline, dotall, ignorecase)
@xml = node.to_s
@pattern = Re2.new(
node["pattern"],
multiline,
dotall,
ignorecase,
anchored: true)
add_actions(node)
end
@actions : Array(Action) = [] of Action
def add_actions(node : XML::Node)
node.children.each do |child|
@@ -51,23 +28,39 @@ module Tartrazine
end
end
struct Rule < BaseRule
property pattern : Regex = Regex.new ""
def match(text : Bytes, pos, tokenizer) : Tuple(Bool, Int32, Array(Token))
match = pattern.match(text, pos)
# No match
return false, pos, [] of Token if match.size == 0
return true, pos + match[0].size, @actions.flat_map(&.emit(match, tokenizer))
end
def initialize(node : XML::Node, multiline, dotall, ignorecase)
pattern = node["pattern"]
pattern = "(?m)" + pattern if multiline
@pattern = Regex.new(pattern, multiline, dotall, ignorecase, true)
add_actions(node)
end
end
# This rule includes another state. If any of the rules of the
# included state matches, this rule matches.
class IncludeStateRule < Rule
property state : String = ""
struct IncludeStateRule < BaseRule
@state : String = ""
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
Log.trace { "Including state #{state} from #{lexer.state_stack.last}" }
lexer.states[state].rules.each do |rule|
matched, new_pos, new_tokens = rule.match(text, pos, lexer)
Log.trace { "#{xml}, #{new_pos}, #{new_tokens}" } if matched
def match(text : Bytes, pos : Int32, tokenizer : Tokenizer) : Tuple(Bool, Int32, Array(Token))
tokenizer.@lexer.states[@state].rules.each do |rule|
matched, new_pos, new_tokens = rule.match(text, pos, tokenizer)
return true, new_pos, new_tokens if matched
end
return false, pos, [] of Token
end
def initialize(node : XML::Node)
@xml = node.to_s
include_node = node.children.find { |child|
child.name == "include"
}
@@ -77,39 +70,15 @@ module Tartrazine
end
# This rule always matches, unconditionally
class UnconditionalRule < Rule
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
tokens = [] of Token
actions.each do |action|
tokens += action.emit(nil, lexer)
end
return true, pos, tokens
struct UnconditionalRule < BaseRule
NO_MATCH = [] of Match
def match(text, pos, tokenizer) : Tuple(Bool, Int32, Array(Token))
return true, pos, @actions.flat_map(&.emit(NO_MATCH, tokenizer))
end
def initialize(node : XML::Node)
@xml = node.to_s
add_actions(node)
end
end
# This is a hack to workaround that Crystal seems to disallow
# having regexes multiline but not dot_all
class Re2 < Regex
@source = "fa"
@options = Regex::Options::None
@jit = true
def initialize(pattern : String, multiline = false, dotall = false, ignorecase = false, anchored = false)
flags = LibPCRE2::UTF | LibPCRE2::DUPNAMES |
LibPCRE2::UCP
flags |= LibPCRE2::MULTILINE if multiline
flags |= LibPCRE2::DOTALL if dotall
flags |= LibPCRE2::CASELESS if ignorecase
flags |= LibPCRE2::ANCHORED if anchored
flags |= LibPCRE2::NO_UTF_CHECK
@re = Regex::PCRE2.compile(pattern, flags) do |error_message|
raise Exception.new(error_message)
end
end
end
end

src/run_tests.cr (new file, 1 line)

@@ -0,0 +1 @@
require "../spec/tartrazine_spec.cr"

@@ -1,5 +1,4 @@
require "./actions"
require "./constants"
require "./formatter"
require "./rules"
require "./styles"
@@ -10,17 +9,50 @@ require "xml"
module Tartrazine
alias Color = Sixteen::Color
def self.theme(name : String) : Theme
return Theme.from_base16(name[7..]) if name.starts_with? "base16_"
Theme.from_xml(ThemeFiles.get("/#{name}.xml").gets_to_end)
end
class ThemeFiles
struct ThemeFiles
extend BakedFileSystem
bake_folder "../styles", __DIR__
macro bake_selected_themes
{% if env("TT_THEMES") %}
{% for theme in env("TT_THEMES").split "," %}
bake_file {{ theme }}+".xml", {{ read_file "#{__DIR__}/../styles/" + theme + ".xml" }}
{% end %}
{% end %}
end
class Style
{% if flag?(:nothemes) %}
bake_selected_themes
{% else %}
bake_folder "../styles", __DIR__
{% end %}
end
def self.theme(name : String) : Theme
begin
return Theme.from_base16(name)
rescue ex : Exception
raise ex unless ex.message.try &.includes? "Theme not found"
end
begin
Theme.from_xml(ThemeFiles.get("/#{name}.xml").gets_to_end)
rescue ex : Exception
raise Exception.new("Error loading theme #{name}: #{ex.message}")
end
end
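The lookup order above, base16 first and baked XML second with a wrapped error, can be sketched in Ruby with stub theme stores standing in for Sixteen and the baked file system:

```ruby
# Stubbed version of the theme fallback chain; the real code loads base16
# themes via Sixteen and XML themes from the baked file system.
def load_theme(name, base16_themes, xml_themes)
  return base16_themes[name] if base16_themes.key?(name) # try base16 first
  raise "Error loading theme #{name}" unless xml_themes.key?(name)
  xml_themes[name]
end
```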
# Return a list of all themes
def self.themes
themes = Set(String).new
ThemeFiles.files.each do |file|
themes << file.path.split("/").last.split(".").first
end
Sixteen::DataFiles.files.each do |file|
themes << file.path.split("/").last.split(".").first
end
themes.to_a.sort!
end
struct Style
# These properties are tri-state.
# true means it's set
# false means it's not set
@@ -60,32 +92,11 @@ module Tartrazine
end
end
class Theme
struct Theme
property name : String = ""
property styles = {} of String => Style
# Get the style for a token.
def style(token)
styles[token] = Style.new unless styles.has_key?(token)
s = styles[token]
# We already got the data from the style hierarchy
return s if s.complete?
# Form the hierarchy of parent styles
parents = style_parents(token)
s = parents.map do |parent|
styles[parent]
end.reduce(s) do |acc, style|
acc + style
end
s.complete = true
styles[token] = s
s
end
def style_parents(token)
parents = ["Background"]
parts = token.underscore.split("_").map(&.capitalize)
@@ -103,7 +114,8 @@ module Tartrazine
# The color assignments are adapted from
# https://github.com/mohd-akram/base16-pygments/
theme.styles["Background"] = Style.new(color: t["base05"], background: t["base00"])
theme.styles["Background"] = Style.new(color: t["base05"], background: t["base00"], bold: true)
theme.styles["LineHighlight"] = Style.new(color: t["base0D"], background: t["base01"])
theme.styles["Text"] = Style.new(color: t["base05"])
theme.styles["Error"] = Style.new(color: t["base08"])
theme.styles["Comment"] = Style.new(color: t["base03"])
@@ -162,7 +174,26 @@ module Tartrazine
theme.styles[node["type"]] = s
end
# We really want a LineHighlight class
if !theme.styles.has_key?("LineHighlight")
theme.styles["LineHighlight"] = Style.new
theme.styles["LineHighlight"].background = make_highlight_color(theme.styles["Background"].background)
theme.styles["LineHighlight"].bold = true
end
theme
end
# If the color is dark, make it brighter and vice versa
def self.make_highlight_color(base_color)
if base_color.nil?
# Who knows; fall back to middle grey
return Color.new(127, 127, 127)
end
if base_color.dark?
base_color.lighter(0.2)
else
base_color.darker(0.2)
end
end
end
end
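The lighten-or-darken decision in `make_highlight_color` is easiest to see on a bare lightness value. A Ruby sketch assuming a lightness in 0.0..1.0 and a fixed 0.2 step, as in the Crystal code (the real version operates on a `Color` with `dark?`, `lighter`, and `darker`):

```ruby
# Pick a line-highlight shade that contrasts with the background:
# brighten dark backgrounds, darken light ones.
def highlight_lightness(lightness)
  return 0.5 if lightness.nil? # unknown background: middle grey
  lightness < 0.5 ? lightness + 0.2 : lightness - 0.2
end
```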

@@ -1,6 +1,6 @@
require "./actions"
require "./constants"
require "./formatter"
require "./formatters/**"
require "./rules"
require "./styles"
require "./tartrazine"
@@ -12,189 +12,9 @@ require "xml"
module Tartrazine
extend self
VERSION = "0.1.1"
VERSION = {{ `shards version #{__DIR__}`.chomp.stringify }}
Log = ::Log.for("tartrazine")
# This implements a lexer for Pygments RegexLexers as expressed
# in Chroma's XML serialization.
#
# For explanations on what actions and states do
# the Pygments documentation is a good place to start.
# https://pygments.org/docs/lexerdevelopment/
# A Lexer state. A state has a name and a list of rules.
# The state machine has a state stack containing references
# to states to decide which rules to apply.
class State
property name : String = ""
property rules = [] of Rule
def +(other : State)
new_state = State.new
new_state.name = Random.base58(8)
new_state.rules = rules + other.rules
new_state
end
end
class LexerFiles
extend BakedFileSystem
bake_folder "../lexers", __DIR__
end
# A token, the output of the tokenizer
alias Token = NamedTuple(type: String, value: String)
class Lexer
property config = {
name: "",
aliases: [] of String,
filenames: [] of String,
mime_types: [] of String,
priority: 0.0,
case_insensitive: false,
dot_all: false,
not_multiline: false,
ensure_nl: false,
}
property xml : String = ""
property states = {} of String => State
property state_stack = ["root"]
# Turn the text into a list of tokens. The `usingself` parameter
# is true when the lexer is being used to tokenize a string
# from a larger text that is already being tokenized.
# So, when it's true, we don't modify the text.
def tokenize(text, usingself = false) : Array(Token)
@state_stack = ["root"]
tokens = [] of Token
pos = 0
matched = false
# Respect the `ensure_nl` config option
if text.size > 0 && text[-1] != '\n' && config[:ensure_nl] && !usingself
text += "\n"
end
# Loop through the text, applying rules
while pos < text.size
state = states[@state_stack.last]
# Log.trace { "Stack is #{@state_stack} State is #{state.name}, pos is #{pos}, text is #{text[pos..pos + 10]}" }
state.rules.each do |rule|
matched, new_pos, new_tokens = rule.match(text, pos, self)
if matched
# Move position forward, save the tokens,
# tokenize from the new position
# Log.trace { "MATCHED: #{rule.xml}" }
pos = new_pos
tokens += new_tokens
break
end
# Log.trace { "NOT MATCHED: #{rule.xml}" }
end
# If no rule matches, emit an error token
unless matched
# Log.trace { "Error at #{pos}" }
tokens << {type: "Error", value: "#{text[pos]}"}
pos += 1
end
end
Lexer.collapse_tokens(tokens)
end
# Collapse consecutive tokens of the same type for easier comparison
# and smaller output
def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token)
result = [] of Tartrazine::Token
tokens = tokens.reject { |token| token[:value] == "" }
tokens.each do |token|
if result.empty?
result << token
next
end
last = result.last
if last[:type] == token[:type]
new_token = {type: last[:type], value: last[:value] + token[:value]}
result.pop
result << new_token
else
result << token
end
end
result
end
# ameba:disable Metrics/CyclomaticComplexity
def self.from_xml(xml : String) : Lexer
l = Lexer.new
l.xml = xml
lexer = XML.parse(xml).first_element_child
if lexer
config = lexer.children.find { |node|
node.name == "config"
}
if config
l.config = {
name: xml_to_s(config, name) || "",
aliases: xml_to_a(config, _alias) || [] of String,
filenames: xml_to_a(config, filename) || [] of String,
mime_types: xml_to_a(config, mime_type) || [] of String,
priority: xml_to_f(config, priority) || 0.0,
not_multiline: xml_to_s(config, not_multiline) == "true",
dot_all: xml_to_s(config, dot_all) == "true",
case_insensitive: xml_to_s(config, case_insensitive) == "true",
ensure_nl: xml_to_s(config, ensure_nl) == "true",
}
end
rules = lexer.children.find { |node|
node.name == "rules"
}
if rules
# Rules contains states 🤷
rules.children.select { |node|
node.name == "state"
}.each do |state_node|
state = State.new
state.name = state_node["name"]
if l.states.has_key?(state.name)
raise Exception.new("Duplicate state: #{state.name}")
else
l.states[state.name] = state
end
# And states contain rules 🤷
state_node.children.select { |node|
node.name == "rule"
}.each do |rule_node|
case rule_node["pattern"]?
when nil
if rule_node.first_element_child.try &.name == "include"
rule = IncludeStateRule.new(rule_node)
else
rule = UnconditionalRule.new(rule_node)
end
else
rule = Rule.new(rule_node,
multiline: !l.config[:not_multiline],
dotall: l.config[:dot_all],
ignorecase: l.config[:case_insensitive])
end
state.rules << rule
end
end
end
end
l
end
end
def self.lexer(name : String) : Lexer
Lexer.from_xml(LexerFiles.get("/#{name}.xml").gets_to_end)
end
end
# Convenience macros to parse XML

Some files were not shown because too many files have changed in this diff.