28 Commits

Author SHA1 Message Date
411c969cc4 chore(ignore): keep ci workflow just for manual running for now 2024-09-03 05:34:02 -03:00
36bf784e35 chore(ignore): use ubuntu 24.04 2024-09-03 05:29:26 -03:00
396a806b50 chore(ignore): use ubuntu 24.04 2024-09-03 05:28:10 -03:00
0f31534468 chore(ignore): mutation doesn't work 2024-09-03 05:04:33 -03:00
db95abf31c chore(ignore): mutation doesn't work 2024-09-03 05:03:39 -03:00
9136ff9768 test: github workflows 2024-09-03 04:54:46 -03:00
9a2ebc6c12 chore(ignore): fix tests 2024-09-03 04:49:21 -03:00
8eb8b8cb48 fix: renamed BaseLexer to Lexer and Lexer to RegexLexer to make API nicer 2024-08-31 15:07:05 -03:00
1252376663 fix: make it easier to import the Ansi formatter 2024-08-31 14:57:50 -03:00
2aa8b235ee build: fix markdown check 2024-08-28 18:47:20 -03:00
dcfd960107 fix: ameba 2024-08-28 18:42:21 -03:00
5af09edc5f build: added do_release script 2024-08-28 18:26:17 -03:00
fc53344649 build: switch from Makefile to Hacefile 2024-08-28 18:23:23 -03:00
6766eb14f3 Merge branch 'main' of github.com:ralsina/tartrazine 2024-08-26 22:28:19 -03:00
3d3f9fcc24 chore: force conventional commit messages 2024-08-26 21:27:38 -03:00
61899cfe83 chore: updated pre-commit 2024-08-26 21:18:39 -03:00
a583b7359e docs: Mention AUR package 2024-08-26 20:30:37 -03:00
de2a4a1996 chore: force conventional commit messages 2024-08-26 20:30:12 -03:00
31334ac802 chore: Started changelog 2024-08-26 20:20:26 -03:00
6d64491938 chore: git-cliff config 2024-08-26 20:19:02 -03:00
fb693bb221 chore: pre-commit hooks 2024-08-26 20:18:28 -03:00
c6824a99df Use latest sixteen release 2024-08-26 17:09:31 -03:00
4dd2e925b0 Fix bug in ansi formatter 2024-08-26 16:44:44 -03:00
7bda19cdea Use forked baked_file_system for now 2024-08-25 17:05:04 -03:00
0e7dafe711 Updated README 2024-08-24 22:33:24 -03:00
082241eb0f Load lexer by mimetype 2024-08-24 22:20:38 -03:00
df88047ca8 v0.6.1 2024-08-24 21:45:57 -03:00
5a3b50d7a3 Integrate heuristics into lexer selection 2024-08-24 21:39:39 -03:00
313 changed files with 741 additions and 395 deletions

22
.github/workflows/ci.yml vendored Normal file
View File

@@ -0,0 +1,22 @@
name: Tests
on:
# This can't yet run automatically, because tests fail because of
# different versions of chroma. Need to get the same one in my
# local env and in CI
workflow_dispatch:
permissions:
contents: read
jobs:
build:
runs-on: ubuntu-24.04
steps:
- name: Download source
uses: actions/checkout@v4
- name: Install Crystal
uses: crystal-lang/install-crystal@v1
- name: Run tests
run: |
sudo apt-get update && sudo apt-get install golang-chroma -y
shards install
crystal tool format --check
crystal spec -v

26
.github/workflows/coverage.yml vendored Normal file
View File

@@ -0,0 +1,26 @@
# Coverage workflow: builds the spec runner, executes it under kcov and
# uploads the resulting report to Codecov.
name: Coverage
on:
  # Can be started by hand from the Actions tab...
  workflow_dispatch:
  # ...and also runs once a day at 01:00 (GitHub evaluates cron in UTC).
  schedule:
    - cron: "0 1 * * *"
permissions:
  contents: read
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Download source
        uses: actions/checkout@v4
      - name: Install Crystal
        uses: crystal-lang/install-crystal@v1
      - name: Run tests using kcov
        run: |
          # apt-get with -y: non-interactive and stable for scripted use
          sudo apt-get update && sudo apt-get install -y kcov
          shards install
          # src/run_tests.cr requires every spec, giving kcov one binary to trace
          crystal build src/run_tests.cr
          kcov --clean --include-path=./src coverage ./run_tests
          # -f makes curl fail on HTTP errors instead of saving an error page
          # that would then be chmod'ed and executed
          curl -fsSLO https://uploader.codecov.io/latest/linux/codecov
          chmod +x codecov
          ./codecov -t "${CODECOV_TOKEN}" -s coverage
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

1
.gitignore vendored
View File

@@ -9,3 +9,4 @@ shard.lock
.vscode/
.crystal/
venv/
.croupier

3
.md.rb Normal file
View File

@@ -0,0 +1,3 @@
exclude_rule 'MD033' # Inline HTML
exclude_rule 'MD005' # 3-space indent for lists
exclude_rule 'MD024' # Repeated headings

1
.mdlrc Normal file
View File

@@ -0,0 +1 @@
style ".md.rb"

35
.pre-commit-config.yaml Normal file
View File

@@ -0,0 +1,35 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-added-large-files
- id: check-merge-conflict
- repo: https://github.com/jumanjihouse/pre-commit-hooks
rev: 3.0.0
hooks:
- id: shellcheck
- id: markdownlint
exclude: '^content'
- repo: https://github.com/mrtazz/checkmake
rev: 0.2.2
hooks:
- id: checkmake
exclude: lexers/makefile.xml
- repo: https://github.com/python-jsonschema/check-jsonschema
rev: 0.29.2
hooks:
- id: check-github-workflows
- repo: https://github.com/commitizen-tools/commitizen
rev: v3.29.0 # automatically updated by Commitizen
hooks:
- id: commitizen
- id: commitizen-branch
stages:
- post-commit
- push

35
CHANGELOG.md Normal file
View File

@@ -0,0 +1,35 @@
# Changelog
All notable changes to this project will be documented in this file.
## [0.6.4] - 2024-08-28
### 🐛 Bug Fixes
- Ameba
### 📚 Documentation
- Mention AUR package
### ⚙️ Miscellaneous Tasks
- Pre-commit hooks
- Git-cliff config
- Started changelog
- Force conventional commit messages
- Force conventional commit messages
- Updated pre-commit
### Build
- Switch from Makefile to Hacefile
- Added do_release script
## [0.6.1] - 2024-08-25
### 📚 Documentation
- Improve readme and help message
<!-- generated by git-cliff -->

115
Hacefile.yml Normal file
View File

@@ -0,0 +1,115 @@
# Task definitions for the `hace` build tool.
# Variables are referenced as {{NAME}} / {{FLAGS}}; the names are
# case-sensitive, so they must be spelled exactly as declared here.
variables:
  FLAGS: "-d --error-trace"
  NAME: "tartrazine"

tasks:
  # Debug build of the binary (runs by default).
  build:
    default: true
    dependencies:
      - src
      - shard.lock
      - shard.yml
      - Hacefile.yml
      - lexers/*xml
      - styles/*xml
    outputs:
      - bin/{{NAME}}
    commands: |
      shards build {{FLAGS}}

  # Resolve shards; produces shard.lock.
  get-deps:
    dependencies:
      - shard.yml
    outputs:
      - shard.lock
    commands: |
      shards install

  # Same as build, but overriding FLAGS to get an optimized binary.
  build-release:
    phony: true
    always_run: true
    commands: |
      hace build FLAGS="--release"

  # Copy the built binary into the user's ~/.local/bin.
  install:
    phony: true
    always_run: true
    dependencies:
      - bin/tartrazine
    commands: |
      # -f: do not fail when there is no previous installation to remove
      rm -f "${HOME}/.local/bin/{{NAME}}"
      cp bin/{{NAME}} "${HOME}/.local/bin/{{NAME}}"

  # Statically linked release binaries, built by build_static.sh.
  static:
    outputs:
      - bin/{{NAME}}-static-linux-amd64
      - bin/{{NAME}}-static-linux-arm64
    commands: |
      hace clean
      ./build_static.sh

  # Run the spec suite.
  test:
    dependencies:
      - src
      - spec
      - shard.lock
      - shard.yml
    commands: |
      crystal spec -v --error-trace
    phony: true
    always_run: true

  # Format the sources and let ameba auto-fix what it can.
  lint:
    dependencies:
      - src
      - spec
      - shard.lock
      - shard.yml
    commands: |
      crystal tool format src/*.cr spec/*.cr
      ameba --fix
    always_run: true
    phony: true

  # Generate the API documentation.
  docs:
    dependencies:
      - src
      - shard.lock
      - shard.yml
      - README.md
    commands: |
      crystal docs
    outputs:
      - docs/index.html

  # Install the git hooks described in .pre-commit-config.yaml
  # (also marked as a default task).
  pre-commit:
    default: true
    outputs:
      - .git/hooks/commit-msg
      - .git/hooks/pre-commit
    dependencies:
      - .pre-commit-config.yaml
    commands: |
      pre-commit install --hook-type commit-msg
      pre-commit install

  # Remove build products and resolved dependencies.
  clean:
    phony: true
    always_run: true
    commands: |
      rm -rf shard.lock bin lib

  # Build the spec runner and execute it under kcov.
  coverage:
    dependencies:
      - src
      - spec
      - shard.lock
      - shard.yml
    commands: |
      shards install
      crystal build -o bin/run_tests src/run_tests.cr
      rm -rf coverage/
      mkdir coverage
      kcov --clean --include-path=./src ${PWD}/coverage ./bin/run_tests
    outputs:
      - coverage/index.html

View File

@@ -1,7 +0,0 @@
build: $(wildcard src/**/*.cr) $(wildcard lexers/*xml) $(wildcard styles/*xml) shard.yml
shards build -Dstrict_multi_assign -Dno_number_autocast -d --error-trace
release: $(wildcard src/**/*.cr) $(wildcard lexers/*xml) $(wildcard styles/*xml) shard.yml
shards build --release
static: $(wildcard src/**/*.cr) $(wildcard lexers/*xml) $(wildcard styles/*xml) shard.yml
shards build --release --static
strip bin/tartrazine

View File

@@ -2,44 +2,22 @@
Tartrazine is a library to syntax-highlight code. It is
a port of [Pygments](https://pygments.org/) to
[Crystal](https://crystal-lang.org/). Kind of.
[Crystal](https://crystal-lang.org/).
The CLI tool can be used to highlight many things in many styles.
It also provides a CLI tool which can be used to highlight many things in many styles.
# A port of what? Why "kind of"?
Pygments is a staple of the Python ecosystem, and it's great.
It lets you highlight code in many languages, and it has many
themes. Chroma is "Pygments for Go", it's actually a port of
Pygments to Go, and it's great too.
I wanted that in Crystal, so I started this project. But I did
not read much of the Pygments code. Or much of Chroma's.
Chroma has taken most of the Pygments lexers and turned them into
XML descriptions. What I did was take those XML files from Chroma
and a pile of test cases from Pygments, and I slapped them together
until the tests passed and my code produced the same output as
Chroma. Think of it as *extreme TDD*.
Currently the pass rate for tests in the supported languages
is `96.8%`, which is *not bad for a couple days hacking*.
This only covers the RegexLexers, which are the most common ones,
but it means the supported languages are a subset of Chroma's, which
is a subset of Pygments'.
Currently Tartrazine supports ... 248 languages.
It has 331 themes (63 from Chroma, the rest are base16 themes via
[Sixteen](https://github.com/ralsina/sixteen)
Currently Tartrazine supports 247 languages and has 331 themes (63 from Chroma,
the rest are base16 themes via [Sixteen](https://github.com/ralsina/sixteen)).
## Installation
If you are using Arch: Use yay or your favourite AUR helper, package name is `tartrazine`.
From prebuilt binaries:
Each release provides statically-linked binaries that should
work on any Linux. Get them from the [releases page](https://github.com/ralsina/tartrazine/releases) and put them in your PATH.
work on any Linux. Get them from the [releases page](https://github.com/ralsina/tartrazine/releases)
and put them in your PATH.
To build from source:
@@ -52,13 +30,13 @@ To build from source:
Show a syntax highlighted version of a C source file in your terminal:
```shell
$ tartrazine whatever.c -l c -t catppuccin-macchiato --line-numbers -f terminal
tartrazine whatever.c -l c -t catppuccin-macchiato --line-numbers -f terminal
```
Generate a standalone HTML file from a C source file with the syntax highlighted:
```shell
$ tartrazine whatever.c -l c -t catppuccin-macchiato --line-numbers \
$ tartrazine whatever.c -t catppuccin-macchiato --line-numbers \
--standalone -f html -o whatever.html
```
@@ -87,3 +65,30 @@ puts formatter.format(File.read(ARGV[0]), lexer)
## Contributors
- [Roberto Alsina](https://github.com/ralsina) - creator and maintainer
## A port of what, and why "kind of"
Pygments is a staple of the Python ecosystem, and it's great.
It lets you highlight code in many languages, and it has many
themes. Chroma is "Pygments for Go", it's actually a port of
Pygments to Go, and it's great too.
I wanted that in Crystal, so I started this project. But I did
not read much of the Pygments code. Or much of Chroma's.
Chroma has taken most of the Pygments lexers and turned them into
XML descriptions. What I did was take those XML files from Chroma
and a pile of test cases from Pygments, and I slapped them together
until the tests passed and my code produced the same output as
Chroma. Think of it as [*extreme TDD*](https://ralsina.me/weblog/posts/tartrazine-reimplementing-pygments.html).
Currently the pass rate for tests in the supported languages
is `96.8%`, which is *not bad for a couple days hacking*.
This only covers the RegexLexers, which are the most common ones,
and DelegatingLexers (useful for things like template languages),
but it means the supported languages are a subset of Chroma's, which
is a subset of Pygments'.
Then performance was bad, so I hacked and hacked and made it significantly
[faster than chroma](https://ralsina.me/weblog/posts/a-tale-of-optimization.html)
which is fun.

View File

@@ -8,8 +8,8 @@
* ✅ Implement lexer loader that respects aliases
* ✅ Implement lexer loader by file extension
* ✅ Add --line-numbers to terminal formatter
* Implement lexer loader by mime type
* Implement lexer loader by mime type
* ✅ Implement Delegating lexers
* ✅ Add RstLexer
* Add Mako template lexer
* Implement heuristic lexer detection
* Implement heuristic lexer detection

View File

@@ -7,10 +7,10 @@ docker run --rm --privileged \
# Build for AMD64
docker build . -f Dockerfile.static -t tartrazine-builder
docker run -ti --rm -v "$PWD":/app --user="$UID" tartrazine-builder /bin/sh -c "cd /app && rm -rf lib shard.lock && make static"
docker run -ti --rm -v "$PWD":/app --user="$UID" tartrazine-builder /bin/sh -c "cd /app && rm -rf lib shard.lock && shards build --static --release"
mv bin/tartrazine bin/tartrazine-static-linux-amd64
# Build for ARM64
docker build . -f Dockerfile.static --platform linux/arm64 -t tartrazine-builder
docker run -ti --rm -v "$PWD":/app --platform linux/arm64 --user="$UID" tartrazine-builder /bin/sh -c "cd /app && rm -rf lib shard.lock && make static"
docker run -ti --rm -v "$PWD":/app --platform linux/arm64 --user="$UID" tartrazine-builder /bin/sh -c "cd /app && rm -rf lib shard.lock && shards build --static --release"
mv bin/tartrazine bin/tartrazine-static-linux-arm64

79
cliff.toml Normal file
View File

@@ -0,0 +1,79 @@
# git-cliff ~ default configuration file
# https://git-cliff.org/docs/configuration
#
# Lines starting with "#" are comments.
# Configuration options are organized into tables and keys.
# See documentation for more information on available options.
[changelog]
# template for the changelog header
header = """
# Changelog\n
All notable changes to this project will be documented in this file.\n
"""
# template for the changelog body
# https://keats.github.io/tera/docs/#introduction
body = """
{% if version %}\
## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }}
{% else %}\
## [unreleased]
{% endif %}\
{% for group, commits in commits | group_by(attribute="group") %}
### {{ group | striptags | trim | upper_first }}
{% for commit in commits %}
- {% if commit.scope %}*({{ commit.scope }})* {% endif %}\
{% if commit.breaking %}[**breaking**] {% endif %}\
{{ commit.message | upper_first }}\
{% endfor %}
{% endfor %}\n
"""
# template for the changelog footer
footer = """
<!-- generated by git-cliff -->
"""
# remove the leading and trailing whitespace
trim = true
# postprocessors
postprocessors = [
# { pattern = '<REPO>', replace = "https://github.com/orhun/git-cliff" }, # replace repository URL
]
[git]
# parse the commits based on https://www.conventionalcommits.org
conventional_commits = true
# filter out the commits that are not conventional
filter_unconventional = true
# process each line of a commit as an individual commit
split_commits = false
# regex for preprocessing the commit messages
commit_preprocessors = [
# Replace issue numbers
#{ pattern = '\((\w+\s)?#([0-9]+)\)', replace = "([#${2}](<REPO>/issues/${2}))"},
# Check spelling of the commit with https://github.com/crate-ci/typos
# If the spelling is incorrect, it will be automatically fixed.
#{ pattern = '.*', replace_command = 'typos --write-changes -' },
]
# regex for parsing and grouping commits
commit_parsers = [
{ message = "^feat", group = "<!-- 0 -->🚀 Features" },
{ message = "^fix", group = "<!-- 1 -->🐛 Bug Fixes" },
{ message = "^doc", group = "<!-- 3 -->📚 Documentation" },
{ message = "^perf", group = "<!-- 4 -->⚡ Performance" },
{ message = "^refactor", group = "<!-- 2 -->🚜 Refactor" },
{ message = "^style", group = "<!-- 5 -->🎨 Styling" },
{ message = "^test", group = "<!-- 6 -->🧪 Testing" },
{ message = "^chore\\(release\\): prepare for", skip = true },
{ message = "^chore\\(deps.*\\)", skip = true },
{ message = "^chore\\(pr\\)", skip = true },
{ message = "^chore\\(pull\\)", skip = true },
{ message = "^chore|^ci", group = "<!-- 7 -->⚙️ Miscellaneous Tasks" },
{ body = ".*security", group = "<!-- 8 -->🛡️ Security" },
{ message = "^revert", group = "<!-- 9 -->◀️ Revert" },
]
# filter out the commits that are not matched by commit parsers
filter_commits = false
# sort the tags topologically
topo_order = false
# sort the commits inside sections by oldest/newest order
sort_commits = "oldest"

15
do_release.sh Executable file
View File

@@ -0,0 +1,15 @@
#!/bin/bash
# Cut a release: bump the version in shard.yml, lint and test, regenerate the
# changelog, commit and tag, build the static binaries and publish a GitHub
# release with them attached.
#
# Requires git-cliff, hace and gh on the PATH, and must be run from the
# repository root (the package name is taken from the directory name).

# Abort on the first failing command. The original `set e` only assigned "e"
# to $1 and left errexit off, so a failed lint/test run did not stop the
# release. pipefail makes a failing `git cliff` in the pipeline below count.
set -eo pipefail

PKGNAME=$(basename "$PWD")
# git-cliff prints the next version as e.g. "v1.2.3"; keep what follows the "v"
VERSION=$(git cliff --bumped-version | cut -dv -f2)
if [ -z "$VERSION" ]; then
  echo "Could not determine the next version from git-cliff" >&2
  exit 1
fi

sed "s/^version:.*$/version: $VERSION/g" -i shard.yml
git add shard.yml
hace lint test
# Regenerate the changelog for the bumped version (-o without a path writes
# to git-cliff's default output file — presumably CHANGELOG.md; confirm)
git cliff --bump -o
git commit -a -m "bump: Release v$VERSION"
git tag "v$VERSION"
# NOTE(review): this pushes the tags only; the branch itself is not pushed here
git push --tags
hace static
gh release create "v$VERSION" "bin/$PKGNAME-static-linux-amd64" "bin/$PKGNAME-static-linux-arm64" --title "Release v$VERSION" --notes "$(git cliff -l -s all)"

View File

@@ -127,4 +127,3 @@
</state>
</rules>
</lexer>

View File

@@ -52,4 +52,3 @@
</state>
</rules>
</lexer>

View File

@@ -63,4 +63,3 @@
</state>
</rules>
</lexer>

View File

@@ -55,4 +55,3 @@
</state>
</rules>
</lexer>

View File

@@ -75,4 +75,3 @@
</state>
</rules>
</lexer>

View File

@@ -67,4 +67,3 @@
</state>
</rules>
</lexer>

View File

@@ -19,4 +19,3 @@
</state>
</rules>
</lexer>

View File

@@ -3,6 +3,7 @@
<name>Groff</name>
<alias>groff</alias>
<alias>nroff</alias>
<alias>roff</alias>
<alias>man</alias>
<filename>*.[1-9]</filename>
<filename>*.1p</filename>

View File

@@ -30,12 +30,12 @@
disambiguations:
- extensions: ['.1', '.2', '.3', '.4', '.5', '.6', '.7', '.8', '.9']
rules:
- language: Roff Manpage
- language: man
and:
- named_pattern: mdoc-date
- named_pattern: mdoc-title
- named_pattern: mdoc-heading
- language: Roff Manpage
- language: man
and:
- named_pattern: man-title
- named_pattern: man-heading
@@ -43,12 +43,12 @@ disambiguations:
pattern: '^\.(?:[A-Za-z]{2}(?:\s|$)|\\")'
- extensions: ['.1in', '.1m', '.1x', '.3in', '.3m', '.3p', '.3pm', '.3qt', '.3x', '.man', '.mdoc']
rules:
- language: Roff Manpage
- language: man
and:
- named_pattern: mdoc-date
- named_pattern: mdoc-title
- named_pattern: mdoc-heading
- language: Roff Manpage
- language: man
and:
- named_pattern: man-title
- named_pattern: man-heading

View File

@@ -53,4 +53,3 @@
</state>
</rules>
</lexer>

View File

@@ -31,4 +31,3 @@
</state>
</rules>
</lexer>

View File

@@ -55,4 +55,3 @@
</state>
</rules>
</lexer>

View File

@@ -73,4 +73,3 @@
</state>
</rules>
</lexer>

View File

@@ -70,4 +70,3 @@
</state>
</rules>
</lexer>

View File

@@ -40,4 +40,3 @@
</state>
</rules>
</lexer>

View File

@@ -52,6 +52,6 @@ with open("src/constants/lexers.cr", "w") as f:
f.write(" LEXERS_BY_FILENAME = {\n")
for k in sorted(lexer_by_filename.keys()):
v = lexer_by_filename[k]
f.write(f'"{k}" => {str(list(v)).replace("'", "\"")}, \n')
f.write(f'"{k}" => {str(sorted(list(v))).replace("'", "\"")}, \n')
f.write("}\n")
f.write("end\n")

View File

@@ -1,5 +1,5 @@
name: tartrazine
version: 0.6.0
version: 0.6.4
authors:
- Roberto Alsina <roberto.alsina@gmail.com>
@@ -10,7 +10,8 @@ targets:
dependencies:
baked_file_system:
github: schovi/baked_file_system
github: ralsina/baked_file_system
branch: master
base58:
github: crystal-china/base58.cr
sixteen:

View File

@@ -73,7 +73,7 @@ end
# Helper that creates lexer and tokenizes
def tokenize(lexer_name, text)
tokenizer = Tartrazine.lexer(lexer_name).tokenizer(text)
Tartrazine::Lexer.collapse_tokens(tokenizer.to_a)
Tartrazine::RegexLexer.collapse_tokens(tokenizer.to_a)
end
# Helper that tokenizes using chroma to validate the lexer
@@ -85,5 +85,5 @@ def chroma_tokenize(lexer_name, text)
["-f", "json", "-l", lexer_name],
input: input, output: output
)
Tartrazine::Lexer.collapse_tokens(Array(Tartrazine::Token).from_json(output.to_s))
Tartrazine::RegexLexer.collapse_tokens(Array(Tartrazine::Token).from_json(output.to_s))
end

View File

@@ -328,6 +328,7 @@ module Tartrazine
"restructuredtext" => "rst",
"rexx" => "rexx",
"rkt" => "racket",
"roff" => "groff",
"rpmspec" => "rpm_spec",
"rs" => "rust",
"rst" => "rst",
@@ -730,8 +731,8 @@ module Tartrazine
"*.applescript" => ["applescript"],
"*.aql" => ["arangodb_aql"],
"*.arexx" => ["rexx"],
"*.as" => ["actionscript_3", "actionscript"],
"*.asm" => ["tasm", "nasm", "z80_assembly"],
"*.as" => ["actionscript", "actionscript_3"],
"*.asm" => ["nasm", "tasm", "z80_assembly"],
"*.au3" => ["autoit"],
"*.automount" => ["systemd"],
"*.aux" => ["tex"],
@@ -739,7 +740,7 @@ module Tartrazine
"*.awk" => ["awk"],
"*.b" => ["brainfuck"],
"*.bal" => ["ballerina"],
"*.bas" => ["vb_net", "qbasic"],
"*.bas" => ["qbasic", "vb_net"],
"*.bash" => ["bash"],
"*.bat" => ["batchfile"],
"*.batch" => ["psl"],
@@ -750,7 +751,7 @@ module Tartrazine
"*.bnf" => ["bnf"],
"*.bqn" => ["bqn"],
"*.bzl" => ["python"],
"*.c" => ["c++", "c"],
"*.c" => ["c", "c++"],
"*.c++" => ["c++"],
"*.capnp" => ["cap_n_proto"],
"*.cc" => ["c++"],
@@ -839,7 +840,7 @@ module Tartrazine
"*.fx" => ["hlsl"],
"*.fxh" => ["hlsl"],
"*.fzn" => ["minizinc"],
"*.gd" => ["gdscript3", "gdscript"],
"*.gd" => ["gdscript", "gdscript3"],
"*.gemspec" => ["ruby"],
"*.geo" => ["glsl"],
"*.gleam" => ["gleam"],
@@ -849,7 +850,7 @@ module Tartrazine
"*.graphql" => ["graphql"],
"*.graphqls" => ["graphql"],
"*.groovy" => ["groovy"],
"*.h" => ["c++", "c", "objective-c"],
"*.h" => ["c", "c++", "objective-c"],
"*.h++" => ["c++"],
"*.ha" => ["hare"],
"*.handlebars" => ["handlebars"],
@@ -872,7 +873,7 @@ module Tartrazine
"*.idc" => ["c"],
"*.idr" => ["idris"],
"*.ijs" => ["j"],
"*.inc" => ["objectpascal", "povray", "php", "sourcepawn"],
"*.inc" => ["objectpascal", "php", "povray", "sourcepawn"],
"*.inf" => ["ini"],
"*.ini" => ["ini"],
"*.ino" => ["arduino"],
@@ -898,13 +899,13 @@ module Tartrazine
"*.lpk" => ["objectpascal"],
"*.lpr" => ["objectpascal"],
"*.lua" => ["lua"],
"*.m" => ["mathematica", "octave", "matlab", "objective-c", "mason"],
"*.m" => ["mason", "mathematica", "matlab", "objective-c", "octave"],
"*.ma" => ["mathematica"],
"*.mak" => ["makefile"],
"*.man" => ["groff"],
"*.mao" => ["mako"],
"*.markdown" => ["markdown"],
"*.mc" => ["monkeyc", "mason"],
"*.mc" => ["mason", "monkeyc"],
"*.mcfunction" => ["mcfunction"],
"*.md" => ["markdown"],
"*.metal" => ["metal"],
@@ -961,7 +962,7 @@ module Tartrazine
"*.pml" => ["promela"],
"*.pony" => ["pony"],
"*.pov" => ["povray"],
"*.pp" => ["puppet", "objectpascal"],
"*.pp" => ["objectpascal", "puppet"],
"*.pq" => ["powerquery"],
"*.pr" => ["promela"],
"*.prm" => ["promela"],
@@ -1010,7 +1011,7 @@ module Tartrazine
"*.rst" => ["rst"],
"*.rvt" => ["tcl"],
"*.rx" => ["rexx"],
"*.s" => ["armasm", "r", "gas"],
"*.s" => ["armasm", "gas", "r"],
"*.sage" => ["python"],
"*.sas" => ["sas"],
"*.sass" => ["sass"],
@@ -1023,7 +1024,7 @@ module Tartrazine
"*.scope" => ["systemd"],
"*.scss" => ["scss"],
"*.sed" => ["sed"],
"*.service" => ["systemd", "ini"],
"*.service" => ["ini", "systemd"],
"*.sh" => ["bash"],
"*.sh-session" => ["bash_session"],
"*.sieve" => ["sieve"],
@@ -1033,7 +1034,7 @@ module Tartrazine
"*.smali" => ["smali"],
"*.sml" => ["standard_ml"],
"*.snobol" => ["snobol"],
"*.socket" => ["systemd", "ini"],
"*.socket" => ["ini", "systemd"],
"*.sol" => ["solidity"],
"*.sp" => ["sourcepawn"],
"*.sparql" => ["sparql"],
@@ -1068,7 +1069,7 @@ module Tartrazine
"*.tpl" => ["smarty"],
"*.tpp" => ["c++"],
"*.trig" => ["psl"],
"*.ts" => ["typoscript", "typescript"],
"*.ts" => ["typescript", "typoscript"],
"*.tst" => ["scilab"],
"*.tsx" => ["typescript"],
"*.ttl" => ["turtle"],
@@ -1104,7 +1105,7 @@ module Tartrazine
"*.xml" => ["xml"],
"*.xsd" => ["xml"],
"*.xsl" => ["xml"],
"*.xslt" => ["xml", "html"],
"*.xslt" => ["html", "xml"],
"*.yaml" => ["yaml"],
"*.yang" => ["yang"],
"*.yml" => ["yaml"],

View File

@@ -11,7 +11,7 @@ module Tartrazine
"#{i + 1}".rjust(4).ljust(5)
end
def format(text : String, lexer : Lexer) : String
def format(text : String, lexer : BaseLexer) : String
outp = String::Builder.new("")
format(text, lexer, outp)
outp.to_s

View File

@@ -12,7 +12,7 @@ module Tartrazine
def format(text : String, lexer : BaseLexer, io : IO) : Nil
tokenizer = lexer.tokenizer(text)
io << Tartrazine::Lexer.collapse_tokens(tokenizer.to_a).to_json
io << Tartrazine::RegexLexer.collapse_tokens(tokenizer.to_a).to_json
end
end
end

View File

@@ -1,13 +1,12 @@
require "yaml"
# Use linguist's heuristics to disambiguate between languages
# This is *shamelessly* stolen from https://github.com/github-linguist/linguist
# and ported to Crystal. Deepest thanks to the authors of Linguist
# for licensing it liberally.
#
# Consider this code (c) 2017 GitHub, Inc. even if I wrote it.
module Linguist
# Use linguist's heuristics to disambiguate between languages
# This is *shamelessly* stolen from https://github.com/github-linguist/linguist
# and ported to Crystal. Deepest thanks to the authors of Linguist
# for licensing it liberally.
#
# Consider this code (c) 2017 GitHub, Inc. even if I wrote it.
module Linguist
class Heuristic
include YAML::Serializable
@@ -80,7 +79,3 @@ require "yaml"
end
end
end
h = Linguist::Heuristic.from_yaml(File.read("heuristics/heuristics.yml"))
fname = "/usr/include/sqlite3.h"
p! h.run(fname, File.read(fname))

View File

@@ -1,5 +1,6 @@
require "baked_file_system"
require "./constants/lexers"
require "./heuristics"
require "baked_file_system"
module Tartrazine
class LexerFiles
@@ -9,11 +10,19 @@ module Tartrazine
# Get the lexer object for a language name, a filename or a mimetype.
# They are tried in that order; if none applies, the plaintext lexer is returned.
def self.lexer(name : String? = nil, filename : String? = nil) : BaseLexer
def self.lexer(name : String? = nil, filename : String? = nil, mimetype : String? = nil) : BaseLexer
return lexer_by_name(name) if name && name != "autodetect"
return lexer_by_filename(filename) if filename
return lexer_by_mimetype(mimetype) if mimetype
Lexer.from_xml(LexerFiles.get("/#{LEXERS_BY_NAME["plaintext"]}.xml").gets_to_end)
RegexLexer.from_xml(LexerFiles.get("/#{LEXERS_BY_NAME["plaintext"]}.xml").gets_to_end)
end
# Build the lexer registered for *mimetype* in LEXERS_BY_MIMETYPE from its
# XML definition stored in LexerFiles.
#
# Raises an Exception when the mimetype has no registered lexer.
private def self.lexer_by_mimetype(mimetype : String) : BaseLexer
  unless xml_name = LEXERS_BY_MIMETYPE.fetch(mimetype, nil)
    raise Exception.new("Unknown mimetype: #{mimetype}")
  end
  definition = LexerFiles.get("/#{xml_name}.xml").gets_to_end
  RegexLexer.from_xml(definition)
end
private def self.lexer_by_name(name : String) : BaseLexer
@@ -21,7 +30,7 @@ module Tartrazine
return create_delegating_lexer(name) if lexer_file_name.nil? && name.includes? "+"
raise Exception.new("Unknown lexer: #{name}") if lexer_file_name.nil?
Lexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
RegexLexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
end
private def self.lexer_by_filename(filename : String) : BaseLexer
@@ -36,10 +45,28 @@ module Tartrazine
when 1
lexer_file_name = candidates.first
else
raise Exception.new("Multiple lexers match the filename: #{candidates.to_a.join(", ")}")
lexer_file_name = self.lexer_by_content(filename)
begin
return self.lexer(lexer_file_name)
rescue ex : Exception
raise Exception.new("Multiple lexers match the filename: #{candidates.to_a.join(", ")}, heuristics suggest #{lexer_file_name} but there is no matching lexer.")
end
end
Lexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
RegexLexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
end
# Run the Linguist heuristics (loaded from "/heuristics.yml" in LexerFiles)
# against the contents of the file *fname* and return the suggested language.
#
# When the heuristics return several candidates, the first one is used.
# Raises an Exception when they return nothing.
private def self.lexer_by_content(fname : String) : String?
  rules = LexerFiles.get("/heuristics.yml").gets_to_end
  guess = Linguist::Heuristic.from_yaml(rules).run(fname, File.read(fname))
  if guess.nil?
    raise Exception.new "No lexer found for #{fname}"
  elsif guess.is_a?(String)
    guess
  elsif guess.is_a?(Array(String))
    guess.first
  end
end
private def self.create_delegating_lexer(name : String) : BaseLexer
@@ -126,7 +153,9 @@ module Tartrazine
end
end
abstract class BaseLexer
alias BaseLexer = Lexer
abstract class Lexer
property config = {
name: "",
priority: 0.0,
@@ -148,7 +177,7 @@ module Tartrazine
# For explanations on what actions and states do
# the Pygments documentation is a good place to start.
# https://pygments.org/docs/lexerdevelopment/
class Lexer < BaseLexer
class RegexLexer < BaseLexer
# Collapse consecutive tokens of the same type for easier comparison
# and smaller output
def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token)
@@ -172,7 +201,7 @@ module Tartrazine
end
def self.from_xml(xml : String) : Lexer
l = Lexer.new
l = RegexLexer.new
lexer = XML.parse(xml).first_element_child
if lexer
config = lexer.children.find { |node|
@@ -237,7 +266,7 @@ module Tartrazine
#
# This is useful for things like template languages, where
# you have Jinja + HTML or Jinja + CSS and so on.
class DelegatingLexer < BaseLexer
class DelegatingLexer < Lexer
property language_lexer : BaseLexer
property root_lexer : BaseLexer

View File

@@ -1,5 +1,5 @@
require "docopt"
require "./**"
require "./tartrazine"
HELP = <<-HELP
tartrazine: a syntax highlighting tool

1
src/run_tests.cr Normal file
View File

@@ -0,0 +1 @@
require "../spec/**"

View File

@@ -1,5 +1,6 @@
require "./actions"
require "./formatter"
require "./formatters/**"
require "./rules"
require "./styles"
require "./tartrazine"