mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-07-01 20:37:08 -03:00
Compare commits
22 Commits
heuristics
...
v0.6.4
Author | SHA1 | Date | |
---|---|---|---|
81e813202b | |||
c3ed284bcb | |||
fd549681d6 | |||
2aa8b235ee | |||
dcfd960107 | |||
5af09edc5f | |||
fc53344649 | |||
6766eb14f3 | |||
3d3f9fcc24 | |||
61899cfe83 | |||
a583b7359e | |||
de2a4a1996 | |||
31334ac802 | |||
6d64491938 | |||
fb693bb221 | |||
c6824a99df | |||
4dd2e925b0 | |||
7bda19cdea | |||
0e7dafe711 | |||
082241eb0f | |||
df88047ca8 | |||
5a3b50d7a3 |
1
.gitignore
vendored
1
.gitignore
vendored
@ -9,3 +9,4 @@ shard.lock
|
||||
.vscode/
|
||||
.crystal/
|
||||
venv/
|
||||
.croupier
|
||||
|
3
.md.rb
Normal file
3
.md.rb
Normal file
@ -0,0 +1,3 @@
|
||||
exclude_rule 'MD033' # Inline HTML
|
||||
exclude_rule 'MD005' # 3-space indent for lists
|
||||
exclude_rule 'MD024' # Repeated headings
|
35
.pre-commit-config.yaml
Normal file
35
.pre-commit-config.yaml
Normal file
@ -0,0 +1,35 @@
|
||||
# See https://pre-commit.com for more information
|
||||
# See https://pre-commit.com/hooks.html for more hooks
|
||||
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v4.6.0
|
||||
hooks:
|
||||
- id: trailing-whitespace
|
||||
- id: end-of-file-fixer
|
||||
- id: check-yaml
|
||||
- id: check-added-large-files
|
||||
- id: check-merge-conflict
|
||||
- repo: https://github.com/jumanjihouse/pre-commit-hooks
|
||||
rev: 3.0.0
|
||||
hooks:
|
||||
- id: shellcheck
|
||||
- id: markdownlint
|
||||
exclude: '^content'
|
||||
- repo: https://github.com/mrtazz/checkmake
|
||||
rev: 0.2.2
|
||||
hooks:
|
||||
- id: checkmake
|
||||
exclude: lexers/makefile.xml
|
||||
- repo: https://github.com/python-jsonschema/check-jsonschema
|
||||
rev: 0.29.2
|
||||
hooks:
|
||||
- id: check-github-workflows
|
||||
- repo: https://github.com/commitizen-tools/commitizen
|
||||
rev: v3.29.0 # automatically updated by Commitizen
|
||||
hooks:
|
||||
- id: commitizen
|
||||
- id: commitizen-branch
|
||||
stages:
|
||||
- post-commit
|
||||
- push
|
41
CHANGELOG.md
Normal file
41
CHANGELOG.md
Normal file
@ -0,0 +1,41 @@
|
||||
# Changelog
|
||||
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
## [0.6.4] - 2024-08-28
|
||||
|
||||
### 🐛 Bug Fixes
|
||||
|
||||
- Ameba
|
||||
- Variable name in Hacefile
|
||||
|
||||
### 📚 Documentation
|
||||
|
||||
- Mention AUR package
|
||||
|
||||
### ⚙️ Miscellaneous Tasks
|
||||
|
||||
- Pre-commit hooks
|
||||
- Git-cliff config
|
||||
- Started changelog
|
||||
- Force conventional commit messages
|
||||
- Force conventional commit messages
|
||||
- Updated pre-commit
|
||||
|
||||
### Build
|
||||
|
||||
- Switch from Makefile to Hacefile
|
||||
- Added do_release script
|
||||
- Fix markdown check
|
||||
|
||||
### Bump
|
||||
|
||||
- Release v0.6.4
|
||||
|
||||
## [0.6.1] - 2024-08-25
|
||||
|
||||
### 📚 Documentation
|
||||
|
||||
- Improve readme and help message
|
||||
|
||||
<!-- generated by git-cliff -->
|
115
Hacefile.yml
Normal file
115
Hacefile.yml
Normal file
@ -0,0 +1,115 @@
|
||||
variables:
|
||||
FLAGS: "-d --error-trace"
|
||||
NAME: "tartrazine"
|
||||
|
||||
tasks:
|
||||
build:
|
||||
default: true
|
||||
dependencies:
|
||||
- src
|
||||
- shard.lock
|
||||
- shard.yml
|
||||
- Hacefile.yml
|
||||
- lexers/*xml
|
||||
- styles/*xml
|
||||
outputs:
|
||||
- bin/{{NAME}}
|
||||
commands: |
|
||||
shards build {{FLAGS}}
|
||||
|
||||
get-deps:
|
||||
dependencies:
|
||||
- shard.yml
|
||||
outputs:
|
||||
- shard.lock
|
||||
commands: |
|
||||
shards install
|
||||
|
||||
build-release:
|
||||
phony: true
|
||||
always_run: true
|
||||
commands: |
|
||||
hace build FLAGS="--release"
|
||||
|
||||
install:  # copies the built binary into ~/.local/bin under the project name
|
||||
phony: true
|
||||
always_run: true
|
||||
dependencies:
|
||||
- bin/{{NAME}}  # the binary listed as the build task's output
|
||||
commands: |
|
||||
rm -f ${HOME}/.local/bin/{{NAME}}
|
||||
cp bin/{{NAME}} ${HOME}/.local/bin/{{NAME}}
|
||||
|
||||
static:
|
||||
outputs:
|
||||
- bin/{{NAME}}-static-linux-amd64
|
||||
- bin/{{NAME}}-static-linux-arm64
|
||||
commands: |
|
||||
hace clean
|
||||
./build_static.sh
|
||||
|
||||
test:
|
||||
dependencies:
|
||||
- src
|
||||
- spec
|
||||
- shard.lock
|
||||
- shard.yml
|
||||
commands: |
|
||||
crystal spec -v --error-trace
|
||||
phony: true
|
||||
always_run: true
|
||||
|
||||
lint:
|
||||
dependencies:
|
||||
- src
|
||||
- spec
|
||||
- shard.lock
|
||||
- shard.yml
|
||||
commands: |
|
||||
crystal tool format src/*.cr spec/*.cr
|
||||
ameba --fix
|
||||
always_run: true
|
||||
phony: true
|
||||
|
||||
docs:
|
||||
dependencies:
|
||||
- src
|
||||
- shard.lock
|
||||
- shard.yml
|
||||
- README.md
|
||||
commands: |
|
||||
crystal docs
|
||||
outputs:
|
||||
- docs/index.html
|
||||
|
||||
pre-commit:
|
||||
default: true
|
||||
outputs:
|
||||
- .git/hooks/commit-msg
|
||||
- .git/hooks/pre-commit
|
||||
dependencies:
|
||||
- .pre-commit-config.yaml
|
||||
commands: |
|
||||
pre-commit install --hook-type commit-msg
|
||||
pre-commit install
|
||||
|
||||
clean:
|
||||
phony: true
|
||||
always_run: true
|
||||
commands: |
|
||||
rm -rf shard.lock bin lib
|
||||
|
||||
coverage:
|
||||
dependencies:
|
||||
- src
|
||||
- spec
|
||||
- shard.lock
|
||||
- shard.yml
|
||||
commands: |
|
||||
shards install
|
||||
crystal build -o bin/run_tests src/run_tests.cr
|
||||
rm -rf coverage/
|
||||
mkdir coverage
|
||||
kcov --clean --include-path=./src ${PWD}/coverage ./bin/run_tests
|
||||
outputs:
|
||||
- coverage/index.html
|
7
Makefile
7
Makefile
@ -1,7 +0,0 @@
|
||||
build: $(wildcard src/**/*.cr) $(wildcard lexers/*xml) $(wildcard styles/*xml) shard.yml
|
||||
shards build -Dstrict_multi_assign -Dno_number_autocast -d --error-trace
|
||||
release: $(wildcard src/**/*.cr) $(wildcard lexers/*xml) $(wildcard styles/*xml) shard.yml
|
||||
shards build --release
|
||||
static: $(wildcard src/**/*.cr) $(wildcard lexers/*xml) $(wildcard styles/*xml) shard.yml
|
||||
shards build --release --static
|
||||
strip bin/tartrazine
|
69
README.md
69
README.md
@ -2,44 +2,22 @@
|
||||
|
||||
Tartrazine is a library to syntax-highlight code. It is
|
||||
a port of [Pygments](https://pygments.org/) to
|
||||
[Crystal](https://crystal-lang.org/). Kind of.
|
||||
[Crystal](https://crystal-lang.org/).
|
||||
|
||||
The CLI tool can be used to highlight many things in many styles.
|
||||
It also provides a CLI tool which can be used to highlight many things in many styles.
|
||||
|
||||
# A port of what? Why "kind of"?
|
||||
|
||||
Pygments is a staple of the Python ecosystem, and it's great.
|
||||
It lets you highlight code in many languages, and it has many
|
||||
themes. Chroma is "Pygments for Go", it's actually a port of
|
||||
Pygments to Go, and it's great too.
|
||||
|
||||
I wanted that in Crystal, so I started this project. But I did
|
||||
not read much of the Pygments code. Or much of Chroma's.
|
||||
|
||||
Chroma has taken most of the Pygments lexers and turned them into
|
||||
XML descriptions. What I did was take those XML files from Chroma
|
||||
and a pile of test cases from Pygments, and I slapped them together
|
||||
until the tests passed and my code produced the same output as
|
||||
Chroma. Think of it as *extreme TDD*.
|
||||
|
||||
Currently the pass rate for tests in the supported languages
|
||||
is `96.8%`, which is *not bad for a couple days hacking*.
|
||||
|
||||
This only covers the RegexLexers, which are the most common ones,
|
||||
but it means the supported languages are a subset of Chroma's, which
|
||||
is a subset of Pygments'.
|
||||
|
||||
Currently Tartrazine supports ... 248 languages.
|
||||
|
||||
It has 331 themes (63 from Chroma, the rest are base16 themes via
|
||||
[Sixteen](https://github.com/ralsina/sixteen)
|
||||
Currently Tartrazine supports 247 languages and has 331 themes (63 from Chroma,
|
||||
the rest are base16 themes via [Sixteen](https://github.com/ralsina/sixteen)).
|
||||
|
||||
## Installation
|
||||
|
||||
If you are using Arch, use yay or your favourite AUR helper; the package name is `tartrazine`.
|
||||
|
||||
From prebuilt binaries:
|
||||
|
||||
Each release provides statically-linked binaries that should
|
||||
work on any Linux. Get them from the [releases page](https://github.com/ralsina/tartrazine/releases) and put them in your PATH.
|
||||
work on any Linux. Get them from the [releases page](https://github.com/ralsina/tartrazine/releases)
|
||||
and put them in your PATH.
|
||||
|
||||
To build from source:
|
||||
|
||||
@ -52,13 +30,13 @@ To build from source:
|
||||
Show a syntax highlighted version of a C source file in your terminal:
|
||||
|
||||
```shell
|
||||
$ tartrazine whatever.c -l c -t catppuccin-macchiato --line-numbers -f terminal
|
||||
tartrazine whatever.c -l c -t catppuccin-macchiato --line-numbers -f terminal
|
||||
```
|
||||
|
||||
Generate a standalone HTML file from a C source file with the syntax highlighted:
|
||||
|
||||
```shell
|
||||
$ tartrazine whatever.c -l c -t catppuccin-macchiato --line-numbers \
|
||||
$ tartrazine whatever.c -t catppuccin-macchiato --line-numbers \
|
||||
--standalone -f html -o whatever.html
|
||||
```
|
||||
|
||||
@ -87,3 +65,30 @@ puts formatter.format(File.read(ARGV[0]), lexer)
|
||||
## Contributors
|
||||
|
||||
- [Roberto Alsina](https://github.com/ralsina) - creator and maintainer
|
||||
|
||||
## A port of what, and why "kind of"
|
||||
|
||||
Pygments is a staple of the Python ecosystem, and it's great.
|
||||
It lets you highlight code in many languages, and it has many
|
||||
themes. Chroma is "Pygments for Go", it's actually a port of
|
||||
Pygments to Go, and it's great too.
|
||||
|
||||
I wanted that in Crystal, so I started this project. But I did
|
||||
not read much of the Pygments code. Or much of Chroma's.
|
||||
|
||||
Chroma has taken most of the Pygments lexers and turned them into
|
||||
XML descriptions. What I did was take those XML files from Chroma
|
||||
and a pile of test cases from Pygments, and I slapped them together
|
||||
until the tests passed and my code produced the same output as
|
||||
Chroma. Think of it as [*extreme TDD*](https://ralsina.me/weblog/posts/tartrazine-reimplementing-pygments.html).
|
||||
|
||||
Currently the pass rate for tests in the supported languages
|
||||
is `96.8%`, which is *not bad for a couple days hacking*.
|
||||
|
||||
This only covers the RegexLexers, which are the most common ones, and
|
||||
DelegatingLexers (useful for things like template languages), so the
|
||||
supported languages are a subset of Chroma's, which is a subset of Pygments'.
|
||||
|
||||
Then performance was bad, so I hacked and hacked and made it significantly
|
||||
[faster than chroma](https://ralsina.me/weblog/posts/a-tale-of-optimization.html)
|
||||
which is fun.
|
||||
|
4
TODO.md
4
TODO.md
@ -8,8 +8,8 @@
|
||||
* ✅ Implement lexer loader that respects aliases
|
||||
* ✅ Implement lexer loader by file extension
|
||||
* ✅ Add --line-numbers to terminal formatter
|
||||
* Implement lexer loader by mime type
|
||||
* ✅ Implement lexer loader by mime type
|
||||
* ✅ Implement Delegating lexers
|
||||
* ✅ Add RstLexer
|
||||
* Add Mako template lexer
|
||||
* Implement heuristic lexer detection
|
||||
* ✅ Implement heuristic lexer detection
|
||||
|
@ -7,10 +7,10 @@ docker run --rm --privileged \
|
||||
|
||||
# Build for AMD64
|
||||
docker build . -f Dockerfile.static -t tartrazine-builder
|
||||
docker run -ti --rm -v "$PWD":/app --user="$UID" tartrazine-builder /bin/sh -c "cd /app && rm -rf lib shard.lock && make static"
|
||||
docker run -ti --rm -v "$PWD":/app --user="$UID" tartrazine-builder /bin/sh -c "cd /app && rm -rf lib shard.lock && shards build --static --release"
|
||||
mv bin/tartrazine bin/tartrazine-static-linux-amd64
|
||||
|
||||
# Build for ARM64
|
||||
docker build . -f Dockerfile.static --platform linux/arm64 -t tartrazine-builder
|
||||
docker run -ti --rm -v "$PWD":/app --platform linux/arm64 --user="$UID" tartrazine-builder /bin/sh -c "cd /app && rm -rf lib shard.lock && make static"
|
||||
docker run -ti --rm -v "$PWD":/app --platform linux/arm64 --user="$UID" tartrazine-builder /bin/sh -c "cd /app && rm -rf lib shard.lock && shards build --static --release"
|
||||
mv bin/tartrazine bin/tartrazine-static-linux-arm64
|
||||
|
79
cliff.toml
Normal file
79
cliff.toml
Normal file
@ -0,0 +1,79 @@
|
||||
# git-cliff ~ default configuration file
|
||||
# https://git-cliff.org/docs/configuration
|
||||
#
|
||||
# Lines starting with "#" are comments.
|
||||
# Configuration options are organized into tables and keys.
|
||||
# See documentation for more information on available options.
|
||||
|
||||
[changelog]
|
||||
# template for the changelog header
|
||||
header = """
|
||||
# Changelog\n
|
||||
All notable changes to this project will be documented in this file.\n
|
||||
"""
|
||||
# template for the changelog body
|
||||
# https://keats.github.io/tera/docs/#introduction
|
||||
body = """
|
||||
{% if version %}\
|
||||
## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }}
|
||||
{% else %}\
|
||||
## [unreleased]
|
||||
{% endif %}\
|
||||
{% for group, commits in commits | group_by(attribute="group") %}
|
||||
### {{ group | striptags | trim | upper_first }}
|
||||
{% for commit in commits %}
|
||||
- {% if commit.scope %}*({{ commit.scope }})* {% endif %}\
|
||||
{% if commit.breaking %}[**breaking**] {% endif %}\
|
||||
{{ commit.message | upper_first }}\
|
||||
{% endfor %}
|
||||
{% endfor %}\n
|
||||
"""
|
||||
# template for the changelog footer
|
||||
footer = """
|
||||
<!-- generated by git-cliff -->
|
||||
"""
|
||||
# remove the leading and trailing whitespace
|
||||
trim = true
|
||||
# postprocessors
|
||||
postprocessors = [
|
||||
# { pattern = '<REPO>', replace = "https://github.com/orhun/git-cliff" }, # replace repository URL
|
||||
]
|
||||
|
||||
[git]
|
||||
# parse the commits based on https://www.conventionalcommits.org
|
||||
conventional_commits = true
|
||||
# filter out the commits that are not conventional
|
||||
filter_unconventional = true
|
||||
# process each line of a commit as an individual commit
|
||||
split_commits = false
|
||||
# regex for preprocessing the commit messages
|
||||
commit_preprocessors = [
|
||||
# Replace issue numbers
|
||||
#{ pattern = '\((\w+\s)?#([0-9]+)\)', replace = "([#${2}](<REPO>/issues/${2}))"},
|
||||
# Check spelling of the commit with https://github.com/crate-ci/typos
|
||||
# If the spelling is incorrect, it will be automatically fixed.
|
||||
#{ pattern = '.*', replace_command = 'typos --write-changes -' },
|
||||
]
|
||||
# regex for parsing and grouping commits
|
||||
commit_parsers = [
|
||||
{ message = "^feat", group = "<!-- 0 -->🚀 Features" },
|
||||
{ message = "^fix", group = "<!-- 1 -->🐛 Bug Fixes" },
|
||||
{ message = "^doc", group = "<!-- 3 -->📚 Documentation" },
|
||||
{ message = "^perf", group = "<!-- 4 -->⚡ Performance" },
|
||||
{ message = "^refactor", group = "<!-- 2 -->🚜 Refactor" },
|
||||
{ message = "^style", group = "<!-- 5 -->🎨 Styling" },
|
||||
{ message = "^test", group = "<!-- 6 -->🧪 Testing" },
|
||||
{ message = "^chore\\(release\\): prepare for", skip = true },
|
||||
{ message = "^chore\\(deps.*\\)", skip = true },
|
||||
{ message = "^chore\\(pr\\)", skip = true },
|
||||
{ message = "^chore\\(pull\\)", skip = true },
|
||||
{ message = "^chore|^ci", group = "<!-- 7 -->⚙️ Miscellaneous Tasks" },
|
||||
{ body = ".*security", group = "<!-- 8 -->🛡️ Security" },
|
||||
{ message = "^revert", group = "<!-- 9 -->◀️ Revert" },
|
||||
]
|
||||
# filter out the commits that are not matched by commit parsers
|
||||
filter_commits = false
|
||||
# sort the tags topologically
|
||||
topo_order = false
|
||||
# sort the commits inside sections by oldest/newest order
|
||||
sort_commits = "oldest"
|
15
do_release.sh
Executable file
15
do_release.sh
Executable file
@ -0,0 +1,15 @@
|
||||
#!/bin/bash
|
||||
set -e  # abort the release at the first failing command
|
||||
|
||||
PKGNAME=$(basename "$PWD")
|
||||
VERSION=$(git cliff --bumped-version |cut -dv -f2)
|
||||
|
||||
sed "s/^version:.*$/version: $VERSION/g" -i shard.yml
|
||||
git add shard.yml
|
||||
hace lint test
|
||||
git cliff --bump -o
|
||||
git commit -a -m "bump: Release v$VERSION"
|
||||
git tag "v$VERSION"
|
||||
git push --tags
|
||||
hace static
|
||||
gh release create "v$VERSION" "bin/$PKGNAME-static-linux-amd64" "bin/$PKGNAME-static-linux-arm64" --title "Release v$VERSION" --notes "$(git cliff -l -s all)"
|
@ -127,4 +127,3 @@
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
||||
|
||||
|
@ -52,4 +52,3 @@
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
||||
|
||||
|
@ -63,4 +63,3 @@
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
||||
|
||||
|
@ -55,4 +55,3 @@
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
||||
|
||||
|
@ -75,4 +75,3 @@
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
||||
|
||||
|
@ -67,4 +67,3 @@
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
||||
|
||||
|
@ -19,4 +19,3 @@
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
<name>Groff</name>
|
||||
<alias>groff</alias>
|
||||
<alias>nroff</alias>
|
||||
<alias>roff</alias>
|
||||
<alias>man</alias>
|
||||
<filename>*.[1-9]</filename>
|
||||
<filename>*.1p</filename>
|
||||
|
@ -30,12 +30,12 @@
|
||||
disambiguations:
|
||||
- extensions: ['.1', '.2', '.3', '.4', '.5', '.6', '.7', '.8', '.9']
|
||||
rules:
|
||||
- language: Roff Manpage
|
||||
- language: man
|
||||
and:
|
||||
- named_pattern: mdoc-date
|
||||
- named_pattern: mdoc-title
|
||||
- named_pattern: mdoc-heading
|
||||
- language: Roff Manpage
|
||||
- language: man
|
||||
and:
|
||||
- named_pattern: man-title
|
||||
- named_pattern: man-heading
|
||||
@ -43,12 +43,12 @@ disambiguations:
|
||||
pattern: '^\.(?:[A-Za-z]{2}(?:\s|$)|\\")'
|
||||
- extensions: ['.1in', '.1m', '.1x', '.3in', '.3m', '.3p', '.3pm', '.3qt', '.3x', '.man', '.mdoc']
|
||||
rules:
|
||||
- language: Roff Manpage
|
||||
- language: man
|
||||
and:
|
||||
- named_pattern: mdoc-date
|
||||
- named_pattern: mdoc-title
|
||||
- named_pattern: mdoc-heading
|
||||
- language: Roff Manpage
|
||||
- language: man
|
||||
and:
|
||||
- named_pattern: man-title
|
||||
- named_pattern: man-heading
|
@ -53,4 +53,3 @@
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
||||
|
||||
|
@ -31,4 +31,3 @@
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
||||
|
||||
|
@ -55,4 +55,3 @@
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
||||
|
||||
|
@ -73,4 +73,3 @@
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
||||
|
||||
|
@ -70,4 +70,3 @@
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
||||
|
||||
|
@ -40,4 +40,3 @@
|
||||
</state>
|
||||
</rules>
|
||||
</lexer>
|
||||
|
||||
|
@ -52,6 +52,6 @@ with open("src/constants/lexers.cr", "w") as f:
|
||||
f.write(" LEXERS_BY_FILENAME = {\n")
|
||||
for k in sorted(lexer_by_filename.keys()):
|
||||
v = lexer_by_filename[k]
|
||||
f.write(f'"{k}" => {str(list(v)).replace("'", "\"")}, \n')
|
||||
f.write(f'"{k}" => {str(sorted(list(v))).replace("'", "\"")}, \n')
|
||||
f.write("}\n")
|
||||
f.write("end\n")
|
||||
|
@ -1,5 +1,5 @@
|
||||
name: tartrazine
|
||||
version: 0.6.0
|
||||
version: 0.6.4
|
||||
|
||||
authors:
|
||||
- Roberto Alsina <roberto.alsina@gmail.com>
|
||||
@ -10,7 +10,8 @@ targets:
|
||||
|
||||
dependencies:
|
||||
baked_file_system:
|
||||
github: schovi/baked_file_system
|
||||
github: ralsina/baked_file_system
|
||||
branch: master
|
||||
base58:
|
||||
github: crystal-china/base58.cr
|
||||
sixteen:
|
||||
|
@ -328,6 +328,7 @@ module Tartrazine
|
||||
"restructuredtext" => "rst",
|
||||
"rexx" => "rexx",
|
||||
"rkt" => "racket",
|
||||
"roff" => "groff",
|
||||
"rpmspec" => "rpm_spec",
|
||||
"rs" => "rust",
|
||||
"rst" => "rst",
|
||||
@ -730,8 +731,8 @@ module Tartrazine
|
||||
"*.applescript" => ["applescript"],
|
||||
"*.aql" => ["arangodb_aql"],
|
||||
"*.arexx" => ["rexx"],
|
||||
"*.as" => ["actionscript_3", "actionscript"],
|
||||
"*.asm" => ["tasm", "nasm", "z80_assembly"],
|
||||
"*.as" => ["actionscript", "actionscript_3"],
|
||||
"*.asm" => ["nasm", "tasm", "z80_assembly"],
|
||||
"*.au3" => ["autoit"],
|
||||
"*.automount" => ["systemd"],
|
||||
"*.aux" => ["tex"],
|
||||
@ -739,7 +740,7 @@ module Tartrazine
|
||||
"*.awk" => ["awk"],
|
||||
"*.b" => ["brainfuck"],
|
||||
"*.bal" => ["ballerina"],
|
||||
"*.bas" => ["vb_net", "qbasic"],
|
||||
"*.bas" => ["qbasic", "vb_net"],
|
||||
"*.bash" => ["bash"],
|
||||
"*.bat" => ["batchfile"],
|
||||
"*.batch" => ["psl"],
|
||||
@ -750,7 +751,7 @@ module Tartrazine
|
||||
"*.bnf" => ["bnf"],
|
||||
"*.bqn" => ["bqn"],
|
||||
"*.bzl" => ["python"],
|
||||
"*.c" => ["c++", "c"],
|
||||
"*.c" => ["c", "c++"],
|
||||
"*.c++" => ["c++"],
|
||||
"*.capnp" => ["cap_n_proto"],
|
||||
"*.cc" => ["c++"],
|
||||
@ -839,7 +840,7 @@ module Tartrazine
|
||||
"*.fx" => ["hlsl"],
|
||||
"*.fxh" => ["hlsl"],
|
||||
"*.fzn" => ["minizinc"],
|
||||
"*.gd" => ["gdscript3", "gdscript"],
|
||||
"*.gd" => ["gdscript", "gdscript3"],
|
||||
"*.gemspec" => ["ruby"],
|
||||
"*.geo" => ["glsl"],
|
||||
"*.gleam" => ["gleam"],
|
||||
@ -849,7 +850,7 @@ module Tartrazine
|
||||
"*.graphql" => ["graphql"],
|
||||
"*.graphqls" => ["graphql"],
|
||||
"*.groovy" => ["groovy"],
|
||||
"*.h" => ["c++", "c", "objective-c"],
|
||||
"*.h" => ["c", "c++", "objective-c"],
|
||||
"*.h++" => ["c++"],
|
||||
"*.ha" => ["hare"],
|
||||
"*.handlebars" => ["handlebars"],
|
||||
@ -872,7 +873,7 @@ module Tartrazine
|
||||
"*.idc" => ["c"],
|
||||
"*.idr" => ["idris"],
|
||||
"*.ijs" => ["j"],
|
||||
"*.inc" => ["objectpascal", "povray", "php", "sourcepawn"],
|
||||
"*.inc" => ["objectpascal", "php", "povray", "sourcepawn"],
|
||||
"*.inf" => ["ini"],
|
||||
"*.ini" => ["ini"],
|
||||
"*.ino" => ["arduino"],
|
||||
@ -898,13 +899,13 @@ module Tartrazine
|
||||
"*.lpk" => ["objectpascal"],
|
||||
"*.lpr" => ["objectpascal"],
|
||||
"*.lua" => ["lua"],
|
||||
"*.m" => ["mathematica", "octave", "matlab", "objective-c", "mason"],
|
||||
"*.m" => ["mason", "mathematica", "matlab", "objective-c", "octave"],
|
||||
"*.ma" => ["mathematica"],
|
||||
"*.mak" => ["makefile"],
|
||||
"*.man" => ["groff"],
|
||||
"*.mao" => ["mako"],
|
||||
"*.markdown" => ["markdown"],
|
||||
"*.mc" => ["monkeyc", "mason"],
|
||||
"*.mc" => ["mason", "monkeyc"],
|
||||
"*.mcfunction" => ["mcfunction"],
|
||||
"*.md" => ["markdown"],
|
||||
"*.metal" => ["metal"],
|
||||
@ -961,7 +962,7 @@ module Tartrazine
|
||||
"*.pml" => ["promela"],
|
||||
"*.pony" => ["pony"],
|
||||
"*.pov" => ["povray"],
|
||||
"*.pp" => ["puppet", "objectpascal"],
|
||||
"*.pp" => ["objectpascal", "puppet"],
|
||||
"*.pq" => ["powerquery"],
|
||||
"*.pr" => ["promela"],
|
||||
"*.prm" => ["promela"],
|
||||
@ -1010,7 +1011,7 @@ module Tartrazine
|
||||
"*.rst" => ["rst"],
|
||||
"*.rvt" => ["tcl"],
|
||||
"*.rx" => ["rexx"],
|
||||
"*.s" => ["armasm", "r", "gas"],
|
||||
"*.s" => ["armasm", "gas", "r"],
|
||||
"*.sage" => ["python"],
|
||||
"*.sas" => ["sas"],
|
||||
"*.sass" => ["sass"],
|
||||
@ -1023,7 +1024,7 @@ module Tartrazine
|
||||
"*.scope" => ["systemd"],
|
||||
"*.scss" => ["scss"],
|
||||
"*.sed" => ["sed"],
|
||||
"*.service" => ["systemd", "ini"],
|
||||
"*.service" => ["ini", "systemd"],
|
||||
"*.sh" => ["bash"],
|
||||
"*.sh-session" => ["bash_session"],
|
||||
"*.sieve" => ["sieve"],
|
||||
@ -1033,7 +1034,7 @@ module Tartrazine
|
||||
"*.smali" => ["smali"],
|
||||
"*.sml" => ["standard_ml"],
|
||||
"*.snobol" => ["snobol"],
|
||||
"*.socket" => ["systemd", "ini"],
|
||||
"*.socket" => ["ini", "systemd"],
|
||||
"*.sol" => ["solidity"],
|
||||
"*.sp" => ["sourcepawn"],
|
||||
"*.sparql" => ["sparql"],
|
||||
@ -1068,7 +1069,7 @@ module Tartrazine
|
||||
"*.tpl" => ["smarty"],
|
||||
"*.tpp" => ["c++"],
|
||||
"*.trig" => ["psl"],
|
||||
"*.ts" => ["typoscript", "typescript"],
|
||||
"*.ts" => ["typescript", "typoscript"],
|
||||
"*.tst" => ["scilab"],
|
||||
"*.tsx" => ["typescript"],
|
||||
"*.ttl" => ["turtle"],
|
||||
@ -1104,7 +1105,7 @@ module Tartrazine
|
||||
"*.xml" => ["xml"],
|
||||
"*.xsd" => ["xml"],
|
||||
"*.xsl" => ["xml"],
|
||||
"*.xslt" => ["xml", "html"],
|
||||
"*.xslt" => ["html", "xml"],
|
||||
"*.yaml" => ["yaml"],
|
||||
"*.yang" => ["yang"],
|
||||
"*.yml" => ["yaml"],
|
||||
|
@ -11,7 +11,7 @@ module Tartrazine
|
||||
"#{i + 1}".rjust(4).ljust(5)
|
||||
end
|
||||
|
||||
def format(text : String, lexer : Lexer) : String
|
||||
def format(text : String, lexer : BaseLexer) : String
|
||||
outp = String::Builder.new("")
|
||||
format(text, lexer, outp)
|
||||
outp.to_s
|
||||
|
@ -1,13 +1,12 @@
|
||||
require "yaml"
|
||||
|
||||
# Use linguist's heuristics to disambiguate between languages
|
||||
# This is *shamelessly* stolen from https://github.com/github-linguist/linguist
|
||||
# and ported to Crystal. Deepest thanks to the authors of Linguist
|
||||
# for licensing it liberally.
|
||||
#
|
||||
# Consider this code (c) 2017 GitHub, Inc. even if I wrote it.
|
||||
module Linguist
|
||||
|
||||
# Use linguist's heuristics to disambiguate between languages
|
||||
# This is *shamelessly* stolen from https://github.com/github-linguist/linguist
|
||||
# and ported to Crystal. Deepest thanks to the authors of Linguist
|
||||
# for licensing it liberally.
|
||||
#
|
||||
# Consider this code (c) 2017 GitHub, Inc. even if I wrote it.
|
||||
module Linguist
|
||||
class Heuristic
|
||||
include YAML::Serializable
|
||||
|
||||
@ -80,7 +79,3 @@ require "yaml"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
h = Linguist::Heuristic.from_yaml(File.read("heuristics/heuristics.yml"))
|
||||
fname = "/usr/include/sqlite3.h"
|
||||
p! h.run(fname, File.read(fname))
|
||||
|
30
src/lexer.cr
30
src/lexer.cr
@ -9,13 +9,21 @@ module Tartrazine
|
||||
|
||||
# Get the lexer object for a language name
|
||||
# Resolution order: explicit name, then filename, then mimetype; plaintext otherwise.
|
||||
def self.lexer(name : String? = nil, filename : String? = nil) : BaseLexer
|
||||
def self.lexer(name : String? = nil, filename : String? = nil, mimetype : String? = nil) : BaseLexer
|
||||
return lexer_by_name(name) if name && name != "autodetect"
|
||||
return lexer_by_filename(filename) if filename
|
||||
return lexer_by_mimetype(mimetype) if mimetype
|
||||
|
||||
Lexer.from_xml(LexerFiles.get("/#{LEXERS_BY_NAME["plaintext"]}.xml").gets_to_end)
|
||||
end
|
||||
|
||||
private def self.lexer_by_mimetype(mimetype : String) : BaseLexer
|
||||
lexer_file_name = LEXERS_BY_MIMETYPE.fetch(mimetype, nil)
|
||||
raise Exception.new("Unknown mimetype: #{mimetype}") if lexer_file_name.nil?
|
||||
|
||||
Lexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
|
||||
end
|
||||
|
||||
private def self.lexer_by_name(name : String) : BaseLexer
|
||||
lexer_file_name = LEXERS_BY_NAME.fetch(name.downcase, nil)
|
||||
return create_delegating_lexer(name) if lexer_file_name.nil? && name.includes? "+"
|
||||
@ -36,12 +44,30 @@ module Tartrazine
|
||||
when 1
|
||||
lexer_file_name = candidates.first
|
||||
else
|
||||
raise Exception.new("Multiple lexers match the filename: #{candidates.to_a.join(", ")}")
|
||||
lexer_file_name = self.lexer_by_content(filename)
|
||||
begin
|
||||
return self.lexer(lexer_file_name)
|
||||
rescue ex : Exception
|
||||
raise Exception.new("Multiple lexers match the filename: #{candidates.to_a.join(", ")}, heuristics suggest #{lexer_file_name} but there is no matching lexer.")
|
||||
end
|
||||
end
|
||||
|
||||
Lexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
|
||||
end
|
||||
|
||||
private def self.lexer_by_content(fname : String) : String?
|
||||
h = Linguist::Heuristic.from_yaml(LexerFiles.get("/heuristics.yml").gets_to_end)
|
||||
result = h.run(fname, File.read(fname))
|
||||
case result
|
||||
when Nil
|
||||
raise Exception.new "No lexer found for #{fname}"
|
||||
when String
|
||||
result.as(String)
|
||||
when Array(String)
|
||||
result.first
|
||||
end
|
||||
end
|
||||
|
||||
private def self.create_delegating_lexer(name : String) : BaseLexer
|
||||
language, root = name.split("+", 2)
|
||||
language_lexer = lexer(language)
|
||||
|
1
src/run_tests.cr
Normal file
1
src/run_tests.cr
Normal file
@ -0,0 +1 @@
|
||||
require "../spec/**"
|
Reference in New Issue
Block a user