mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-07-01 20:37:08 -03:00
Compare commits
1 Commits
v0.6.3
...
heuristics
Author | SHA1 | Date | |
---|---|---|---|
72afec773e |
59
README.md
59
README.md
@ -2,11 +2,36 @@
|
||||
|
||||
Tartrazine is a library to syntax-highlight code. It is
|
||||
a port of [Pygments](https://pygments.org/) to
|
||||
[Crystal](https://crystal-lang.org/).
|
||||
[Crystal](https://crystal-lang.org/). Kind of.
|
||||
|
||||
It also provides a CLI tool which can be used to highlight many things in many styles.
|
||||
The CLI tool can be used to highlight many things in many styles.
|
||||
|
||||
Currently Tartrazine supports 247 languages, and it has 331 themes (63 from Chroma, the rest are base16 themes via
|
||||
# A port of what? Why "kind of"?
|
||||
|
||||
Pygments is a staple of the Python ecosystem, and it's great.
|
||||
It lets you highlight code in many languages, and it has many
|
||||
themes. Chroma is "Pygments for Go", it's actually a port of
|
||||
Pygments to Go, and it's great too.
|
||||
|
||||
I wanted that in Crystal, so I started this project. But I did
|
||||
not read much of the Pygments code. Or much of Chroma's.
|
||||
|
||||
Chroma has taken most of the Pygments lexers and turned them into
|
||||
XML descriptions. What I did was take those XML files from Chroma
|
||||
and a pile of test cases from Pygments, and I slapped them together
|
||||
until the tests passed and my code produced the same output as
|
||||
Chroma. Think of it as *extreme TDD*.
|
||||
|
||||
Currently the pass rate for tests in the supported languages
|
||||
is `96.8%`, which is *not bad for a couple days hacking*.
|
||||
|
||||
This only covers the RegexLexers, which are the most common ones,
|
||||
but it means the supported languages are a subset of Chroma's, which
|
||||
is a subset of Pygments'.
|
||||
|
||||
Currently Tartrazine supports ... 248 languages.
|
||||
|
||||
It has 331 themes (63 from Chroma, the rest are base16 themes via
|
||||
[Sixteen](https://github.com/ralsina/sixteen)
|
||||
|
||||
## Installation
|
||||
@ -33,7 +58,7 @@ $ tartrazine whatever.c -l c -t catppuccin-macchiato --line-numbers -f terminal
|
||||
Generate a standalone HTML file from a C source file with the syntax highlighted:
|
||||
|
||||
```shell
|
||||
$ tartrazine whatever.c -t catppuccin-macchiato --line-numbers \
|
||||
$ tartrazine whatever.c -l c -t catppuccin-macchiato --line-numbers \
|
||||
--standalone -f html -o whatever.html
|
||||
```
|
||||
|
||||
@ -62,29 +87,3 @@ puts formatter.format(File.read(ARGV[0]), lexer)
|
||||
## Contributors
|
||||
|
||||
- [Roberto Alsina](https://github.com/ralsina) - creator and maintainer
|
||||
|
||||
## A port of what? Why "kind of"?
|
||||
|
||||
Pygments is a staple of the Python ecosystem, and it's great.
|
||||
It lets you highlight code in many languages, and it has many
|
||||
themes. Chroma is "Pygments for Go", it's actually a port of
|
||||
Pygments to Go, and it's great too.
|
||||
|
||||
I wanted that in Crystal, so I started this project. But I did
|
||||
not read much of the Pygments code. Or much of Chroma's.
|
||||
|
||||
Chroma has taken most of the Pygments lexers and turned them into
|
||||
XML descriptions. What I did was take those XML files from Chroma
|
||||
and a pile of test cases from Pygments, and I slapped them together
|
||||
until the tests passed and my code produced the same output as
|
||||
Chroma. Think of it as [*extreme TDD*](https://ralsina.me/weblog/posts/tartrazine-reimplementing-pygments.html).
|
||||
|
||||
Currently the pass rate for tests in the supported languages
|
||||
is `96.8%`, which is *not bad for a couple days hacking*.
|
||||
|
||||
This only covers the RegexLexers, which are the most common ones,
|
||||
but it means the supported languages are a subset of Chroma's, which
|
||||
is a subset of Pygments'. DelegatingLexers (useful for things like template languages) are covered too.
|
||||
|
||||
Then performance was bad, so I hacked and hacked and made it
|
||||
significantly [faster than Chroma](https://ralsina.me/weblog/posts/a-tale-of-optimization.html), which is fun.
|
4
TODO.md
4
TODO.md
@ -8,8 +8,8 @@
|
||||
* ✅ Implement lexer loader that respects aliases
|
||||
* ✅ Implement lexer loader by file extension
|
||||
* ✅ Add --line-numbers to terminal formatter
|
||||
* ✅ Implement lexer loader by mime type
|
||||
* Implement lexer loader by mime type
|
||||
* ✅ Implement Delegating lexers
|
||||
* ✅ Add RstLexer
|
||||
* Add Mako template lexer
|
||||
* ✅ Implement heuristic lexer detection
|
||||
* Implement heuristic lexer detection
|
||||
|
@ -52,6 +52,6 @@ with open("src/constants/lexers.cr", "w") as f:
|
||||
f.write(" LEXERS_BY_FILENAME = {\n")
|
||||
for k in sorted(lexer_by_filename.keys()):
|
||||
v = lexer_by_filename[k]
|
||||
f.write(f'"{k}" => {str(sorted(list(v))).replace("'", "\"")}, \n')
|
||||
f.write(f'"{k}" => {str(list(v)).replace("'", "\"")}, \n')
|
||||
f.write("}\n")
|
||||
f.write("end\n")
|
||||
|
@ -1,5 +1,5 @@
|
||||
name: tartrazine
|
||||
version: 0.6.1
|
||||
version: 0.6.0
|
||||
|
||||
authors:
|
||||
- Roberto Alsina <roberto.alsina@gmail.com>
|
||||
@ -10,13 +10,11 @@ targets:
|
||||
|
||||
dependencies:
|
||||
baked_file_system:
|
||||
github: ralsina/baked_file_system
|
||||
branch: master
|
||||
github: schovi/baked_file_system
|
||||
base58:
|
||||
github: crystal-china/base58.cr
|
||||
sixteen:
|
||||
github: ralsina/sixteen
|
||||
branch: main
|
||||
docopt:
|
||||
github: chenkovsky/docopt.cr
|
||||
|
||||
|
@ -731,8 +731,8 @@ module Tartrazine
|
||||
"*.applescript" => ["applescript"],
|
||||
"*.aql" => ["arangodb_aql"],
|
||||
"*.arexx" => ["rexx"],
|
||||
"*.as" => ["actionscript", "actionscript_3"],
|
||||
"*.asm" => ["nasm", "tasm", "z80_assembly"],
|
||||
"*.as" => ["actionscript_3", "actionscript"],
|
||||
"*.asm" => ["nasm", "z80_assembly", "tasm"],
|
||||
"*.au3" => ["autoit"],
|
||||
"*.automount" => ["systemd"],
|
||||
"*.aux" => ["tex"],
|
||||
@ -740,7 +740,7 @@ module Tartrazine
|
||||
"*.awk" => ["awk"],
|
||||
"*.b" => ["brainfuck"],
|
||||
"*.bal" => ["ballerina"],
|
||||
"*.bas" => ["qbasic", "vb_net"],
|
||||
"*.bas" => ["vb_net", "qbasic"],
|
||||
"*.bash" => ["bash"],
|
||||
"*.bat" => ["batchfile"],
|
||||
"*.batch" => ["psl"],
|
||||
@ -850,7 +850,7 @@ module Tartrazine
|
||||
"*.graphql" => ["graphql"],
|
||||
"*.graphqls" => ["graphql"],
|
||||
"*.groovy" => ["groovy"],
|
||||
"*.h" => ["c", "c++", "objective-c"],
|
||||
"*.h" => ["objective-c", "c", "c++"],
|
||||
"*.h++" => ["c++"],
|
||||
"*.ha" => ["hare"],
|
||||
"*.handlebars" => ["handlebars"],
|
||||
@ -873,7 +873,7 @@ module Tartrazine
|
||||
"*.idc" => ["c"],
|
||||
"*.idr" => ["idris"],
|
||||
"*.ijs" => ["j"],
|
||||
"*.inc" => ["objectpascal", "php", "povray", "sourcepawn"],
|
||||
"*.inc" => ["php", "objectpascal", "povray", "sourcepawn"],
|
||||
"*.inf" => ["ini"],
|
||||
"*.ini" => ["ini"],
|
||||
"*.ino" => ["arduino"],
|
||||
@ -899,7 +899,7 @@ module Tartrazine
|
||||
"*.lpk" => ["objectpascal"],
|
||||
"*.lpr" => ["objectpascal"],
|
||||
"*.lua" => ["lua"],
|
||||
"*.m" => ["mason", "mathematica", "matlab", "objective-c", "octave"],
|
||||
"*.m" => ["mathematica", "mason", "octave", "objective-c", "matlab"],
|
||||
"*.ma" => ["mathematica"],
|
||||
"*.mak" => ["makefile"],
|
||||
"*.man" => ["groff"],
|
||||
@ -954,7 +954,7 @@ module Tartrazine
|
||||
"*.php" => ["php"],
|
||||
"*.php[345]" => ["php"],
|
||||
"*.pig" => ["pig"],
|
||||
"*.pl" => ["perl", "prolog"],
|
||||
"*.pl" => ["prolog", "perl"],
|
||||
"*.plc" => ["plutus_core"],
|
||||
"*.plot" => ["gnuplot"],
|
||||
"*.plt" => ["gnuplot"],
|
||||
@ -962,7 +962,7 @@ module Tartrazine
|
||||
"*.pml" => ["promela"],
|
||||
"*.pony" => ["pony"],
|
||||
"*.pov" => ["povray"],
|
||||
"*.pp" => ["objectpascal", "puppet"],
|
||||
"*.pp" => ["puppet", "objectpascal"],
|
||||
"*.pq" => ["powerquery"],
|
||||
"*.pr" => ["promela"],
|
||||
"*.prm" => ["promela"],
|
||||
@ -1011,7 +1011,7 @@ module Tartrazine
|
||||
"*.rst" => ["rst"],
|
||||
"*.rvt" => ["tcl"],
|
||||
"*.rx" => ["rexx"],
|
||||
"*.s" => ["armasm", "gas", "r"],
|
||||
"*.s" => ["armasm", "r", "gas"],
|
||||
"*.sage" => ["python"],
|
||||
"*.sas" => ["sas"],
|
||||
"*.sass" => ["sass"],
|
||||
@ -1024,7 +1024,7 @@ module Tartrazine
|
||||
"*.scope" => ["systemd"],
|
||||
"*.scss" => ["scss"],
|
||||
"*.sed" => ["sed"],
|
||||
"*.service" => ["ini", "systemd"],
|
||||
"*.service" => ["systemd", "ini"],
|
||||
"*.sh" => ["bash"],
|
||||
"*.sh-session" => ["bash_session"],
|
||||
"*.sieve" => ["sieve"],
|
||||
@ -1034,13 +1034,13 @@ module Tartrazine
|
||||
"*.smali" => ["smali"],
|
||||
"*.sml" => ["standard_ml"],
|
||||
"*.snobol" => ["snobol"],
|
||||
"*.socket" => ["ini", "systemd"],
|
||||
"*.socket" => ["systemd", "ini"],
|
||||
"*.sol" => ["solidity"],
|
||||
"*.sp" => ["sourcepawn"],
|
||||
"*.sparql" => ["sparql"],
|
||||
"*.spec" => ["rpm_spec"],
|
||||
"*.spt" => ["cheetah"],
|
||||
"*.sql" => ["mysql", "sql"],
|
||||
"*.sql" => ["sql", "mysql"],
|
||||
"*.ss" => ["scheme"],
|
||||
"*.st" => ["smalltalk"],
|
||||
"*.stas" => ["stas"],
|
||||
@ -1069,7 +1069,7 @@ module Tartrazine
|
||||
"*.tpl" => ["smarty"],
|
||||
"*.tpp" => ["c++"],
|
||||
"*.trig" => ["psl"],
|
||||
"*.ts" => ["typescript", "typoscript"],
|
||||
"*.ts" => ["typoscript", "typescript"],
|
||||
"*.tst" => ["scilab"],
|
||||
"*.tsx" => ["typescript"],
|
||||
"*.ttl" => ["turtle"],
|
||||
@ -1079,7 +1079,7 @@ module Tartrazine
|
||||
"*.twig" => ["twig"],
|
||||
"*.txt" => ["plaintext"],
|
||||
"*.uc" => ["ucode"],
|
||||
"*.v" => ["coq", "v", "verilog"],
|
||||
"*.v" => ["verilog", "v", "coq"],
|
||||
"*.vala" => ["vala"],
|
||||
"*.vapi" => ["vala"],
|
||||
"*.vb" => ["vb_net"],
|
||||
|
@ -11,7 +11,7 @@ module Tartrazine
|
||||
"#{i + 1}".rjust(4).ljust(5)
|
||||
end
|
||||
|
||||
def format(text : String, lexer : BaseLexer) : String
|
||||
def format(text : String, lexer : Lexer) : String
|
||||
outp = String::Builder.new("")
|
||||
format(text, lexer, outp)
|
||||
outp.to_s
|
||||
|
10
src/lexer.cr
10
src/lexer.cr
@ -9,21 +9,13 @@ module Tartrazine
|
||||
|
||||
# Get the lexer object for a language name
|
||||
# FIXME: support mimetypes
|
||||
def self.lexer(name : String? = nil, filename : String? = nil, mimetype : String? = nil) : BaseLexer
|
||||
def self.lexer(name : String? = nil, filename : String? = nil) : BaseLexer
|
||||
return lexer_by_name(name) if name && name != "autodetect"
|
||||
return lexer_by_filename(filename) if filename
|
||||
return lexer_by_mimetype(mimetype) if mimetype
|
||||
|
||||
Lexer.from_xml(LexerFiles.get("/#{LEXERS_BY_NAME["plaintext"]}.xml").gets_to_end)
|
||||
end
|
||||
|
||||
private def self.lexer_by_mimetype(mimetype : String) : BaseLexer
|
||||
lexer_file_name = LEXERS_BY_MIMETYPE.fetch(mimetype, nil)
|
||||
raise Exception.new("Unknown mimetype: #{mimetype}") if lexer_file_name.nil?
|
||||
|
||||
Lexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
|
||||
end
|
||||
|
||||
private def self.lexer_by_name(name : String) : BaseLexer
|
||||
lexer_file_name = LEXERS_BY_NAME.fetch(name.downcase, nil)
|
||||
return create_delegating_lexer(name) if lexer_file_name.nil? && name.includes? "+"
|
||||
|
Reference in New Issue
Block a user