feat: optional conditional baking of lexers

Roberto Alsina 2024-09-23 17:34:59 -03:00
parent 3d9d3ab5cf
commit 2fab6eaff0
3 changed files with 37 additions and 5 deletions


@@ -82,6 +82,25 @@ puts formatter.format("puts \"Hello, world!\"", lexer)
The reason you may want to use the manual version is that it lets you
reuse the lexer and formatter objects, which is better for performance.
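For instance, a minimal sketch of that reuse (assuming the `Tartrazine.lexer` call shown above and the `Tartrazine::Html` formatter from the full README example; constructor options are illustrative):
```crystal
require "tartrazine"

# Build the lexer and formatter once (options as in the README example,
# shown here with defaults)...
lexer = Tartrazine.lexer("crystal")
formatter = Tartrazine::Html.new

# ...then reuse the same objects for every input instead of rebuilding them.
["puts 1", "puts 2", "puts 3"].each do |source|
  puts formatter.format(source, lexer)
end
```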
## Choosing what Lexers you want
By default, Tartrazine embeds all of its lexers in the binary, which
makes the binary large. If you are using it as a library, you may want
to include only a selection of lexers. To do that:
* Pass the `-Dnolexers` flag to the compiler
* Set the `TT_LEXERS` environment variable to a
comma-separated list of lexers you want to include.
This builds a binary with only the python, markdown, bash and yaml lexers (enough to highlight this `README.md`):
```bash
> TT_LEXERS=python,markdown,bash,yaml shards build -Dnolexers -d --error-trace
Dependencies are satisfied
Building: tartrazine
```
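After a selective build like that, the loader changes further down in this commit mean only the baked lexers are usable; a rough sketch of what a consumer should then see:
```crystal
require "tartrazine"

# With the selective build above, only the baked lexers are listed...
puts Tartrazine.lexers # => ["bash", "markdown", "python", "yaml"]

# ...and they resolve as usual.
lexer = Tartrazine.lexer("markdown")
```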
## Contributing
1. Fork it (<https://github.com/ralsina/tartrazine/fork>)


@@ -471,7 +471,7 @@ module Tartrazine
"application/x-fennel" => "fennel",
"application/x-fish" => "fish",
"application/x-forth" => "forth",
"application/x-gdscript" => "gdscript",
"application/x-gdscript" => "gdscript3",
"application/x-hcl" => "hcl",
"application/x-hy" => "hy",
"application/x-javascript" => "javascript",
@@ -594,7 +594,7 @@ module Tartrazine
"text/x-fortran" => "fortran",
"text/x-fsharp" => "fsharp",
"text/x-gas" => "gas",
"text/x-gdscript" => "gdscript",
"text/x-gdscript" => "gdscript3",
"text/x-gherkin" => "gherkin",
"text/x-gleam" => "gleam",
"text/x-glslsrc" => "glsl",


@@ -6,11 +6,21 @@ require "crystal/syntax_highlighter"
module Tartrazine
class LexerFiles
extend BakedFileSystem
bake_folder "../lexers", __DIR__
macro bake_selected_lexers
{% for lexer in `echo -n $TT_LEXERS`.split "," %}
bake_file {{ lexer }}+".xml", {{ read_file "lexers/" + lexer + ".xml" }}
{% end %}
end
{% if flag?(:nolexers) %}
bake_selected_lexers
{% else %}
bake_folder "../lexers", __DIR__
{% end %}
end
# Get the lexer object for a language name
# FIXME: support mimetypes
def self.lexer(name : String? = nil, filename : String? = nil, mimetype : String? = nil) : BaseLexer
return lexer_by_name(name) if name && name != "autodetect"
return lexer_by_filename(filename) if filename
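As an aside (not part of the diff), this runnable sketch shows the calls the `bake_selected_lexers` macro generates for a hypothetical `TT_LEXERS` value:
```crystal
# The compile-time `echo -n $TT_LEXERS` output is split on ",", and each
# name becomes one bake_file call that inlines lexers/<name>.xml into the
# binary. This prints the calls the macro would generate.
"python,markdown,bash,yaml".split(",").each do |name|
  puts %(bake_file "#{name}.xml", read_file("lexers/#{name}.xml"))
end
```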
@@ -33,6 +43,8 @@ module Tartrazine
raise Exception.new("Unknown lexer: #{name}") if lexer_file_name.nil?
RegexLexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
rescue ex : BakedFileSystem::NoSuchFileError
raise Exception.new("Unknown lexer: #{name}")
end
private def self.lexer_by_filename(filename : String) : BaseLexer
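The new `rescue` above mainly matters for selective builds: a name can still be present in `LEXERS_BY_NAME` while its XML was never baked, so `LexerFiles.get` raises `BakedFileSystem::NoSuchFileError`, and that is reported as an unknown lexer. A sketch of the caller-visible effect (illustrative, not part of the diff):
```crystal
require "tartrazine"

# Assuming a build with -Dnolexers and TT_LEXERS=python,markdown,bash,yaml:
# "ruby" is still a known name, but ruby.xml was never baked, so the
# BakedFileSystem lookup fails and is surfaced as an unknown lexer.
begin
  Tartrazine.lexer("ruby")
rescue ex
  puts ex.message # => "Unknown lexer: ruby"
end
```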
@@ -84,7 +96,8 @@ module Tartrazine
# Return a list of all lexers
def self.lexers : Array(String)
LEXERS_BY_NAME.keys.sort!
file_map = LexerFiles.files.map(&.path)
LEXERS_BY_NAME.keys.select { |k| file_map.includes?("/#{k}.xml") }.sort!
end
# A token, the output of the tokenizer