From 0423811c5d5ee13cb05a6373bccd88e5e9609770 Mon Sep 17 00:00:00 2001 From: Roberto Alsina Date: Mon, 23 Sep 2024 17:34:59 -0300 Subject: [PATCH] feat: optional conditional baking of lexers --- README.md | 19 +++++++++++++++++++ src/constants/lexers.cr | 4 ++-- src/lexer.cr | 19 ++++++++++++++++--- 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 1e2bc4f..1edf38a 100644 --- a/README.md +++ b/README.md @@ -82,6 +82,25 @@ puts formatter.format("puts \"Hello, world!\"", lexer) The reason you may want to use the manual version is to reuse the lexer and formatter objects for performance reasons. +## Choosing which lexers to include + +By default, Tartrazine supports all its lexers by embedding +them in the binary. This makes the binary large. If you are +using it as a library, you may want to include only a selection of lexers. To do that: + +* Pass the `-Dnolexers` flag to the compiler +* Set the `TT_LEXERS` environment variable to a + comma-separated list (no spaces) of the lexers you want to include. + + +This builds a binary with only the python, markdown, bash and yaml lexers (enough to highlight this `README.md`): + +```bash +> TT_LEXERS=python,markdown,bash,yaml shards build -Dnolexers -d --error-trace +Dependencies are satisfied +Building: tartrazine +``` + ## Contributing 1. 
Fork it () diff --git a/src/constants/lexers.cr b/src/constants/lexers.cr index 0d2a416..92d1239 100644 --- a/src/constants/lexers.cr +++ b/src/constants/lexers.cr @@ -471,7 +471,7 @@ module Tartrazine "application/x-fennel" => "fennel", "application/x-fish" => "fish", "application/x-forth" => "forth", - "application/x-gdscript" => "gdscript", + "application/x-gdscript" => "gdscript3", "application/x-hcl" => "hcl", "application/x-hy" => "hy", "application/x-javascript" => "javascript", @@ -594,7 +594,7 @@ module Tartrazine "text/x-fortran" => "fortran", "text/x-fsharp" => "fsharp", "text/x-gas" => "gas", - "text/x-gdscript" => "gdscript", + "text/x-gdscript" => "gdscript3", "text/x-gherkin" => "gherkin", "text/x-gleam" => "gleam", "text/x-glslsrc" => "glsl", diff --git a/src/lexer.cr b/src/lexer.cr index 6ec8522..6478739 100644 --- a/src/lexer.cr +++ b/src/lexer.cr @@ -6,11 +6,21 @@ require "crystal/syntax_highlighter" module Tartrazine class LexerFiles extend BakedFileSystem - bake_folder "../lexers", __DIR__ + # Bakes one XML file per name in the comma-separated TT_LEXERS env var; paths are anchored at this file's directory so the build also works when compiled as a dependency. + macro bake_selected_lexers + {% for lexer in env("TT_LEXERS").split "," %} + bake_file {{ lexer }}+".xml", {{ read_file "#{__DIR__}/../lexers/" + lexer + ".xml" }} + {% end %} + end + # With -Dnolexers only the lexers selected via TT_LEXERS are embedded; otherwise the whole lexers folder is baked. + {% if flag?(:nolexers) %} + bake_selected_lexers + {% else %} + bake_folder "../lexers", __DIR__ + {% end %} end # Get the lexer object for a language name - # FIXME: support mimetypes def self.lexer(name : String? = nil, filename : String? = nil, mimetype : String? = nil) : BaseLexer return lexer_by_name(name) if name && name != "autodetect" return lexer_by_filename(filename) if filename @@ -33,6 +43,8 @@ module Tartrazine raise Exception.new("Unknown lexer: #{name}") if lexer_file_name.nil? 
RegexLexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end) + rescue BakedFileSystem::NoSuchFileError + raise Exception.new("Unknown lexer: #{name}") end private def self.lexer_by_filename(filename : String) : BaseLexer @@ -84,7 +96,8 @@ module Tartrazine # Return a list of all lexers def self.lexers : Array(String) - LEXERS_BY_NAME.keys.sort! + baked_paths = LexerFiles.files.map(&.path).to_set + LEXERS_BY_NAME.keys.select { |k| baked_paths.includes?("/#{k}.xml") }.sort! end # A token, the output of the tokenizer