From a2394a7313443dd59135888715f6e7c13117977d Mon Sep 17 00:00:00 2001
From: Roberto Alsina
Date: Tue, 6 Aug 2024 17:01:14 -0300
Subject: [PATCH] Tweaks

---
 README.md               | 23 ++++++++-----
 spec/tartrazine_spec.cr | 27 ++--------------
 src/actions.cr          |  2 +-
 src/formatter.cr        | 72 +++++++++++++++++++++++++++++++++++++++++
 src/styles.cr           | 22 +++++++++----
 src/tartrazine.cr       | 29 +++++++++++++++--
 6 files changed, 133 insertions(+), 42 deletions(-)
 create mode 100644 src/formatter.cr

diff --git a/README.md b/README.md
index 4b07fb5..5b9e51c 100644
--- a/README.md
+++ b/README.md
@@ -4,10 +4,11 @@ Tartrazine is a library to syntax-highlight code. It
 is a port of [Pygments](https://pygments.org/) to
 [Crystal](https://crystal-lang.org/). Kind of.
 
-It's not currently usable unless what you need is a way
-to turn your files into a pile of json describing its
-constituent tokens, because I have not implemented any
-formatters, yet, only the part that parses the code (the lexers).
+It's not currently usable because it's not finished, but:
+
+* The lexers work for the implemented languages
+* The provided styles work
+* There is a very very simple HTML formatter
 
 # A port of what? Why "kind of"?
 
@@ -32,15 +33,21 @@ Currently Tartrazine supports ... 241 languages.
 
 ## Installation
 
-If you need to ask how to install this, it's not ready for you yet.
+This will have a CLI tool that can be installed, but it's not
+there yet.
+
 
 ## Usage
 
-If you need to ask how to use this, it's not ready for you yet.
+This works:
 
-## Development
+```crystal
+require "tartrazine"
 
-TODO: Write development instructions here
+lexer = Tartrazine.lexer("crystal")
+theme = Tartrazine.theme("catppuccin-macchiato")
+puts Tartrazine::Html.new.format(File.read(ARGV[0]), lexer, theme)
+```
 
 ## Contributing
 
diff --git a/spec/tartrazine_spec.cr b/spec/tartrazine_spec.cr
index 7b564c2..4615a25 100644
--- a/spec/tartrazine_spec.cr
+++ b/spec/tartrazine_spec.cr
@@ -74,8 +74,8 @@ end
 
 # Helper that creates lexer and tokenizes
 def tokenize(lexer_name, text)
-  lexer = Tartrazine.get_lexer(lexer_name)
-  collapse_tokens(lexer.tokenize(text))
+  lexer = Tartrazine.lexer(lexer_name)
+  lexer.tokenize(text)
 end
 
 # Helper that tokenizes using chroma to validate the lexer
@@ -87,26 +87,5 @@ def chroma_tokenize(lexer_name, text)
     ["-f", "json", "-l", lexer_name],
     input: input, output: output
   )
-  collapse_tokens(Array(Tartrazine::Token).from_json(output.to_s))
-end
-
-# Collapse consecutive tokens of the same type for easier comparison
-def collapse_tokens(tokens : Array(Tartrazine::Token))
-  result = [] of Tartrazine::Token
-
-  tokens.each do |token|
-    if result.empty?
-      result << token
-      next
-    end
-    last = result.last
-    if last[:type] == token[:type]
-      new_token = {type: last[:type], value: last[:value] + token[:value]}
-      result.pop
-      result << new_token
-    else
-      result << token
-    end
-  end
-  result
+  Tartrazine::Lexer.collapse_tokens(Array(Tartrazine::Token).from_json(output.to_s))
 end
diff --git a/src/actions.cr b/src/actions.cr
index 9c2878c..b899859 100644
--- a/src/actions.cr
+++ b/src/actions.cr
@@ -82,7 +82,7 @@ module Tartrazine
       return [] of Token if match.nil?
       lexer_name = xml["lexer"].downcase
       Log.trace { "to tokenize: #{match[match_group]}" }
-      Tartrazine.get_lexer(lexer_name).tokenize(match[match_group], usingself: true)
+      Tartrazine.lexer(lexer_name).tokenize(match[match_group], usingself: true)
     when "usingself"
       # Shunt to another copy of this lexer
       return [] of Token if match.nil?
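The spec change above replaces the spec-local `collapse_tokens` helper with `Tartrazine::Lexer.collapse_tokens` (added in the src/tartrazine.cr hunk further down), which merges consecutive tokens of the same type and drops empty ones, so Tartrazine's output can be compared against chroma's even when the two split runs of text differently. A minimal sketch of that behavior, assuming `Token` is the `{type: String, value: String}` named tuple the codebase uses; the token values are made up for illustration:

```crystal
require "tartrazine"

# Made-up token stream: two adjacent "Text" tokens plus an empty token.
tokens = [
  {type: "Text", value: "hello "},
  {type: "Text", value: "world"},
  {type: "Punctuation", value: ""},
  {type: "Keyword", value: "def"},
]

# Empty tokens are dropped, then same-type neighbors are merged.
p Tartrazine::Lexer.collapse_tokens(tokens)
# => [{type: "Text", value: "hello world"}, {type: "Keyword", value: "def"}]
```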
diff --git a/src/formatter.cr b/src/formatter.cr
new file mode 100644
index 0000000..5f0fb8a
--- /dev/null
+++ b/src/formatter.cr
@@ -0,0 +1,72 @@
+require "./tartrazine.cr"
+require "./styles.cr"
+
+module Tartrazine
+  # This is the base class for all formatters.
+  abstract class Formatter
+    property name : String = ""
+
+    def format(text : String, lexer : Lexer, theme : Theme) : String
+      raise Exception.new("Not implemented")
+    end
+
+    # ameba:disable Metrics/CyclomaticComplexity
+    def get_style_defs(theme : Theme) : String
+      output = String.build do |outp|
+        theme.styles.each do |token, style|
+          outp << ".#{token} {"
+          # These are set or nil
+          outp << "color: #{style.color};" if style.color
+          outp << "background-color: #{style.background};" if style.background
+          outp << "border: 1px solid #{style.border};" if style.border
+
+          # These are true/false/nil
+          outp << "border: none;" if style.border == false
+          outp << "font-weight: bold;" if style.bold
+          outp << "font-weight: 400;" if style.bold == false
+          outp << "font-style: italic;" if style.italic
+          outp << "font-style: normal;" if style.italic == false
+          outp << "text-decoration: underline;" if style.underline
+          outp << "text-decoration: none;" if style.underline == false
+
+          outp << "}"
+        end
+      end
+      output
+    end
+  end
+
+  class Html < Formatter
+    def format(text : String, lexer : Lexer, theme : Theme) : String
+      output = String.build do |outp|
+        outp << "<html><head><style>#{get_style_defs(theme)}</style></head><body>"
+        outp << "<pre><code>"
+        lexer.tokenize(text).each do |token|
+          fragment = "<span class=\"#{get_css_class(token[:type], theme)}\">#{token[:value]}</span>"
+          outp << fragment
+        end
+        outp << "</code></pre></body></html>"
+      end
+      output
+    end
+
+    # Given a token type, return the CSS class to use.
+    def get_css_class(token, theme)
+      return token if theme.styles.has_key?(token)
+
+      # Themes don't contain information for each specific
+      # token type. However, they may contain information
+      # for a parent style. Worst case, we go to the root
+      # (Background) style.
+      theme.style_parents(token).reverse.find { |parent|
+        theme.styles.has_key?(parent)
+      }
+    end
+  end
+end
+
+lexer = Tartrazine.lexer("crystal")
+theme = Tartrazine.theme("catppuccin-macchiato")
+puts Tartrazine::Html.new.format(File.read(ARGV[0]), lexer, theme)
diff --git a/src/styles.cr b/src/styles.cr
index 5e83105..ea609dd 100644
--- a/src/styles.cr
+++ b/src/styles.cr
@@ -1,6 +1,11 @@
 require "xml"
 
 module Tartrazine
+  def self.theme(name : String) : Theme
+    path = File.join("styles", "#{name}.xml")
+    Theme.from_xml(File.read(path))
+  end
+
   class Style
     # These properties are tri-state.
     # true means it's set
@@ -52,11 +57,7 @@ module Tartrazine
       return s if s.complete?
 
       # Form the hierarchy of parent styles
-      parents = ["Background"]
-      parts = token.underscore.split("_").map(&.capitalize)
-      parts.each_with_index do |_, i|
-        parents << parts[..i].join("")
-      end
+      parents = style_parents(token)
 
       s = parents.map do |parent|
         styles[parent]
@@ -68,6 +69,15 @@ module Tartrazine
       s
     end
 
+    def style_parents(token)
+      parents = ["Background"]
+      parts = token.underscore.split("_").map(&.capitalize)
+      parts.each_with_index do |_, i|
+        parents << parts[..i].join("")
+      end
+      parents
+    end
+
     # Load from a Chroma XML file
     def self.from_xml(xml : String) : Theme
       document = XML.parse(xml)
@@ -100,4 +110,4 @@ module Tartrazine
       theme
     end
   end
-end
\ No newline at end of file
+end
diff --git a/src/tartrazine.cr b/src/tartrazine.cr
index 4aa9daf..aa3bc87 100644
--- a/src/tartrazine.cr
+++ b/src/tartrazine.cr
@@ -6,6 +6,7 @@
 require "log"
 require "xml"
 
 module Tartrazine
+  extend self
   VERSION = "0.1.0"
   Log = ::Log.for("tartrazine")
 
@@ -13,7 +14,7 @@ module Tartrazine
   # This implements a lexer for Pygments RegexLexers as expressed
   # in Chroma's XML serialization.
   #
-  # For explanations on what actions, transformers, etc do
+  # For explanations on what actions and states do
   # the Pygments documentation is a good place to start.
   # https://pygments.org/docs/lexerdevelopment/
 
@@ -82,7 +83,29 @@ module Tartrazine
          pos += 1
        end
      end
-      tokens.reject { |token| token[:value] == "" }
+      Lexer.collapse_tokens(tokens)
+    end
+
+    # Collapse consecutive tokens of the same type for easier comparison
+    # and smaller output
+    def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token)
+      result = [] of Tartrazine::Token
+      tokens = tokens.reject { |token| token[:value] == "" }
+      tokens.each do |token|
+        if result.empty?
+          result << token
+          next
+        end
+        last = result.last
+        if last[:type] == token[:type]
+          new_token = {type: last[:type], value: last[:value] + token[:value]}
+          result.pop
+          result << new_token
+        else
+          result << token
+        end
+      end
+      result
     end
 
     # ameba:disable Metrics/CyclomaticComplexity
@@ -149,7 +172,7 @@ module Tartrazine
     end
   end
 
-  def self.get_lexer(name : String) : Lexer
+  def self.lexer(name : String) : Lexer
     Lexer.from_xml(File.read("lexers/#{name}.xml"))
   end