diff --git a/README.md b/README.md
index 4b07fb5..5b9e51c 100644
--- a/README.md
+++ b/README.md
@@ -4,10 +4,11 @@ Tartrazine is a library to syntax-highlight code. It is
 a port of [Pygments](https://pygments.org/) to
 [Crystal](https://crystal-lang.org/). Kind of.
 
-It's not currently usable unless what you need is a way
-to turn your files into a pile of json describing its
-constituent tokens, because I have not implemented any
-formatters, yet, only the part that parses the code (the lexers).
+It's not currently usable because it's not finished, but:
+
+* The lexers work for the implemented languages
+* The provided styles work
+* There is a very very simple HTML formatter
 
 # A port of what? Why "kind of"?
 
@@ -32,15 +33,21 @@ Currently Tartrazine supports ... 241 languages.
 
 ## Installation
 
-If you need to ask how to install this, it's not ready for you yet.
+This will have a CLI tool that can be installed, but it's not
+there yet.
+
 
 ## Usage
 
-If you need to ask how to use this, it's not ready for you yet.
+This works:
 
-## Development
+```crystal
+require "tartrazine"
 
-TODO: Write development instructions here
+lexer = Tartrazine.lexer("crystal")
+theme = Tartrazine.theme("catppuccin-macchiato")
+puts Tartrazine::Html.new.format(File.read(ARGV[0]), lexer, theme)
+```
 
 ## Contributing
 
diff --git a/spec/tartrazine_spec.cr b/spec/tartrazine_spec.cr
index 7b564c2..4615a25 100644
--- a/spec/tartrazine_spec.cr
+++ b/spec/tartrazine_spec.cr
@@ -74,8 +74,8 @@ end
 
 # Helper that creates lexer and tokenizes
 def tokenize(lexer_name, text)
-  lexer = Tartrazine.get_lexer(lexer_name)
-  collapse_tokens(lexer.tokenize(text))
+  lexer = Tartrazine.lexer(lexer_name)
+  lexer.tokenize(text)
 end
 
 # Helper that tokenizes using chroma to validate the lexer
@@ -87,26 +87,5 @@ def chroma_tokenize(lexer_name, text)
     ["-f", "json", "-l", lexer_name],
     input: input, output: output
   )
-  collapse_tokens(Array(Tartrazine::Token).from_json(output.to_s))
-end
-
-# Collapse consecutive tokens of the same type for easier comparison
-def collapse_tokens(tokens : Array(Tartrazine::Token))
-  result = [] of Tartrazine::Token
-
-  tokens.each do |token|
-    if result.empty?
-      result << token
-      next
-    end
-    last = result.last
-    if last[:type] == token[:type]
-      new_token = {type: last[:type], value: last[:value] + token[:value]}
-      result.pop
-      result << new_token
-    else
-      result << token
-    end
-  end
-  result
+  Tartrazine::Lexer.collapse_tokens(Array(Tartrazine::Token).from_json(output.to_s))
 end
diff --git a/src/actions.cr b/src/actions.cr
index 9c2878c..b899859 100644
--- a/src/actions.cr
+++ b/src/actions.cr
@@ -82,7 +82,7 @@ module Tartrazine
         return [] of Token if match.nil?
         lexer_name = xml["lexer"].downcase
         Log.trace { "to tokenize: #{match[match_group]}" }
-        Tartrazine.get_lexer(lexer_name).tokenize(match[match_group], usingself: true)
+        Tartrazine.lexer(lexer_name).tokenize(match[match_group], usingself: true)
       when "usingself"
         # Shunt to another copy of this lexer
         return [] of Token if match.nil?
diff --git a/src/formatter.cr b/src/formatter.cr
new file mode 100644
index 0000000..5f0fb8a
--- /dev/null
+++ b/src/formatter.cr
@@ -0,0 +1,72 @@
+require "./tartrazine.cr"
+require "./styles.cr"
+
+module Tartrazine
+  # This is the base class for all formatters.
+  abstract class Formatter
+    property name : String = ""
+
+    def format(text : String, lexer : Lexer, theme : Theme) : String
+      raise Exception.new("Not implemented")
+    end
+
+    # ameba:disable Metrics/CyclomaticComplexity
+    def get_style_defs(theme : Theme) : String
+      output = String.build do |outp|
+        theme.styles.each do |token, style|
+          outp << ".#{token} {"
+          # These are set or nil
+          outp << "color: #{style.color};" if style.color
+          outp << "background-color: #{style.background};" if style.background
+          outp << "border: 1px solid #{style.border};" if style.border
+
+          # These are true/false/nil
+          outp << "border: none;" if style.border == false
+          outp << "font-weight: bold;" if style.bold
+          outp << "font-weight: 400;" if style.bold == false
+          outp << "font-style: italic;" if style.italic
+          outp << "font-style: normal;" if style.italic == false
+          outp << "text-decoration: underline;" if style.underline
+          outp << "text-decoration: none;" if style.underline == false
+
+          outp << "}"
+        end
+      end
+      output
+    end
+  end
+
+  class Html < Formatter
+    def format(text : String, lexer : Lexer, theme : Theme) : String
+      output = String.build do |outp|
+        outp << "<html><head><style>#{get_style_defs(theme)}</style></head><body>"
+        outp << "<pre class=\"#{get_css_class("Background", theme)}\"><code class=\"#{get_css_class("Background", theme)}\">"
+        lexer.tokenize(text).each do |token|
+          fragment = "<span class=\"#{get_css_class(token[:type], theme)}\">#{token[:value]}</span>"
+          outp << fragment
+        end
+        outp << "</code></pre></body></html>"
+      end
+      output
+    end
+
+    # Given a token type, return the CSS class to use.
+    def get_css_class(token, theme)
+      return token if theme.styles.has_key?(token)
+
+      # Themes don't contain information for each specific
+      # token type. However, they may contain information
+      # for a parent style. Worst case, we go to the root
+      # (Background) style.
+      theme.style_parents(token).reverse.find { |parent|
+        theme.styles.has_key?(parent)
+      }
+    end
+  end
+end
+
+lexer = Tartrazine.lexer("crystal")
+theme = Tartrazine.theme("catppuccin-macchiato")
+puts Tartrazine::Html.new.format(File.read(ARGV[0]), lexer, theme)
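The `Html` formatter above is built from two pieces: `get_style_defs(theme)` turns a theme into plain CSS (one `.TokenType { ... }` rule per style), and `format(text, lexer, theme)` tokenizes the input and wraps every token in a span tagged with the class that `get_css_class` resolves for it. A rough usage sketch, using only names introduced in this diff (the file names are made up for illustration):

```crystal
require "tartrazine"

lexer     = Tartrazine.lexer("crystal")
theme     = Tartrazine.theme("catppuccin-macchiato")
formatter = Tartrazine::Html.new

# Standalone stylesheet, in case you want the CSS separate from the markup.
File.write("hello.css", formatter.get_style_defs(theme))
# Full highlighted document for some input file.
File.write("hello.html", formatter.format(File.read("hello.cr"), lexer, theme))
```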
diff --git a/src/styles.cr b/src/styles.cr
index 5e83105..ea609dd 100644
--- a/src/styles.cr
+++ b/src/styles.cr
@@ -1,6 +1,11 @@
require "xml"
module Tartrazine
+ def self.theme(name : String) : Theme
+ path = File.join("styles", "#{name}.xml")
+ Theme.from_xml(File.read(path))
+ end
+
class Style
# These properties are tri-state.
# true means it's set
@@ -52,11 +57,7 @@ module Tartrazine
return s if s.complete?
# Form the hierarchy of parent styles
- parents = ["Background"]
- parts = token.underscore.split("_").map(&.capitalize)
- parts.each_with_index do |_, i|
- parents << parts[..i].join("")
- end
+ parents = style_parents(token)
s = parents.map do |parent|
styles[parent]
@@ -68,6 +69,15 @@ module Tartrazine
s
end
+ def style_parents(token)
+ parents = ["Background"]
+ parts = token.underscore.split("_").map(&.capitalize)
+ parts.each_with_index do |_, i|
+ parents << parts[..i].join("")
+ end
+ parents
+ end
+
# Load from a Chroma XML file
def self.from_xml(xml : String) : Theme
document = XML.parse(xml)
@@ -100,4 +110,4 @@ module Tartrazine
theme
end
end
-end
\ No newline at end of file
+end
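`style_parents` is what lets `Theme#style` and the formatter's `get_css_class` fall back to broader token categories. For a concrete token type the chain expands like this (a small illustration of the logic above; the token name is just an example):

```crystal
# Mirrors Theme#style_parents for one example token type.
token = "LiteralStringDouble"
parents = ["Background"]
parts = token.underscore.split("_").map(&.capitalize)
parts.each_with_index { |_, i| parents << parts[..i].join("") }
parents # => ["Background", "Literal", "LiteralString", "LiteralStringDouble"]
```

`get_css_class` walks that list in reverse, so a theme that only defines `LiteralString` still styles `LiteralStringDouble` tokens.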
diff --git a/src/tartrazine.cr b/src/tartrazine.cr
index 4aa9daf..aa3bc87 100644
--- a/src/tartrazine.cr
+++ b/src/tartrazine.cr
@@ -6,6 +6,7 @@ require "log"
require "xml"
module Tartrazine
+ extend self
VERSION = "0.1.0"
Log = ::Log.for("tartrazine")
@@ -13,7 +14,7 @@ module Tartrazine
# This implements a lexer for Pygments RegexLexers as expressed
# in Chroma's XML serialization.
#
- # For explanations on what actions, transformers, etc do
+ # For explanations on what actions and states do
# the Pygments documentation is a good place to start.
# https://pygments.org/docs/lexerdevelopment/
@@ -82,7 +83,29 @@ module Tartrazine
pos += 1
end
end
- tokens.reject { |token| token[:value] == "" }
+ Lexer.collapse_tokens(tokens)
+ end
+
+ # Collapse consecutive tokens of the same type for easier comparison
+ # and smaller output
+ def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token)
+ result = [] of Tartrazine::Token
+ tokens = tokens.reject { |token| token[:value] == "" }
+ tokens.each do |token|
+ if result.empty?
+ result << token
+ next
+ end
+ last = result.last
+ if last[:type] == token[:type]
+ new_token = {type: last[:type], value: last[:value] + token[:value]}
+ result.pop
+ result << new_token
+ else
+ result << token
+ end
+ end
+ result
end
# ameba:disable Metrics/CyclomaticComplexity
@@ -149,7 +172,7 @@ module Tartrazine
end
end
- def self.get_lexer(name : String) : Lexer
+ def self.lexer(name : String) : Lexer
Lexer.from_xml(File.read("lexers/#{name}.xml"))
end
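Finally, `Lexer.collapse_tokens` is what keeps the output small and makes the chroma comparison in the specs stable: empty tokens are dropped and consecutive tokens of the same type are merged. A rough before/after sketch, assuming `Token` is the `{type:, value:}` named tuple used throughout the diff:

```crystal
tokens = [
  {type: "Text", value: "a"},
  {type: "Text", value: "b"},
  {type: "Punctuation", value: ""}, # empty values get dropped
  {type: "Keyword", value: "def"},
] of Tartrazine::Token

Tartrazine::Lexer.collapse_tokens(tokens)
# => [{type: "Text", value: "ab"}, {type: "Keyword", value: "def"}]
```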