This commit is contained in:
Roberto Alsina 2024-08-06 17:01:14 -03:00
parent fe917da89d
commit a2394a7313
6 changed files with 133 additions and 42 deletions

View File

@ -4,10 +4,11 @@ Tartrazine is a library to syntax-highlight code. It is
a port of [Pygments](https://pygments.org/) to
[Crystal](https://crystal-lang.org/). Kind of.
It's not currently usable unless what you need is a way
to turn your files into a pile of JSON describing their
constituent tokens, because I have not implemented any
formatters yet, only the part that parses the code (the lexers).
It's not currently usable because it's not finished, but:
* The lexers work for the implemented languages
* The provided styles work
* There is a very very simple HTML formatter
# A port of what? Why "kind of"?
@ -32,15 +33,21 @@ Currently Tartrazine supports ... 241 languages.
## Installation
If you need to ask how to install this, it's not ready for you yet.
This will have a CLI tool that can be installed, but it's not
there yet.
## Usage
If you need to ask how to use this, it's not ready for you yet.
This works:
## Development
```crystal
require "tartrazine"
TODO: Write development instructions here
lexer = Tartrazine.lexer("crystal")
theme = Tartrazine.theme("catppuccin-macchiato")
puts Tartrazine::Html.new.format(File.read(ARGV[0]), lexer, theme)
```
## Contributing

View File

@ -74,8 +74,8 @@ end
# Helper that creates lexer and tokenizes
def tokenize(lexer_name, text)
lexer = Tartrazine.get_lexer(lexer_name)
collapse_tokens(lexer.tokenize(text))
lexer = Tartrazine.lexer(lexer_name)
lexer.tokenize(text)
end
# Helper that tokenizes using chroma to validate the lexer
@ -87,26 +87,5 @@ def chroma_tokenize(lexer_name, text)
["-f", "json", "-l", lexer_name],
input: input, output: output
)
collapse_tokens(Array(Tartrazine::Token).from_json(output.to_s))
end
# Collapse consecutive tokens of the same type for easier comparison
def collapse_tokens(tokens : Array(Tartrazine::Token))
result = [] of Tartrazine::Token
tokens.each do |token|
if result.empty?
result << token
next
end
last = result.last
if last[:type] == token[:type]
new_token = {type: last[:type], value: last[:value] + token[:value]}
result.pop
result << new_token
else
result << token
end
end
result
Tartrazine::Lexer.collapse_tokens(Array(Tartrazine::Token).from_json(output.to_s))
end

View File

@ -82,7 +82,7 @@ module Tartrazine
return [] of Token if match.nil?
lexer_name = xml["lexer"].downcase
Log.trace { "to tokenize: #{match[match_group]}" }
Tartrazine.get_lexer(lexer_name).tokenize(match[match_group], usingself: true)
Tartrazine.lexer(lexer_name).tokenize(match[match_group], usingself: true)
when "usingself"
# Shunt to another copy of this lexer
return [] of Token if match.nil?

72
src/formatter.cr Normal file
View File

@ -0,0 +1,72 @@
require "./tartrazine.cr"
require "./styles.cr"

module Tartrazine
  # Base class for all output formatters.
  #
  # A formatter turns the token stream produced by a `Lexer` into some
  # rendered representation, colored according to a `Theme`.
  abstract class Formatter
    # Human-readable formatter name (empty by default).
    property name : String = ""

    # Render `text`, tokenized by `lexer`, using the colors in `theme`.
    # Subclasses must override; the base implementation always raises.
    def format(text : String, lexer : Lexer, theme : Theme) : String
      raise Exception.new("Not implemented")
    end

    # Build a CSS stylesheet string with one `.TokenType { ... }` rule
    # per token style defined in `theme`.
    # ameba:disable Metrics/CyclomaticComplexity
    def get_style_defs(theme : Theme) : String
      output = String.build do |outp|
        theme.styles.each do |token, style|
          outp << ".#{token} {"
          # These are set or nil
          outp << "color: #{style.color};" if style.color
          outp << "background-color: #{style.background};" if style.background
          outp << "border: 1px solid #{style.border};" if style.border
          # These are true/false/nil — tri-state flags: nil means
          # "unset/inherit", so emit a declaration only when the flag
          # is explicitly true or explicitly false.
          outp << "border: none;" if style.border == false
          outp << "font-weight: bold;" if style.bold
          outp << "font-weight: 400;" if style.bold == false
          outp << "font-style: italic;" if style.italic
          outp << "font-style: normal;" if style.italic == false
          outp << "text-decoration: underline;" if style.underline
          outp << "text-decoration: none;" if style.underline == false
          outp << "}"
        end
      end
      output
    end
  end

  # Formatter that emits a standalone HTML page with the theme's
  # stylesheet embedded in a <style> tag.
  class Html < Formatter
    def format(text : String, lexer : Lexer, theme : Theme) : String
      output = String.build do |outp|
        outp << "<html><head><style>"
        outp << get_style_defs(theme)
        outp << "</style></head><body>"
        outp << "<pre class=\"Background\"><code class=\"Background\">"
        # One <span> per token, classed by the nearest token type the
        # theme actually styles.
        # NOTE(review): token values are interpolated without HTML
        # escaping — source containing `<`, `>` or `&` will break the
        # markup; confirm whether escaping belongs here or upstream.
        lexer.tokenize(text).each do |token|
          fragment = "<span class=\"#{get_css_class(token[:type], theme)}\">#{token[:value]}</span>"
          outp << fragment
        end
        outp << "</code></pre></body></html>"
      end
      output
    end

    # Given a token type, return the CSS class to use.
    def get_css_class(token, theme)
      return token if theme.styles.has_key?(token)
      # Themes don't contain information for each specific
      # token type. However, they may contain information
      # for a parent style. Worst case, we go to the root
      # (Background) style.
      theme.style_parents(token).reverse.find { |parent|
        theme.styles.has_key?(parent)
      }
    end
  end
end

# NOTE(review): top-level script code in a library file — it runs on
# every `require` and reads ARGV. Looks like debug leftovers; consider
# moving it to a dedicated CLI entry point.
lexer = Tartrazine.lexer("crystal")
theme = Tartrazine.theme("catppuccin-macchiato")
puts Tartrazine::Html.new.format(File.read(ARGV[0]), lexer, theme)

View File

@ -1,6 +1,11 @@
require "xml"
module Tartrazine
# Load the theme named `name` from `styles/<name>.xml` (a Chroma-style
# XML theme file) and parse it into a `Theme`.
# NOTE(review): path is relative to the current working directory —
# confirm this is intended rather than a path relative to the library.
def self.theme(name : String) : Theme
  path = File.join("styles", "#{name}.xml")
  Theme.from_xml(File.read(path))
end
class Style
# These properties are tri-state.
# true means it's set
@ -52,11 +57,7 @@ module Tartrazine
return s if s.complete?
# Form the hierarchy of parent styles
parents = ["Background"]
parts = token.underscore.split("_").map(&.capitalize)
parts.each_with_index do |_, i|
parents << parts[..i].join("")
end
parents = style_parents(token)
s = parents.map do |parent|
styles[parent]
@ -68,6 +69,15 @@ module Tartrazine
s
end
# Return the chain of parent style names for `token`, starting at the
# root ("Background") and ending with `token` itself.
#
# Token type names are hierarchical CamelCase (e.g. "LiteralStringDouble");
# `underscore`/`split`/`capitalize` recovers the CamelCase components, and
# each cumulative prefix ("Literal", "LiteralString", ...) is a parent.
def style_parents(token)
  parents = ["Background"]
  parts = token.underscore.split("_").map(&.capitalize)
  parts.each_with_index do |_, i|
    parents << parts[..i].join("")
  end
  parents
end
# Load from a Chroma XML file
def self.from_xml(xml : String) : Theme
document = XML.parse(xml)
@ -100,4 +110,4 @@ module Tartrazine
theme
end
end
end
end

View File

@ -6,6 +6,7 @@ require "log"
require "xml"
module Tartrazine
extend self
VERSION = "0.1.0"
Log = ::Log.for("tartrazine")
@ -13,7 +14,7 @@ module Tartrazine
# This implements a lexer for Pygments RegexLexers as expressed
# in Chroma's XML serialization.
#
# For explanations on what actions, transformers, etc do
# For explanations on what actions and states do
# the Pygments documentation is a good place to start.
# https://pygments.org/docs/lexerdevelopment/
@ -82,7 +83,29 @@ module Tartrazine
pos += 1
end
end
tokens.reject { |token| token[:value] == "" }
Lexer.collapse_tokens(tokens)
end
# Collapse consecutive tokens of the same type for easier comparison
# and smaller output
#
# Empty-valued tokens are dropped first, then any run of adjacent
# tokens sharing a type is merged into one token whose value is the
# concatenation of the run.
def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token)
  result = [] of Tartrazine::Token
  # Empty values contribute nothing to the output.
  tokens = tokens.reject { |token| token[:value] == "" }
  tokens.each do |token|
    # First kept token: nothing to merge with yet.
    if result.empty?
      result << token
      next
    end
    last = result.last
    if last[:type] == token[:type]
      # Same type as the previous token: replace the previous token
      # with a single merged one (NamedTuples are immutable, hence
      # the pop/push instead of in-place mutation).
      new_token = {type: last[:type], value: last[:value] + token[:value]}
      result.pop
      result << new_token
    else
      result << token
    end
  end
  result
end
# ameba:disable Metrics/CyclomaticComplexity
@ -149,7 +172,7 @@ module Tartrazine
end
end
def self.get_lexer(name : String) : Lexer
def self.lexer(name : String) : Lexer
Lexer.from_xml(File.read("lexers/#{name}.xml"))
end