feat: SVG formatter

This commit is contained in:
Roberto Alsina 2024-09-21 00:17:41 -03:00
parent d26393d8c9
commit 9c70fbf389
10 changed files with 164 additions and 430 deletions

View File

@ -1,9 +1,9 @@
# This configuration file was generated by `ameba --gen-config`
# on 2024-09-11 00:56:14 UTC using Ameba version 1.6.1.
# on 2024-09-21 14:59:30 UTC using Ameba version 1.6.1.
# The point is for the user to remove these configuration records
# one by one as the reported problems are removed from the code base.
# Problems found: 4
# Problems found: 3
# Run `ameba --only Documentation/DocumentationAdmonition` for details
Documentation/DocumentationAdmonition:
Description: Reports documentation admonitions
@ -11,10 +11,24 @@ Documentation/DocumentationAdmonition:
Excluded:
- src/lexer.cr
- src/actions.cr
- spec/examples/crystal/lexer_spec.cr
Admonitions:
- TODO
- FIXME
- BUG
Enabled: true
Severity: Warning
# Problems found: 1
# Run `ameba --only Lint/SpecFilename` for details
Lint/SpecFilename:
Description: Enforces spec filenames to have `_spec` suffix
Excluded:
- spec/examples/crystal/hello.cr
IgnoredDirs:
- spec/support
- spec/fixtures
- spec/data
IgnoredFilenames:
- spec_helper
Enabled: true
Severity: Warning

View File

@ -0,0 +1 @@
puts "Hello Crystal!"

View File

@ -0,0 +1 @@
[{"type":"Text","value":"puts "},{"type":"LiteralString","value":"\"Hello Crystal!\""},{"type":"Text","value":"\n"}]

View File

@ -1,413 +0,0 @@
require "./constants/lexers"
require "./heuristics"
require "baked_file_system"
require "crystal/syntax_highlighter"
module Tartrazine
class LexerFiles
extend BakedFileSystem
bake_folder "../lexers", __DIR__
end
# Get the lexer object for a language name
# FIXME: support mimetypes
def self.lexer(name : String? = nil, filename : String? = nil, mimetype : String? = nil) : BaseLexer
return lexer_by_name(name) if name && name != "autodetect"
return lexer_by_filename(filename) if filename
return lexer_by_mimetype(mimetype) if mimetype
RegexLexer.from_xml(LexerFiles.get("/#{LEXERS_BY_NAME["plaintext"]}.xml").gets_to_end)
end
private def self.lexer_by_mimetype(mimetype : String) : BaseLexer
lexer_file_name = LEXERS_BY_MIMETYPE.fetch(mimetype, nil)
raise Exception.new("Unknown mimetype: #{mimetype}") if lexer_file_name.nil?
RegexLexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
end
private def self.lexer_by_name(name : String) : BaseLexer
return CrystalLexer.new if name == "crystal"
lexer_file_name = LEXERS_BY_NAME.fetch(name.downcase, nil)
return create_delegating_lexer(name) if lexer_file_name.nil? && name.includes? "+"
raise Exception.new("Unknown lexer: #{name}") if lexer_file_name.nil?
RegexLexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
end
private def self.lexer_by_filename(filename : String) : BaseLexer
if filename.ends_with?(".cr")
return CrystalLexer.new
end
candidates = Set(String).new
LEXERS_BY_FILENAME.each do |k, v|
candidates += v.to_set if File.match?(k, File.basename(filename))
end
case candidates.size
when 0
lexer_file_name = LEXERS_BY_NAME["plaintext"]
when 1
lexer_file_name = candidates.first
else
lexer_file_name = self.lexer_by_content(filename)
begin
return self.lexer(lexer_file_name)
rescue ex : Exception
raise Exception.new("Multiple lexers match the filename: #{candidates.to_a.join(", ")}, heuristics suggest #{lexer_file_name} but there is no matching lexer.")
end
end
RegexLexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
end
private def self.lexer_by_content(fname : String) : String?
h = Linguist::Heuristic.from_yaml(LexerFiles.get("/heuristics.yml").gets_to_end)
result = h.run(fname, File.read(fname))
case result
when Nil
raise Exception.new "No lexer found for #{fname}"
when String
result.as(String)
when Array(String)
result.first
end
end
private def self.create_delegating_lexer(name : String) : BaseLexer
language, root = name.split("+", 2)
language_lexer = lexer(language)
root_lexer = lexer(root)
DelegatingLexer.new(language_lexer, root_lexer)
end
# Return a list of all lexers
def self.lexers : Array(String)
LEXERS_BY_NAME.keys.sort!
end
# A token, the output of the tokenizer
alias Token = NamedTuple(type: String, value: String)
abstract class BaseTokenizer
end
class Tokenizer < BaseTokenizer
include Iterator(Token)
property lexer : BaseLexer
property text : Bytes
property pos : Int32 = 0
@dq = Deque(Token).new
property state_stack = ["root"]
def initialize(@lexer : BaseLexer, text : String, secondary = false)
# Respect the `ensure_nl` config option
if text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary
text += "\n"
end
@text = text.to_slice
end
def next : Iterator::Stop | Token
if @dq.size > 0
return @dq.shift
end
if pos == @text.size
return stop
end
matched = false
while @pos < @text.size
@lexer.states[@state_stack.last].rules.each do |rule|
matched, new_pos, new_tokens = rule.match(@text, @pos, self)
if matched
@pos = new_pos
split_tokens(new_tokens).each { |token| @dq << token }
break
end
end
if !matched
if @text[@pos] == 10u8
@dq << {type: "Text", value: "\n"}
@state_stack = ["root"]
else
@dq << {type: "Error", value: String.new(@text[@pos..@pos])}
end
@pos += 1
break
end
end
self.next
end
# If a token contains a newline, split it into two tokens
def split_tokens(tokens : Array(Token)) : Array(Token)
split_tokens = [] of Token
tokens.each do |token|
if token[:value].includes?("\n")
values = token[:value].split("\n")
values.each_with_index do |value, index|
value += "\n" if index < values.size - 1
split_tokens << {type: token[:type], value: value}
end
else
split_tokens << token
end
end
split_tokens
end
end
alias BaseLexer = Lexer
abstract class Lexer
property config = {
name: "",
priority: 0.0,
case_insensitive: false,
dot_all: false,
not_multiline: false,
ensure_nl: false,
}
property states = {} of String => State
def tokenizer(text : String, secondary = false) : BaseTokenizer
Tokenizer.new(self, text, secondary)
end
end
# This implements a lexer for Pygments RegexLexers as expressed
# in Chroma's XML serialization.
#
# For explanations on what actions and states do
# the Pygments documentation is a good place to start.
# https://pygments.org/docs/lexerdevelopment/
class RegexLexer < BaseLexer
# Collapse consecutive tokens of the same type for easier comparison
# and smaller output
def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token)
result = [] of Tartrazine::Token
tokens = tokens.reject { |token| token[:value] == "" }
tokens.each do |token|
if result.empty?
result << token
next
end
last = result.last
if last[:type] == token[:type]
new_token = {type: last[:type], value: last[:value] + token[:value]}
result.pop
result << new_token
else
result << token
end
end
result
end
def self.from_xml(xml : String) : Lexer
l = RegexLexer.new
lexer = XML.parse(xml).first_element_child
if lexer
config = lexer.children.find { |node|
node.name == "config"
}
if config
l.config = {
name: xml_to_s(config, name) || "",
priority: xml_to_f(config, priority) || 0.0,
not_multiline: xml_to_s(config, not_multiline) == "true",
dot_all: xml_to_s(config, dot_all) == "true",
case_insensitive: xml_to_s(config, case_insensitive) == "true",
ensure_nl: xml_to_s(config, ensure_nl) == "true",
}
end
rules = lexer.children.find { |node|
node.name == "rules"
}
if rules
# Rules contains states 🤷
rules.children.select { |node|
node.name == "state"
}.each do |state_node|
state = State.new
state.name = state_node["name"]
if l.states.has_key?(state.name)
raise Exception.new("Duplicate state: #{state.name}")
else
l.states[state.name] = state
end
# And states contain rules 🤷
state_node.children.select { |node|
node.name == "rule"
}.each do |rule_node|
case rule_node["pattern"]?
when nil
if rule_node.first_element_child.try &.name == "include"
rule = IncludeStateRule.new(rule_node)
else
rule = UnconditionalRule.new(rule_node)
end
else
rule = Rule.new(rule_node,
multiline: !l.config[:not_multiline],
dotall: l.config[:dot_all],
ignorecase: l.config[:case_insensitive])
end
state.rules << rule
end
end
end
end
l
end
end
# A lexer that takes two lexers as arguments. A root lexer
# and a language lexer. Everything is scalled using the
# language lexer, afterwards all `Other` tokens are lexed
# using the root lexer.
#
# This is useful for things like template languages, where
# you have Jinja + HTML or Jinja + CSS and so on.
class DelegatingLexer < Lexer
property language_lexer : BaseLexer
property root_lexer : BaseLexer
def initialize(@language_lexer : BaseLexer, @root_lexer : BaseLexer)
end
def tokenizer(text : String, secondary = false) : DelegatingTokenizer
DelegatingTokenizer.new(self, text, secondary)
end
end
# This Tokenizer works with a DelegatingLexer. It first tokenizes
# using the language lexer, and "Other" tokens are tokenized using
# the root lexer.
class DelegatingTokenizer < BaseTokenizer
include Iterator(Token)
@dq = Deque(Token).new
@language_tokenizer : BaseTokenizer
def initialize(@lexer : DelegatingLexer, text : String, secondary = false)
# Respect the `ensure_nl` config option
if text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary
text += "\n"
end
@language_tokenizer = @lexer.language_lexer.tokenizer(text, true)
end
def next : Iterator::Stop | Token
if @dq.size > 0
return @dq.shift
end
token = @language_tokenizer.next
if token.is_a? Iterator::Stop
return stop
elsif token.as(Token).[:type] == "Other"
root_tokenizer = @lexer.root_lexer.tokenizer(token.as(Token).[:value], true)
root_tokenizer.each do |root_token|
@dq << root_token
end
else
@dq << token.as(Token)
end
self.next
end
end
# A Lexer state. A state has a name and a list of rules.
# The state machine has a state stack containing references
# to states to decide which rules to apply.
struct State
property name : String = ""
property rules = [] of BaseRule
def +(other : State)
new_state = State.new
new_state.name = Random.base58(8)
new_state.rules = rules + other.rules
new_state
end
end
class CustomCrystalHighlighter < Crystal::SyntaxHighlighter
@tokens = [] of Token
def render_delimiter(&block)
@tokens << {type: "LiteralString", value: block.call.to_s}
end
def render_interpolation(&block)
@tokens << {type: "LiteralStringInterpol", value: "\#{"}
@tokens << {type: "Text", value: block.call.to_s}
@tokens << {type: "LiteralStringInterpol", value: "}"}
end
def render_string_array(&block)
@tokens << {type: "LiteralString", value: block.call.to_s}
end
# ameba:disable Metrics/CyclomaticComplexity
def render(type : TokenType, value : String)
case type
when .comment?
@tokens << {type: "Comment", value: value}
when .number?
@tokens << {type: "LiteralNumber", value: value}
when .char?
@tokens << {type: "LiteralStringChar", value: value}
when .symbol?
@tokens << {type: "LiteralStringSymbol", value: value}
when .const?
@tokens << {type: "NameConstant", value: value}
when .string?
@tokens << {type: "LiteralString", value: value}
when .ident?
@tokens << {type: "NameVariable", value: value}
when .keyword?, .self?
@tokens << {type: "NameKeyword", value: value}
when .primitive_literal?
@tokens << {type: "Literal", value: value}
when .operator?
@tokens << {type: "Operator", value: value}
when Crystal::SyntaxHighlighter::TokenType::DELIMITED_TOKEN, Crystal::SyntaxHighlighter::TokenType::DELIMITER_START, Crystal::SyntaxHighlighter::TokenType::DELIMITER_END
@tokens << {type: "LiteralString", value: value}
else
@tokens << {type: "Text", value: value}
end
end
end
class CrystalTokenizer < Tartrazine::BaseTokenizer
include Iterator(Token)
@hl = CustomCrystalHighlighter.new
@lexer : BaseLexer
@iter : Iterator(Token)
# delegate next, to: @iter
def initialize(@lexer : BaseLexer, text : String, secondary = false)
# Respect the `ensure_nl` config option
if text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary
text += "\n"
end
# Just do the tokenizing
@hl.highlight(text)
@iter = @hl.@tokens.each
end
def next : Iterator::Stop | Token
@iter.next
end
end
class CrystalLexer < BaseLexer
def tokenizer(text : String, secondary = false) : BaseTokenizer
CrystalTokenizer.new(self, text, secondary)
end
end
end

File diff suppressed because one or more lines are too long

View File

@ -17,12 +17,19 @@ module Tartrazine
end
def format(text : String, lexer : Lexer) : String
raise Exception.new("Not implemented")
outp = String::Builder.new("")
format(text, lexer, outp)
outp.to_s
end
# Return the styles, if the formatter supports it.
def style_defs : String
raise Exception.new("Not implemented")
end
# Is this line in the highlighted ranges?
def highlighted?(line : Int) : Bool
highlight_lines.any?(&.includes?(line))
end
end
end

View File

@ -20,12 +20,6 @@ module Tartrazine
"#{i + 1}".rjust(4).ljust(5)
end
def format(text : String, lexer : BaseLexer) : String
outp = String::Builder.new("")
format(text, lexer, outp)
outp.to_s
end
def format(text : String, lexer : BaseLexer, outp : IO) : Nil
tokenizer = lexer.tokenizer(text)
i = 0

View File

@ -134,10 +134,5 @@ module Tartrazine
end
class_prefix + Abbreviations[token]
end
# Is this line in the highlighted ranges?
def highlighted?(line : Int) : Bool
highlight_lines.any?(&.includes?(line))
end
end
end

129
src/formatters/svg.cr Normal file
View File

@ -0,0 +1,129 @@
require "../constants/token_abbrevs.cr"
require "../formatter"
require "html"
module Tartrazine
def self.to_svg(text : String, language : String,
theme : String = "default-dark",
standalone : Bool = true,
line_numbers : Bool = false) : String
Tartrazine::Svg.new(
theme: Tartrazine.theme(theme),
standalone: standalone,
line_numbers: line_numbers
).format(text, Tartrazine.lexer(name: language))
end
class Svg < Formatter
property highlight_lines : Array(Range(Int32, Int32)) = [] of Range(Int32, Int32)
property line_number_id_prefix : String = "line-"
property line_number_start : Int32 = 1
property tab_width = 8
property? line_numbers : Bool = false
property? linkable_line_numbers : Bool = true
property? standalone : Bool = false
property weight_of_bold : Int32 = 600
property fs : Int32
property ystep : Int32
property theme : Theme
def initialize(@theme : Theme = Tartrazine.theme("default-dark"), *,
@highlight_lines = [] of Range(Int32, Int32),
@class_prefix : String = "",
@line_number_id_prefix = "line-",
@line_number_start = 1,
@tab_width = 8,
@line_numbers : Bool = false,
@linkable_line_numbers : Bool = true,
@standalone : Bool = false,
@weight_of_bold : Int32 = 600,
@font_family : String = "monospace",
@font_size : String = "14px")
if font_size.ends_with? "px"
@fs = font_size[0...-2].to_i
else
@fs = font_size.to_i
end
@ystep = @fs + 5
end
def format(text : String, lexer : BaseLexer, io : IO) : Nil
pre, post = wrap_standalone
io << pre if standalone?
format_text(text, lexer, io)
io << post if standalone?
end
# Wrap text into a full HTML document, including the CSS for the theme
def wrap_standalone
output = String.build do |outp|
outp << %(<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN" "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
<svg xmlns="http://www.w3.org/2000/svg">
<g font-family="#{self.@font_family}" font-size="#{self.@font_size}">)
end
{output.to_s, "</g></svg>"}
end
private def line_label(i : Int32, x : Int32, y : Int32) : String
line_label = "#{i + 1}".rjust(4).ljust(5)
line_style = highlighted?(i + 1) ? "font-weight=\"#{@weight_of_bold}\"" : ""
line_id = linkable_line_numbers? ? "id=\"#{line_number_id_prefix}#{i + 1}\"" : ""
%(<text #{line_style} #{line_id} x="#{4*ystep}" y="#{y}" text-anchor="end">#{line_label}</text>)
end
def format_text(text : String, lexer : BaseLexer, outp : IO)
x = 0
y = ystep
i = 0
line_x = x
line_x += 5 * ystep if line_numbers?
tokenizer = lexer.tokenizer(text)
outp << line_label(i, x, y) if line_numbers?
outp << %(<text x="#{line_x}" y="#{y}" xml:space="preserve">)
tokenizer.each do |token|
if token[:value].ends_with? "\n"
outp << "<tspan #{get_style(token[:type])}>#{HTML.escape(token[:value][0...-1])}</tspan>"
outp << "</text>"
x = 0
y += ystep
i += 1
outp << line_label(i, x, y) if line_numbers?
outp << %(<text x="#{line_x}" y="#{y}" xml:space="preserve">)
else
outp << "<tspan#{get_style(token[:type])}>#{HTML.escape(token[:value])}</tspan>"
x += token[:value].size * ystep
end
end
outp << "</text>"
end
# Given a token type, return the style.
def get_style(token : String) : String
if !theme.styles.has_key? token
# Themes don't contain information for each specific
# token type. However, they may contain information
# for a parent style. Worst case, we go to the root
# (Background) style.
parent = theme.style_parents(token).reverse.find { |dad|
theme.styles.has_key?(dad)
}
theme.styles[token] = theme.styles[parent]
end
output = String.build do |outp|
style = theme.styles[token]
outp << " fill=\"##{style.color.try &.hex}\"" if style.color
# No support for background color or border in SVG
outp << " font-weight=\"#{@weight_of_bold}\"" if style.bold
outp << " font-weight=\"normal\"" if style.bold == false
outp << " font-style=\"italic\"" if style.italic
outp << " font-style=\"normal\"" if style.italic == false
outp << " text-decoration=\"underline\"" if style.underline
outp << " text-decoration=\"none" if style.underline == false
end
output
end
end
end

View File

@ -11,6 +11,8 @@ Usage:
tartrazine -f html -t theme --css
tartrazine FILE -f terminal [-t theme][-l lexer][--line-numbers]
[-o output]
tartrazine FILE -f svg [-t theme][--standalone][--line-numbers]
[-l lexer][-o output]
tartrazine FILE -f json [-o output]
tartrazine --list-themes
tartrazine --list-lexers
@ -18,7 +20,7 @@ Usage:
tartrazine --version
Options:
-f <formatter> Format to use (html, terminal, json)
-f <formatter> Format to use (html, terminal, json, svg)
-t <theme> Theme to use, see --list-themes [default: default-dark]
-l <lexer> Lexer (language) to use, see --list-lexers. Use more than
one lexer with "+" (e.g. jinja+yaml) [default: autodetect]
@ -71,6 +73,11 @@ if options["-f"]
formatter.theme = theme
when "json"
formatter = Tartrazine::Json.new
when "svg"
formatter = Tartrazine::Svg.new
formatter.standalone = options["--standalone"] != nil
formatter.line_numbers = options["--line-numbers"] != nil
formatter.theme = theme
else
puts "Invalid formatter: #{formatter}"
exit 1