Compare commits

...

17 Commits

SHA1 Message Date
84ee7e6934 JSON formatter 2024-08-09 16:58:15 -03:00
89d212b71c Start actual CLI 2024-08-09 16:53:24 -03:00
a92d2501f7 HTML formatter option: wrap_long_lines 2024-08-09 16:20:30 -03:00
6b44bcb5ad HTML formatter option: surrounding_pre 2024-08-09 15:59:49 -03:00
86a5894429 Hack luminance tweaking for creating highlight color (needs a proper implementation) 2024-08-09 14:54:00 -03:00
be12e0f4f1 Sort constants 2024-08-09 14:44:23 -03:00
96dcb7e15e Fix line highlight for non-base16 themes 2024-08-09 14:42:33 -03:00
d1762f477a Fix constants for non-base16 themes 2024-08-09 14:17:24 -03:00
f98f44365f HTML formatter option: line_numbers / highlight_lines 2024-08-09 14:00:42 -03:00
d0c2b1764a HTML formatter option: line_number_start / line_number_id_prefix 2024-08-09 13:28:05 -03:00
e6a292ade0 HTML formatter option: tab_width 2024-08-09 12:29:56 -03:00
4ced996f90 HTML formatter option: class_prefix 2024-08-09 12:21:02 -03:00
fd5af6ba3b Starting to add options to HTML formatter: standalone 2024-08-09 11:57:23 -03:00
47237eecc3 Refactor things into separate files for easier reading 2024-08-09 11:31:18 -03:00
a0ff4e0118 0.1.1 2024-08-09 11:11:17 -03:00
ece3d4163a Bug 2024-08-09 11:03:32 -03:00
3180168261 Added helper files 2024-08-09 10:32:15 -03:00
14 changed files with 562 additions and 369 deletions

Dockerfile.static (new file)

@@ -0,0 +1,15 @@
FROM --platform=${TARGETPLATFORM:-linux/amd64} alpine:3.20 AS build
RUN apk add --no-cache \
crystal \
shards \
yaml-dev \
yaml-static \
openssl-dev \
openssl-libs-static \
libxml2-dev \
libxml2-static \
zlib-dev \
zlib-static \
xz-dev \
xz-static \
make

README.md

@@ -36,9 +36,7 @@ It has 332 themes (64 from Chroma, the rest are base16 themes via
## Installation
This will have a CLI tool that can be installed, but it's not
there yet.
This has a CLI but it's not generally usable.
## Usage

scripts/token_abbrevs.py

@@ -1,24 +1,46 @@
import sys
import string
import glob
# Run it as grep token lexers/* | python scripts/token_abbrevs.py
tokens = {"Highlight"}
abbrevs = {"Highlight": "hl"}
def abbr(line):
return "".join(c for c in line if c in string.ascii_uppercase).lower()
abbrevs = {}
tokens = set([])
for line in sys.stdin:
if "<token" not in line:
continue
line = line.strip()
line = line.split('<token ',1)[-1]
line = line.split('"')[1]
abbrevs[line] = abbr(line)
tokens.add(line)
def check_abbrevs():
if len(abbrevs) != len(tokens):
print("Warning: Abbreviations are not unique")
print(len(abbrevs), len(tokens))
sys.exit(1)
print("Abbreviations: {")
# Processes all files in lexers looking for token names
for fname in glob.glob("lexers/*.xml"):
with open(fname) as f:
for line in f:
if "<token" not in line:
continue
line = line.strip()
line = line.split('<token ',1)[-1]
line = line.split('"')[1]
abbrevs[line] = abbr(line)
tokens.add(line)
check_abbrevs()
# Processes all files in styles looking for token names too
for fname in glob.glob("styles/*.xml"):
with open(fname) as f:
for line in f:
if "<entry" not in line:
continue
line = line.strip()
line = line.split('type=',1)[-1]
line = line.split('"')[1]
abbrevs[line] = abbr(line)
tokens.add(line)
check_abbrevs()
print("Abbreviations = {")
for k, v in abbrevs.items():
print(f' "{k}" => "{v}",')
print("}")

shard.yml

@@ -1,5 +1,5 @@
name: tartrazine
version: 0.1.0
version: 0.1.1
authors:
- Roberto Alsina <roberto.alsina@gmail.com>
@@ -15,6 +15,8 @@ dependencies:
github: crystal-china/base58.cr
sixteen:
github: ralsina/sixteen
docopt:
github: chenkovsky/docopt.cr
crystal: ">= 1.13.0"

src/constants.cr

@@ -1,92 +1,99 @@
module Tartrazine
Abbreviations = {
"Background" => "b",
"Text" => "t",
"CodeLine" => "cl",
"Comment" => "c",
"CommentHashbang" => "ch",
"CommentMultiline" => "cm",
"CommentPreproc" => "cp",
"CommentPreprocFile" => "cpf",
"CommentSingle" => "cs",
"CommentSpecial" => "cs",
"NameVariable" => "nv",
"Keyword" => "k",
"NameFunction" => "nf",
"Punctuation" => "p",
"Operator" => "o",
"LiteralNumberInteger" => "lni",
"NameBuiltin" => "nb",
"Name" => "n",
"OperatorWord" => "ow",
"LiteralStringSingle" => "lss",
"Literal" => "l",
"NameClass" => "nc",
"CommentMultiline" => "cm",
"LiteralStringRegex" => "lsr",
"KeywordDeclaration" => "kd",
"KeywordConstant" => "kc",
"NameOther" => "no",
"LiteralNumberFloat" => "lnf",
"LiteralNumberHex" => "lnh",
"LiteralStringDouble" => "lsd",
"KeywordType" => "kt",
"NameNamespace" => "nn",
"NameAttribute" => "na",
"KeywordReserved" => "kr",
"CommentPreproc" => "cp",
"KeywordNamespace" => "kn",
"NameConstant" => "nc",
"NameLabel" => "nl",
"LiteralString" => "ls",
"LiteralStringChar" => "lsc",
"TextWhitespace" => "tw",
"LiteralStringEscape" => "lse",
"LiteralNumber" => "ln",
"Other" => "o",
"LiteralStringBoolean" => "lsb",
"NameProperty" => "np",
"Comment" => "c",
"NameTag" => "nt",
"LiteralStringOther" => "lso",
"NameVariableGlobal" => "nvg",
"NameBuiltinPseudo" => "nbp",
"LiteralNumberBin" => "lnb",
"KeywordPseudo" => "kp",
"CommentPreprocFile" => "cpf",
"LiteralStringAffix" => "lsa",
"LiteralStringDelimiter" => "lsd",
"LiteralNumberOct" => "lno",
"Error" => "e",
"Generic" => "g",
"LiteralNumberIntegerLong" => "lnil",
"NameDecorator" => "nd",
"LiteralStringInterpol" => "lsi",
"LiteralStringBacktick" => "lsb",
"GenericPrompt" => "gp",
"GenericOutput" => "go",
"LiteralStringName" => "lsn",
"LiteralStringHeredoc" => "lsh",
"LiteralStringSymbol" => "lss",
"NameVariableInstance" => "nvi",
"LiteralOther" => "lo",
"NameVariableClass" => "nvc",
"NameOperator" => "no",
"None" => "n",
"LiteralStringDoc" => "lsd",
"NameException" => "ne",
"GenericSubheading" => "gs",
"GenericStrong" => "gs",
"GenericDeleted" => "gd",
"GenericInserted" => "gi",
"GenericHeading" => "gh",
"NameEntity" => "ne",
"NamePseudo" => "np",
"CommentHashbang" => "ch",
"TextPunctuation" => "tp",
"NameVariableAnonymous" => "nva",
"NameVariableMagic" => "nvm",
"NameFunctionMagic" => "nfm",
"GenericEmph" => "ge",
"GenericUnderline" => "gu",
"LiteralStringAtom" => "lsa",
"LiteralDate" => "ld",
"GenericError" => "ge",
"TextSymbol" => "ts",
"GenericHeading" => "gh",
"GenericInserted" => "gi",
"GenericOutput" => "go",
"GenericPrompt" => "gp",
"GenericStrong" => "gs",
"GenericSubheading" => "gs",
"GenericTraceback" => "gt",
"GenericUnderline" => "gu",
"Keyword" => "k",
"KeywordConstant" => "kc",
"KeywordDeclaration" => "kd",
"KeywordNamespace" => "kn",
"KeywordPseudo" => "kp",
"KeywordReserved" => "kr",
"KeywordType" => "kt",
"LineHighlight" => "lh",
"LineNumbers" => "ln",
"LineNumbersTable" => "lnt",
"LineTable" => "lt",
"LineTableTD" => "lttd",
"Literal" => "l",
"LiteralDate" => "ld",
"LiteralNumber" => "ln",
"LiteralNumberBin" => "lnb",
"LiteralNumberFloat" => "lnf",
"LiteralNumberHex" => "lnh",
"LiteralNumberInteger" => "lni",
"LiteralNumberIntegerLong" => "lnil",
"LiteralNumberOct" => "lno",
"LiteralOther" => "lo",
"LiteralString" => "ls",
"LiteralStringAffix" => "lsa",
"LiteralStringAtom" => "lsa",
"LiteralStringBacktick" => "lsb",
"LiteralStringBoolean" => "lsb",
"LiteralStringChar" => "lsc",
"LiteralStringDelimiter" => "lsd",
"LiteralStringDoc" => "lsd",
"LiteralStringDouble" => "lsd",
"LiteralStringEscape" => "lse",
"LiteralStringHeredoc" => "lsh",
"LiteralStringInterpol" => "lsi",
"LiteralStringName" => "lsn",
"LiteralStringOther" => "lso",
"LiteralStringRegex" => "lsr",
"LiteralStringSingle" => "lss",
"LiteralStringSymbol" => "lss",
"Name" => "n",
"NameAttribute" => "na",
"NameBuiltin" => "nb",
"NameBuiltinPseudo" => "nbp",
"NameClass" => "nc",
"NameConstant" => "nc",
"NameDecorator" => "nd",
"NameEntity" => "ne",
"NameException" => "ne",
"NameFunction" => "nf",
"NameFunctionMagic" => "nfm",
"NameKeyword" => "nk",
"NameLabel" => "nl",
"NameNamespace" => "nn",
"NameOperator" => "no",
"NameOther" => "no",
"NameProperty" => "np",
"NamePseudo" => "np",
"NameTag" => "nt",
"NameVariable" => "nv",
"NameVariableAnonymous" => "nva",
"NameVariableClass" => "nvc",
"NameVariableGlobal" => "nvg",
"NameVariableInstance" => "nvi",
"NameVariableMagic" => "nvm",
"None" => "n",
"Operator" => "o",
"OperatorWord" => "ow",
"Other" => "o",
"Punctuation" => "p",
"Text" => "t",
"TextPunctuation" => "tp",
"TextSymbol" => "ts",
"TextWhitespace" => "tw",
}
end
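With the constants sorted, looking up the CSS abbreviation for a token type is a plain Hash access; for example:

Tartrazine::Abbreviations["LiteralStringDouble"] # => "lsd"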

src/formatter.cr

@@ -19,92 +19,4 @@
raise Exception.new("Not implemented")
end
end
class Ansi < Formatter
def format(text : String, lexer : Lexer, theme : Theme) : String
output = String.build do |outp|
lexer.tokenize(text).each do |token|
outp << self.colorize(token[:value], token[:type], theme)
end
end
output
end
def colorize(text : String, token : String, theme : Theme) : String
style = theme.styles.fetch(token, nil)
return text if style.nil?
if theme.styles.has_key?(token)
s = theme.styles[token]
else
# Themes don't contain information for each specific
# token type. However, they may contain information
# for a parent style. Worst case, we go to the root
# (Background) style.
s = theme.styles[theme.style_parents(token).reverse.find { |parent|
theme.styles.has_key?(parent)
}]
end
colorized = text.colorize(s.color.try &.colorize)
# Intentionally not setting background color
colorized.mode(:bold) if s.bold
colorized.mode(:italic) if s.italic
colorized.mode(:underline) if s.underline
colorized.to_s
end
end
class Html < Formatter
def format(text : String, lexer : Lexer, theme : Theme) : String
output = String.build do |outp|
outp << "<html><head><style>"
outp << get_style_defs(theme)
outp << "</style></head><body>"
outp << "<pre class=\"#{get_css_class("Background", theme)}\"><code class=\"#{get_css_class("Background", theme)}\">"
lexer.tokenize(text).each do |token|
fragment = "<span class=\"#{get_css_class(token[:type], theme)}\">#{token[:value]}</span>"
outp << fragment
end
outp << "</code></pre></body></html>"
end
output
end
# ameba:disable Metrics/CyclomaticComplexity
def get_style_defs(theme : Theme) : String
output = String.build do |outp|
theme.styles.each do |token, style|
outp << ".#{get_css_class(token, theme)} {"
# These are set or nil
outp << "color: #{style.color.try &.hex};" if style.color
outp << "background-color: #{style.background.try &.hex};" if style.background
outp << "border: 1px solid #{style.border.try &.hex};" if style.border
# These are true/false/nil
outp << "border: none;" if style.border == false
outp << "font-weight: bold;" if style.bold
outp << "font-weight: 400;" if style.bold == false
outp << "font-style: italic;" if style.italic
outp << "font-style: normal;" if style.italic == false
outp << "text-decoration: underline;" if style.underline
outp << "text-decoration: none;" if style.underline == false
outp << "}"
end
end
output
end
# Given a token type, return the CSS class to use.
def get_css_class(token, theme)
return Abbreviations[token] if theme.styles.has_key?(token)
# Themes don't contain information for each specific
# token type. However, they may contain information
# for a parent style. Worst case, we go to the root
# (Background) style.
Abbreviations[theme.style_parents(token).reverse.find { |parent|
theme.styles.has_key?(parent)
}]
end
end
end

src/formatters/ansi.cr (new file)

@@ -0,0 +1,37 @@
require "../formatter"
module Tartrazine
class Ansi < Formatter
def format(text : String, lexer : Lexer, theme : Theme) : String
output = String.build do |outp|
lexer.tokenize(text).each do |token|
outp << self.colorize(token[:value], token[:type], theme)
end
end
output
end
def colorize(text : String, token : String, theme : Theme) : String
style = theme.styles.fetch(token, nil)
return text if style.nil?
if theme.styles.has_key?(token)
s = theme.styles[token]
else
# Themes don't contain information for each specific
# token type. However, they may contain information
# for a parent style. Worst case, we go to the root
# (Background) style.
s = theme.styles[theme.style_parents(token).reverse.find { |parent|
theme.styles.has_key?(parent)
}]
end
colorized = text.colorize
s.color.try { |col| colorized = colorized.fore(col.colorize) }
# Intentionally not setting background color
colorized.mode(:bold) if s.bold
colorized.mode(:italic) if s.italic
colorized.mode(:underline) if s.underline
colorized.to_s
end
end
end
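A minimal usage sketch for the ANSI formatter (the lexer, theme, and file names here are assumptions, not from the diff):

puts Tartrazine::Ansi.new.format(File.read("src/main.cr"), Tartrazine.lexer("crystal"), Tartrazine.theme("default-dark"))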

src/formatters/html.cr (new file)

@@ -0,0 +1,127 @@
require "../formatter"
module Tartrazine
class Html < Formatter
# property line_number_in_table : Bool = false
# property with_classes : Bool = true
property class_prefix : String = ""
property highlight_lines : Array(Range(Int32, Int32)) = [] of Range(Int32, Int32)
property line_number_id_prefix : String = "line-"
property line_number_start : Int32 = 1
property tab_width = 8
property? line_numbers : Bool = false
property? linkable_line_numbers : Bool = true
property? standalone : Bool = false
property? surrounding_pre : Bool = true
property? wrap_long_lines : Bool = false
def format(text : String, lexer : Lexer, theme : Theme) : String
text = format_text(text, lexer, theme)
if standalone?
text = wrap_standalone(text, theme)
end
text
end
# Wrap text into a full HTML document, including the CSS for the theme
def wrap_standalone(text, theme) : String
output = String.build do |outp|
outp << "<!DOCTYPE html><html><head><style>"
outp << get_style_defs(theme)
outp << "</style></head><body>"
outp << text
outp << "</body></html>"
end
output
end
def format_text(text : String, lexer : Lexer, theme : Theme) : String
lines = group_tokens_in_lines(lexer.tokenize(text))
output = String.build do |outp|
if surrounding_pre?
pre_style = wrap_long_lines? ? "style=\"white-space: pre-wrap; word-break: break-word;\"" : ""
outp << "<pre class=\"#{get_css_class("Background", theme)}\" #{pre_style}>"
end
"<code class=\"#{get_css_class("Background", theme)}\">"
lines.each_with_index(offset: line_number_start - 1) do |line, i|
line_label = line_numbers? ? "#{i + 1}".rjust(4).ljust(5) : ""
line_class = highlighted?(i + 1) ? "class=\"#{get_css_class("LineHighlight", theme)}\"" : ""
line_id = linkable_line_numbers? ? "id=\"#{line_number_id_prefix}#{i + 1}\"" : ""
outp << "<span #{line_id} #{line_class}>#{line_label}</span>"
line.each do |token|
fragment = "<span class=\"#{get_css_class(token[:type], theme)}\">#{token[:value]}</span>"
outp << fragment
end
end
outp << "</code></pre>"
end
output
end
# ameba:disable Metrics/CyclomaticComplexity
def get_style_defs(theme : Theme) : String
output = String.build do |outp|
theme.styles.each do |token, style|
outp << ".#{get_css_class(token, theme)} {"
# These are set or nil
outp << "color: ##{style.color.try &.hex};" if style.color
outp << "background-color: ##{style.background.try &.hex};" if style.background
outp << "border: 1px solid ##{style.border.try &.hex};" if style.border
# These are true/false/nil
outp << "border: none;" if style.border == false
outp << "font-weight: bold;" if style.bold
outp << "font-weight: 400;" if style.bold == false
outp << "font-style: italic;" if style.italic
outp << "font-style: normal;" if style.italic == false
outp << "text-decoration: underline;" if style.underline
outp << "text-decoration: none;" if style.underline == false
outp << "tab-size: #{tab_width};" if token == "Background"
outp << "}"
end
end
output
end
# Given a token type, return the CSS class to use.
def get_css_class(token, theme)
return class_prefix + Abbreviations[token] if theme.styles.has_key?(token)
# Themes don't contain information for each specific
# token type. However, they may contain information
# for a parent style. Worst case, we go to the root
# (Background) style.
class_prefix + Abbreviations[theme.style_parents(token).reverse.find { |parent|
theme.styles.has_key?(parent)
}]
end
def highlighted?(line : Int) : Bool
highlight_lines.any?(&.includes?(line))
end
def group_tokens_in_lines(tokens : Array(Token)) : Array(Array(Token))
split_tokens = [] of Token
tokens.each do |token|
if token[:value].includes?("\n")
values = token[:value].split("\n")
values.each_with_index do |value, index|
value += "\n" if index < values.size - 1
split_tokens << {type: token[:type], value: value}
end
else
split_tokens << token
end
end
lines = [Array(Token).new]
split_tokens.each do |token|
lines.last << token
if token[:value].includes?("\n")
lines << Array(Token).new
end
end
lines
end
end
end
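For reference, group_tokens_in_lines splits any token containing "\n" and starts a new line group after each newline-terminated piece; a small illustrative call:

Tartrazine::Html.new.group_tokens_in_lines([{type: "Text", value: "a\nb"}])
# => [[{type: "Text", value: "a\n"}], [{type: "Text", value: "b"}]]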

src/formatters/json.cr (new file)

@@ -0,0 +1,11 @@
require "../formatter"
module Tartrazine
class Json < Formatter
property name = "json"
def format(text : String, lexer : Lexer, _theme : Theme) : String
lexer.tokenize(text).to_json
end
end
end
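Since a Token is just NamedTuple(type: String, value: String), the JSON output is an array of {"type": ..., "value": ...} objects. A sketch (lexer, theme, and the exact token types are assumptions):

Tartrazine::Json.new.format(%(a = 1), Tartrazine.lexer("crystal"), Tartrazine.theme("default-dark"))
# e.g. [{"type":"Text","value":"a = "},{"type":"LiteralNumberInteger","value":"1"}]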

src/lexer.cr (new file)

@@ -0,0 +1,180 @@
module Tartrazine
class LexerFiles
extend BakedFileSystem
bake_folder "../lexers", __DIR__
end
# This implements a lexer for Pygments RegexLexers as expressed
# in Chroma's XML serialization.
#
# For explanations on what actions and states do
# the Pygments documentation is a good place to start.
# https://pygments.org/docs/lexerdevelopment/
class Lexer
property config = {
name: "",
aliases: [] of String,
filenames: [] of String,
mime_types: [] of String,
priority: 0.0,
case_insensitive: false,
dot_all: false,
not_multiline: false,
ensure_nl: false,
}
property xml : String = ""
property states = {} of String => State
property state_stack = ["root"]
# Turn the text into a list of tokens. The `usingself` parameter
# is true when the lexer is being used to tokenize a string
# from a larger text that is already being tokenized.
# So, when it's true, we don't modify the text.
def tokenize(text, usingself = false) : Array(Token)
@state_stack = ["root"]
tokens = [] of Token
pos = 0
matched = false
# Respect the `ensure_nl` config option
if text.size > 0 && text[-1] != '\n' && config[:ensure_nl] && !usingself
text += "\n"
end
# Loop through the text, applying rules
while pos < text.size
state = states[@state_stack.last]
# Log.trace { "Stack is #{@state_stack} State is #{state.name}, pos is #{pos}, text is #{text[pos..pos + 10]}" }
state.rules.each do |rule|
matched, new_pos, new_tokens = rule.match(text, pos, self)
if matched
# Move position forward, save the tokens,
# tokenize from the new position
# Log.trace { "MATCHED: #{rule.xml}" }
pos = new_pos
tokens += new_tokens
break
end
# Log.trace { "NOT MATCHED: #{rule.xml}" }
end
# If no rule matches, emit an error token
unless matched
# Log.trace { "Error at #{pos}" }
tokens << {type: "Error", value: "#{text[pos]}"}
pos += 1
end
end
Lexer.collapse_tokens(tokens)
end
# Collapse consecutive tokens of the same type for easier comparison
# and smaller output
def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token)
result = [] of Tartrazine::Token
tokens = tokens.reject { |token| token[:value] == "" }
tokens.each do |token|
if result.empty?
result << token
next
end
last = result.last
if last[:type] == token[:type]
new_token = {type: last[:type], value: last[:value] + token[:value]}
result.pop
result << new_token
else
result << token
end
end
result
end
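# Illustrative example (not part of this diff): adjacent same-type tokens merge,
#   collapse_tokens([{type: "Text", value: "a"}, {type: "Text", value: "b"}, {type: "Keyword", value: "if"}])
#   # => [{type: "Text", value: "ab"}, {type: "Keyword", value: "if"}]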
# ameba:disable Metrics/CyclomaticComplexity
def self.from_xml(xml : String) : Lexer
l = Lexer.new
l.xml = xml
lexer = XML.parse(xml).first_element_child
if lexer
config = lexer.children.find { |node|
node.name == "config"
}
if config
l.config = {
name: xml_to_s(config, name) || "",
aliases: xml_to_a(config, _alias) || [] of String,
filenames: xml_to_a(config, filename) || [] of String,
mime_types: xml_to_a(config, mime_type) || [] of String,
priority: xml_to_f(config, priority) || 0.0,
not_multiline: xml_to_s(config, not_multiline) == "true",
dot_all: xml_to_s(config, dot_all) == "true",
case_insensitive: xml_to_s(config, case_insensitive) == "true",
ensure_nl: xml_to_s(config, ensure_nl) == "true",
}
end
rules = lexer.children.find { |node|
node.name == "rules"
}
if rules
# Rules contains states 🤷
rules.children.select { |node|
node.name == "state"
}.each do |state_node|
state = State.new
state.name = state_node["name"]
if l.states.has_key?(state.name)
raise Exception.new("Duplicate state: #{state.name}")
else
l.states[state.name] = state
end
# And states contain rules 🤷
state_node.children.select { |node|
node.name == "rule"
}.each do |rule_node|
case rule_node["pattern"]?
when nil
if rule_node.first_element_child.try &.name == "include"
rule = IncludeStateRule.new(rule_node)
else
rule = UnconditionalRule.new(rule_node)
end
else
rule = Rule.new(rule_node,
multiline: !l.config[:not_multiline],
dotall: l.config[:dot_all],
ignorecase: l.config[:case_insensitive])
end
state.rules << rule
end
end
end
end
l
end
end
# A Lexer state. A state has a name and a list of rules.
# The state machine has a state stack containing references
# to states to decide which rules to apply.
class State
property name : String = ""
property rules = [] of Rule
def +(other : State)
new_state = State.new
new_state.name = Random.base58(8)
new_state.rules = rules + other.rules
new_state
end
end
# A token, the output of the tokenizer
alias Token = NamedTuple(type: String, value: String)
def self.lexer(name : String) : Lexer
Lexer.from_xml(LexerFiles.get("/#{name}.xml").gets_to_end)
end
end
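End to end, tokenizing a snippet with one of the baked-in lexers would look roughly like this (lexer name assumed):

lexer = Tartrazine.lexer("crystal")
lexer.tokenize(%(puts "hello")).each do |token|
  puts "#{token[:type]} -> #{token[:value].inspect}"
end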

src/main.cr

@@ -1,5 +1,34 @@
require "./**"
HELP = <<-HELP
tartrazine: a syntax highlighting tool
Usage:
tartrazine FILE -f html [-t theme][--standalone][--line-numbers]
[-l lexer] [-o output][--css]
tartrazine FILE -f terminal [-t theme][-l lexer][-o output]
tartrazine FILE -f json [-o output]
tartrazine --list-themes
tartrazine --list-lexers
-f <formatter> Format to use (html, terminal, json)
-t <theme> Theme to use (see --list-themes)
-l <lexer> Lexer (language) to use (see --list-lexers)
-o <output> Output file (default: stdout)
--standalone Generate a standalone HTML file
--css Generate a CSS file for the theme
--line-numbers Include line numbers in the output
HELP
lexer = Tartrazine.lexer("crystal")
theme = Tartrazine.theme(ARGV[1])
puts Tartrazine::Html.new.format(File.read(ARGV[0]), lexer, theme)
formatter = Tartrazine::Json.new
# formatter.standalone = true
# formatter.class_prefix = "hl-"
# formatter.line_number_id_prefix = "ln-"
# formatter.line_numbers = true
# formatter.highlight_lines = [3..7, 20..30]
# formatter.linkable_line_numbers = false
# formatter.wrap_long_lines = false
puts formatter.format(File.read(ARGV[0]), lexer, theme)
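Note that the flags are not parsed yet: the lexer is hardcoded to crystal and the formatter to Json. Once the docopt parsing lands, an invocation would presumably look like tartrazine src/main.cr -f html -t default-dark --standalone -o main.html, per the HELP text above (file and theme names illustrative).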

src/rules.cr

@@ -3,7 +3,7 @@ require "./constants"
require "./formatter"
require "./rules"
require "./styles"
require "./tartrazine"
require "./lexer"
# These are lexer rules. They match with the text being parsed
# and perform actions, either emitting tokens or changing the

src/styles.cr

@@ -11,8 +11,16 @@ module Tartrazine
alias Color = Sixteen::Color
def self.theme(name : String) : Theme
return Theme.from_base16(name[7..]) if name.starts_with? "base16_"
Theme.from_xml(ThemeFiles.get("/#{name}.xml").gets_to_end)
begin
return Theme.from_base16(name)
rescue ex : Exception
raise ex unless ex.message.try &.includes? "Theme not found"
end
begin
return Theme.from_xml(ThemeFiles.get("/#{name}.xml").gets_to_end)
rescue
raise Exception.new("Theme #{name} not found")
end
end
class ThemeFiles
@@ -104,6 +112,7 @@ module Tartrazine
# https://github.com/mohd-akram/base16-pygments/
theme.styles["Background"] = Style.new(color: t["base05"], background: t["base00"])
theme.styles["LineHighlight"] = Style.new(color: t["base0D"], background: t["base01"])
theme.styles["Text"] = Style.new(color: t["base05"])
theme.styles["Error"] = Style.new(color: t["base08"])
theme.styles["Comment"] = Style.new(color: t["base03"])
@@ -162,7 +171,31 @@ module Tartrazine
theme.styles[node["type"]] = s
end
# We really want a LineHighlight class
if !theme.styles.has_key?("LineHighlight")
theme.styles["LineHighlight"] = Style.new
theme.styles["LineHighlight"].background = make_highlight_color(theme.styles["Background"].background)
end
theme
end
# If the color is dark, make it brighter and vice versa
def self.make_highlight_color(base_color)
# FIXME: do a proper luminance adjustment in the color class
return nil if base_color.nil?
color = Color.new(base_color.hex)
if base_color.light?
color.r = [(base_color.r - 40), 255].min.to_u8
color.g = [(base_color.g - 40), 255].min.to_u8
color.b = [(base_color.b - 40), 255].min.to_u8
else
color.r = [(base_color.r + 40), 255].min.to_u8
color.g = [(base_color.g + 40), 255].min.to_u8
color.b = [(base_color.b + 40), 255].min.to_u8
end
# Bug in color, setting rgb doesn't update hex
color.hex = "#{color.r.to_s(16)}#{color.g.to_s(16)}#{color.b.to_s(16)}"
color
end
end
end
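A worked example of the highlight tweak (values illustrative, and the Sixteen::Color API is assumed from its use above): for a dark background such as #181818, each channel gets +40, so 0x18 (24) becomes 64, i.e. 0x40:

Tartrazine::Theme.make_highlight_color(Tartrazine::Color.new("181818")).try &.hex # => "404040"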

src/tartrazine.cr

@@ -12,189 +12,9 @@ require "xml"
module Tartrazine
extend self
VERSION = "0.1.0"
VERSION = "0.1.1"
Log = ::Log.for("tartrazine")
# This implements a lexer for Pygments RegexLexers as expressed
# in Chroma's XML serialization.
#
# For explanations on what actions and states do
# the Pygments documentation is a good place to start.
# https://pygments.org/docs/lexerdevelopment/
# A Lexer state. A state has a name and a list of rules.
# The state machine has a state stack containing references
# to states to decide which rules to apply.
class State
property name : String = ""
property rules = [] of Rule
def +(other : State)
new_state = State.new
new_state.name = Random.base58(8)
new_state.rules = rules + other.rules
new_state
end
end
class LexerFiles
extend BakedFileSystem
bake_folder "../lexers", __DIR__
end
# A token, the output of the tokenizer
alias Token = NamedTuple(type: String, value: String)
class Lexer
property config = {
name: "",
aliases: [] of String,
filenames: [] of String,
mime_types: [] of String,
priority: 0.0,
case_insensitive: false,
dot_all: false,
not_multiline: false,
ensure_nl: false,
}
property xml : String = ""
property states = {} of String => State
property state_stack = ["root"]
# Turn the text into a list of tokens. The `usingself` parameter
# is true when the lexer is being used to tokenize a string
# from a larger text that is already being tokenized.
# So, when it's true, we don't modify the text.
def tokenize(text, usingself = false) : Array(Token)
@state_stack = ["root"]
tokens = [] of Token
pos = 0
matched = false
# Respect the `ensure_nl` config option
if text.size > 0 && text[-1] != '\n' && config[:ensure_nl] && !usingself
text += "\n"
end
# Loop through the text, applying rules
while pos < text.size
state = states[@state_stack.last]
# Log.trace { "Stack is #{@state_stack} State is #{state.name}, pos is #{pos}, text is #{text[pos..pos + 10]}" }
state.rules.each do |rule|
matched, new_pos, new_tokens = rule.match(text, pos, self)
if matched
# Move position forward, save the tokens,
# tokenize from the new position
# Log.trace { "MATCHED: #{rule.xml}" }
pos = new_pos
tokens += new_tokens
break
end
# Log.trace { "NOT MATCHED: #{rule.xml}" }
end
# If no rule matches, emit an error token
unless matched
# Log.trace { "Error at #{pos}" }
tokens << {type: "Error", value: "#{text[pos]}"}
pos += 1
end
end
Lexer.collapse_tokens(tokens)
end
# Collapse consecutive tokens of the same type for easier comparison
# and smaller output
def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token)
result = [] of Tartrazine::Token
tokens = tokens.reject { |token| token[:value] == "" }
tokens.each do |token|
if result.empty?
result << token
next
end
last = result.last
if last[:type] == token[:type]
new_token = {type: last[:type], value: last[:value] + token[:value]}
result.pop
result << new_token
else
result << token
end
end
result
end
# ameba:disable Metrics/CyclomaticComplexity
def self.from_xml(xml : String) : Lexer
l = Lexer.new
l.xml = xml
lexer = XML.parse(xml).first_element_child
if lexer
config = lexer.children.find { |node|
node.name == "config"
}
if config
l.config = {
name: xml_to_s(config, name) || "",
aliases: xml_to_a(config, _alias) || [] of String,
filenames: xml_to_a(config, filename) || [] of String,
mime_types: xml_to_a(config, mime_type) || [] of String,
priority: xml_to_f(config, priority) || 0.0,
not_multiline: xml_to_s(config, not_multiline) == "true",
dot_all: xml_to_s(config, dot_all) == "true",
case_insensitive: xml_to_s(config, case_insensitive) == "true",
ensure_nl: xml_to_s(config, ensure_nl) == "true",
}
end
rules = lexer.children.find { |node|
node.name == "rules"
}
if rules
# Rules contains states 🤷
rules.children.select { |node|
node.name == "state"
}.each do |state_node|
state = State.new
state.name = state_node["name"]
if l.states.has_key?(state.name)
raise Exception.new("Duplicate state: #{state.name}")
else
l.states[state.name] = state
end
# And states contain rules 🤷
state_node.children.select { |node|
node.name == "rule"
}.each do |rule_node|
case rule_node["pattern"]?
when nil
if rule_node.first_element_child.try &.name == "include"
rule = IncludeStateRule.new(rule_node)
else
rule = UnconditionalRule.new(rule_node)
end
else
rule = Rule.new(rule_node,
multiline: !l.config[:not_multiline],
dotall: l.config[:dot_all],
ignorecase: l.config[:case_insensitive])
end
state.rules << rule
end
end
end
end
l
end
end
def self.lexer(name : String) : Lexer
Lexer.from_xml(LexerFiles.get("/#{name}.xml").gets_to_end)
end
end
# Convenience macros to parse XML