Compare commits

..

7 Commits

9 changed files with 220 additions and 35 deletions

View File

@ -31,6 +31,9 @@ is a subset of Pygments'.
Currently Tartrazine supports ... 241 languages.
It has 332 themes (64 from Chroma, the rest are base16 themes via
[Sixteen](https://github.com/ralsina/sixteen)).
## Installation
This will have a CLI tool that can be installed, but it's not

24
scripts/token_abbrevs.py Normal file
View File

@ -0,0 +1,24 @@
import sys
import string

# Run it as grep token lexers/* | python scripts/token_abbrevs.py


def abbr(line):
    """Return the lowercase initials of a CamelCase token name.

    Only ASCII uppercase letters are kept, so "LiteralStringDouble"
    becomes "lsd".
    """
    return "".join(c for c in line if c in string.ascii_uppercase).lower()


def collect_abbrevs(lines):
    """Map every token name found in `lines` to its abbreviation.

    A line is considered only if it contains "<token". The token name is
    the first double-quoted value after "<token ". Lines without any
    quoted value (for example a bare "<tokens>" element) are skipped
    instead of raising IndexError.
    """
    abbrevs = {}
    for line in lines:
        if "<token" not in line:
            continue
        parts = line.strip().split('<token ', 1)[-1].split('"')
        if len(parts) < 2:
            # Nothing quoted on this line, so there is no name to abbreviate.
            continue
        name = parts[1]
        abbrevs[name] = abbr(name)
    return abbrevs


def find_collisions(abbrevs):
    """Return {abbreviation: [names...]} for abbreviations used more than once."""
    by_short = {}
    for name, short in abbrevs.items():
        by_short.setdefault(short, []).append(name)
    return {short: names for short, names in by_short.items() if len(names) > 1}


def main():
    """Read grep output from stdin and print the abbreviation table."""
    abbrevs = collect_abbrevs(sys.stdin)
    print("Abbreviations: {")
    for k, v in abbrevs.items():
        print(f' "{k}" => "{v}",')
    print("}")
    # Distinct token names can share initials. Report that on stderr so
    # the generated table on stdout stays unchanged.
    for short, names in find_collisions(abbrevs).items():
        print(f'warning: "{short}" is shared by {", ".join(names)}', file=sys.stderr)


if __name__ == "__main__":
    main()

View File

@ -6,11 +6,14 @@ authors:
targets:
tartrazine:
main: src/tartrazine.cr
main: src/main.cr
dependencies:
base58:
github: crystal-china/base58.cr
sixteen:
github: ralsina/sixteen
branch: main
crystal: ">= 1.13.0"

92
src/constants.cr Normal file
View File

@ -0,0 +1,92 @@
module Tartrazine
  # Maps each token type name to a short abbreviation.
  #
  # Every abbreviation must be unique. Most are the lowercase initials of
  # the CamelCase token name. Where two or more names share initials, the
  # later entries carry extra letters from their last word (for example
  # "NameClass" => "nc" but "NameConstant" => "nco") so that no two token
  # types end up with the same abbreviation.
  Abbreviations = {
    "Background"               => "b",
    "Text"                     => "t",
    "CommentSingle"            => "cs",
    "CommentSpecial"           => "csp",
    "NameVariable"             => "nv",
    "Keyword"                  => "k",
    "NameFunction"             => "nf",
    "Punctuation"              => "p",
    "Operator"                 => "o",
    "LiteralNumberInteger"     => "lni",
    "NameBuiltin"              => "nb",
    "Name"                     => "n",
    "OperatorWord"             => "ow",
    "LiteralStringSingle"      => "lss",
    "Literal"                  => "l",
    "NameClass"                => "nc",
    "CommentMultiline"         => "cm",
    "LiteralStringRegex"       => "lsr",
    "KeywordDeclaration"       => "kd",
    "KeywordConstant"          => "kc",
    "NameOther"                => "no",
    "LiteralNumberFloat"       => "lnf",
    "LiteralNumberHex"         => "lnh",
    "LiteralStringDouble"      => "lsd",
    "KeywordType"              => "kt",
    "NameNamespace"            => "nn",
    "NameAttribute"            => "na",
    "KeywordReserved"          => "kr",
    "CommentPreproc"           => "cp",
    "KeywordNamespace"         => "kn",
    "NameConstant"             => "nco",
    "NameLabel"                => "nl",
    "LiteralString"            => "ls",
    "LiteralStringChar"        => "lsc",
    "TextWhitespace"           => "tw",
    "LiteralStringEscape"      => "lse",
    "LiteralNumber"            => "ln",
    "Other"                    => "ot",
    "LiteralStringBoolean"     => "lsb",
    "NameProperty"             => "np",
    "Comment"                  => "c",
    "NameTag"                  => "nt",
    "LiteralStringOther"       => "lso",
    "NameVariableGlobal"       => "nvg",
    "NameBuiltinPseudo"        => "nbp",
    "LiteralNumberBin"         => "lnb",
    "KeywordPseudo"            => "kp",
    "CommentPreprocFile"       => "cpf",
    "LiteralStringAffix"       => "lsa",
    "LiteralStringDelimiter"   => "lsde",
    "LiteralNumberOct"         => "lno",
    "Error"                    => "e",
    "Generic"                  => "g",
    "LiteralNumberIntegerLong" => "lnil",
    "NameDecorator"            => "nd",
    "LiteralStringInterpol"    => "lsi",
    "LiteralStringBacktick"    => "lsba",
    "GenericPrompt"            => "gp",
    "GenericOutput"            => "go",
    "LiteralStringName"        => "lsn",
    "LiteralStringHeredoc"     => "lsh",
    "LiteralStringSymbol"      => "lssy",
    "NameVariableInstance"     => "nvi",
    "LiteralOther"             => "lo",
    "NameVariableClass"        => "nvc",
    "NameOperator"             => "nop",
    "None"                     => "non",
    "LiteralStringDoc"         => "lsdo",
    "NameException"            => "ne",
    "GenericSubheading"        => "gs",
    "GenericStrong"            => "gst",
    "GenericDeleted"           => "gd",
    "GenericInserted"          => "gi",
    "GenericHeading"           => "gh",
    "NameEntity"               => "nen",
    "NamePseudo"               => "nps",
    "CommentHashbang"          => "ch",
    "TextPunctuation"          => "tp",
    "NameVariableAnonymous"    => "nva",
    "NameVariableMagic"        => "nvm",
    "NameFunctionMagic"        => "nfm",
    "GenericEmph"              => "ge",
    "GenericUnderline"         => "gu",
    "LiteralStringAtom"        => "lsat",
    "LiteralDate"              => "ld",
    "GenericError"             => "ger",
    "TextSymbol"               => "ts",
    "NameKeyword"              => "nk",
  }
end

View File

@ -1,5 +1,6 @@
require "./tartrazine.cr"
require "./constants.cr"
require "./styles.cr"
require "./tartrazine.cr"
module Tartrazine
# This is the base class for all formatters.
@ -14,7 +15,7 @@ module Tartrazine
def get_style_defs(theme : Theme) : String
output = String.build do |outp|
theme.styles.each do |token, style|
outp << ".#{token} {"
outp << ".#{get_css_class(token, theme)} {"
# These are set or nil
outp << "color: #{style.color};" if style.color
outp << "background-color: #{style.background};" if style.background
@ -42,7 +43,7 @@ module Tartrazine
outp << "<html><head><style>"
outp << get_style_defs(theme)
outp << "</style></head><body>"
outp << "<pre class=\"Background\"><code class=\"Background\">"
outp << "<pre class=\"#{get_css_class("Background", theme)}\"><code class=\"#{get_css_class("Background", theme)}\">"
lexer.tokenize(text).each do |token|
fragment = "<span class=\"#{get_css_class(token[:type], theme)}\">#{token[:value]}</span>"
outp << fragment
@ -54,19 +55,15 @@ module Tartrazine
# Given a token type, return the CSS class to use.
def get_css_class(token, theme)
  # A token the theme styles directly uses its own abbreviation.
  return Abbreviations[token] if theme.styles.has_key?(token)

  # Themes don't contain information for each specific
  # token type. However, they may contain information
  # for a parent style. The list from `style_parents` is walked in
  # reverse and the first entry the theme styles is used.
  # NOTE(review): assumes that list is ordered root-first, so reversing
  # tries the closest ancestor first -- confirm against `style_parents`.
  styled = theme.style_parents(token).reverse.find { |candidate|
    theme.styles.has_key?(candidate)
  }

  # Worst case, we go to the root (Background) style. Doing that
  # explicitly avoids indexing Abbreviations with nil when no entry
  # in the list is styled.
  Abbreviations[styled || "Background"]
end
end
end
lexer = Tartrazine.lexer("crystal")
theme = Tartrazine.theme("catppuccin-macchiato")
puts Tartrazine::Html.new.format(File.read(ARGV[0]), lexer, theme)

5
src/main.cr Normal file
View File

@ -0,0 +1,5 @@
require "./**"
# Command-line entry point: print FILE highlighted as HTML.
#
# Usage: PROGRAM FILE THEME
#
# ARGV[0] is the path of the file to highlight and ARGV[1] is the theme
# name handed to `Tartrazine.theme`. The lexer is currently fixed to
# "crystal".
#
# Both arguments are required; without this check a missing argument
# surfaces as an unhandled error from indexing ARGV.
abort "Usage: #{PROGRAM_NAME} FILE THEME" if ARGV.size < 2

lexer = Tartrazine.lexer("crystal")
theme = Tartrazine.theme(ARGV[1])
puts Tartrazine::Html.new.format(File.read(ARGV[0]), lexer, theme)

View File

@ -5,18 +5,19 @@ require "./actions"
# state of the lexer.
module Tartrazine
# This rule matches via a regex pattern
class Rule
property pattern : Regex = Re2.new ""
property actions : Array(Action) = [] of Action
property xml : String = "foo"
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
tokens = [] of Token
match = pattern.match(text, pos)
# We don't match if the match doesn't move the cursor
# because that causes infinite loops
Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
return false, pos, [] of Token if match.nil? || match.end == 0
# Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
tokens = [] of Token
# Emit the tokens
actions.each do |action|
# Emit the token
@ -28,7 +29,12 @@ module Tartrazine
# Build the rule from its XML node.
#
# The node's serialized form is kept in `@xml`, and its "pattern"
# attribute is compiled once, with the given `multiline`, `dotall` and
# `ignorecase` flags plus `anchored: true`. The earlier unanchored
# compile of the same pattern was immediately overwritten and has been
# dropped. Finally the node is passed to `add_actions`.
def initialize(node : XML::Node, multiline, dotall, ignorecase)
  @xml = node.to_s
  @pattern = Re2.new(
    node["pattern"],
    multiline,
    dotall,
    ignorecase,
    anchored: true)
  add_actions(node)
end
@ -80,4 +86,24 @@ module Tartrazine
add_actions(node)
end
end
# This is a hack to work around the fact that Crystal seems to disallow
# having regexes that are multiline but not dot_all.
#
# Re2 subclasses Regex but compiles the pattern itself, passing raw
# LibPCRE2 option bits so that each flag can be toggled independently.
class Re2 < Regex
# Fixed values for instance variables that `initialize` below never assigns.
# NOTE(review): presumably these satisfy instance variables declared by
# the parent Regex, whose own initializer is not called here -- confirm.
@source = "fa"
@options = Regex::Options::None
@jit = true
# Compile `pattern` and store the result in `@re`.
#
# UTF, DUPNAMES and UCP are always enabled. `multiline`, `dotall`,
# `ignorecase` and `anchored` each add the matching LibPCRE2 flag when
# true; all four default to false.
#
# The block handed to `Regex::PCRE2.compile` raises an Exception carrying
# the error message it is given.
def initialize(pattern : String, multiline = false, dotall = false, ignorecase = false, anchored = false)
flags = LibPCRE2::UTF | LibPCRE2::DUPNAMES |
LibPCRE2::UCP
flags |= LibPCRE2::MULTILINE if multiline
flags |= LibPCRE2::DOTALL if dotall
flags |= LibPCRE2::CASELESS if ignorecase
flags |= LibPCRE2::ANCHORED if anchored
@re = Regex::PCRE2.compile(pattern, flags) do |error_message|
raise Exception.new(error_message)
end
end
end
end

View File

@ -1,7 +1,9 @@
require "sixteen"
require "xml"
module Tartrazine
# Look up a theme by name.
#
# Names starting with "base16_" are handed to `Theme.from_base16` with
# the first seven characters (the prefix) removed. Any other name is
# read from "styles/<name>.xml" and parsed with `Theme.from_xml`.
def self.theme(name : String) : Theme
  if name.starts_with?("base16_")
    Theme.from_base16(name[7..])
  else
    Theme.from_xml(File.read(File.join("styles", "#{name}.xml")))
  end
end
@ -27,6 +29,9 @@ module Tartrazine
# anything
property? complete : Bool = false
# Create a style. Each attribute (color, background, border, bold,
# italic, underline) is assigned straight from its argument and
# defaults to nil when omitted.
def initialize(@color = nil, @background = nil, @border = nil, @bold = nil, @italic = nil, @underline = nil)
end
# Expands to an assignment of property `prop` on `new`: the value from
# `other` when that is not nil, otherwise the value from `self`.
# The expansion site must have `new` and `other` in scope.
macro merge_prop(prop)
  new.{{prop}} = if other.{{prop}}.nil?
    self.{{prop}}
  else
    other.{{prop}}
  end
end
@ -78,6 +83,44 @@ module Tartrazine
parents
end
# Load from a base16 theme name using Sixteen
# Build a Theme from the base16 theme `name`, loaded through Sixteen.
#
# "Background" gets both a foreground (base05) and a background (base00)
# colour; every other token type listed below gets only a foreground
# colour taken from the named palette slot.
def self.from_base16(name : String) : Theme
  t = Sixteen.theme(name)
  theme = Theme.new
  theme.name = name

  theme.styles["Background"] = Style.new(color: t.palette["base05"], background: t.palette["base00"])

  # The color assignments are adapted from
  # https://github.com/mohd-akram/base16-pygments/
  # Entries are applied in the order written here.
  {
    "Text"                  => "base05",
    "Error"                 => "base08",
    "Comment"               => "base03",
    "CommentPreproc"        => "base0F",
    "CommentPreprocFile"    => "base0B",
    "Keyword"               => "base0E",
    "KeywordType"           => "base08",
    "NameAttribute"         => "base0D",
    "NameBuiltin"           => "base08",
    "NameBuiltinPseudo"     => "base08",
    "NameClass"             => "base0D",
    "NameConstant"          => "base09",
    "NameDecorator"         => "base09",
    "NameFunction"          => "base0D",
    "NameNamespace"         => "base0D",
    "NameTag"               => "base0E",
    "NameVariable"          => "base0D",
    "NameVariableInstance"  => "base08",
    "LiteralNumber"         => "base09",
    "Operator"              => "base0C",
    "OperatorWord"          => "base0E",
    "Literal"               => "base0B",
    "LiteralString"         => "base0B",
    "LiteralStringInterpol" => "base0F",
    "LiteralStringRegex"    => "base0C",
    "LiteralStringSymbol"   => "base09",
  }.each do |token_name, slot|
    theme.styles[token_name] = Style.new(color: t.palette[slot])
  end

  theme
end
# Load from a Chroma XML file
def self.from_xml(xml : String) : Theme
document = XML.parse(xml)

View File

@ -54,25 +54,36 @@ module Tartrazine
property state_stack = ["root"]
# Turn the text into a list of tokens.
# Turn the text into a list of tokens. The `usingself` parameter
# is true when the lexer is being used to tokenize a string
# from a larger text that is already being tokenized.
# So, when it's true, we don't modify the text.
def tokenize(text, usingself = false) : Array(Token)
@state_stack = ["root"]
tokens = [] of Token
pos = 0
matched = false
time = 0
count = 0
# Respect the `ensure_nl` config option
if text.size > 0 && text[-1] != '\n' && config[:ensure_nl] && !usingself
text += "\n"
end
# Loop through the text, applying rules
while pos < text.size
state = states[@state_stack.last]
Log.trace { "Stack is #{@state_stack} State is #{state.name}, pos is #{pos}, text is #{text[pos..pos + 10]}" }
state.rules.each do |rule|
matched, new_pos, new_tokens = rule.match(text, pos, self)
if matched
# Move position forward, save the tokens,
# tokenize from the new position
Log.trace { "MATCHED: #{rule.xml}" }
pos = new_pos
tokens += new_tokens
break # We go back to processing with current state
break
end
Log.trace { "NOT MATCHED: #{rule.xml}" }
end
@ -175,25 +186,6 @@ module Tartrazine
# Load the lexer called `name` by reading "lexers/<name>.xml" and
# parsing it with `Lexer.from_xml`.
def self.lexer(name : String) : Lexer
  xml_source = File.read("lexers/#{name}.xml")
  Lexer.from_xml(xml_source)
end
# This is a hack to work around the fact that Crystal seems to disallow
# having regexes that are multiline but not dot_all.
#
# This variant of Re2 always compiles its pattern with the ANCHORED flag.
class Re2 < Regex
# Fixed values for instance variables that `initialize` below never assigns.
# NOTE(review): presumably these satisfy instance variables declared by
# the parent Regex, whose own initializer is not called here -- confirm.
@source = "fa"
@options = Regex::Options::None
@jit = true
# Compile `pattern` and store the result in `@re`.
#
# UTF, DUPNAMES, UCP and ANCHORED are always enabled. `multiline`,
# `dotall` and `ignorecase` each add the matching LibPCRE2 flag when
# true; all three default to false.
#
# The block handed to `Regex::PCRE2.compile` raises an Exception carrying
# the error message it is given.
def initialize(pattern : String, multiline = false, dotall = false, ignorecase = false)
flags = LibPCRE2::UTF | LibPCRE2::DUPNAMES |
LibPCRE2::UCP | LibPCRE2::ANCHORED
flags |= LibPCRE2::MULTILINE if multiline
flags |= LibPCRE2::DOTALL if dotall
flags |= LibPCRE2::CASELESS if ignorecase
@re = Regex::PCRE2.compile(pattern, flags) do |error_message|
raise Exception.new(error_message)
end
end
end
end
# Convenience macros to parse XML