mirror of
https://github.com/ralsina/tartrazine.git
synced 2024-11-10 05:22:23 +00:00
Compare commits
7 Commits
a2394a7313
...
e0f697f1f9
Author | SHA1 | Date | |
---|---|---|---|
e0f697f1f9 | |||
0c86e91b0b | |||
499cf7f623 | |||
a9ff9bc8ac | |||
82db232511 | |||
420b68993c | |||
94bc221545 |
@ -31,6 +31,9 @@ is a subset of Pygments'.
|
||||
|
||||
Currently Tartrazine supports ... 241 languages.
|
||||
|
||||
It has 332 themes (64 from Chroma, the rest are base16 themes via
|
||||
[Sixteen](https://github.com/ralsina/sixteen)).
|
||||
|
||||
## Installation
|
||||
|
||||
This will have a CLI tool that can be installed, but it's not
|
||||
|
24
scripts/token_abbrevs.py
Normal file
24
scripts/token_abbrevs.py
Normal file
@ -0,0 +1,24 @@
|
||||
"""Generate the token-type -> abbreviation table from lexer definitions."""
import string
import sys

# Run it as grep token lexers/* | python scripts/token_abbrevs.py


def abbr(line):
    """Return the ASCII capital letters of `line`, joined and lower-cased.

    Example: "LiteralStringDouble" -> "lsd".
    """
    return "".join(c for c in line if c in string.ascii_uppercase).lower()


def collect_abbrevs(lines):
    """Map every token type mentioned in `lines` to its abbreviation.

    Only lines containing "<token" are considered. The token type is taken
    to be the first double-quoted value after "<token ". Lines that contain
    "<token" but no quoted value are skipped instead of raising IndexError.
    Insertion order follows first appearance in the input.
    """
    abbrevs = {}
    for line in lines:
        if "<token" not in line:
            continue
        fields = line.strip().split("<token ", 1)[-1].split('"')
        if len(fields) < 2:
            # No quoted attribute value on this line: nothing to record.
            continue
        name = fields[1]
        abbrevs[name] = abbr(name)
    return abbrevs


def find_collisions(abbrevs):
    """Return {abbreviation: [token types]} for abbreviations used more than once."""
    by_abbrev = {}
    for name, short in abbrevs.items():
        by_abbrev.setdefault(short, []).append(name)
    return {short: names for short, names in by_abbrev.items() if len(names) > 1}


def main():
    """Read grep output from stdin and print the abbreviation table to stdout."""
    abbrevs = collect_abbrevs(sys.stdin)

    print("Abbreviations: {")
    for k, v in abbrevs.items():
        print(f' "{k}" => "{v}",')
    print("}")

    # Different token types can reduce to the same initials. Report them on
    # stderr so the generated table on stdout stays unchanged.
    for short, names in find_collisions(abbrevs).items():
        print(f'warning: "{short}" is shared by {", ".join(names)}', file=sys.stderr)


if __name__ == "__main__":
    main()
|
@ -6,11 +6,14 @@ authors:
|
||||
|
||||
targets:
|
||||
tartrazine:
|
||||
main: src/tartrazine.cr
|
||||
main: src/main.cr
|
||||
|
||||
dependencies:
|
||||
base58:
|
||||
github: crystal-china/base58.cr
|
||||
sixteen:
|
||||
github: ralsina/sixteen
|
||||
branch: main
|
||||
|
||||
crystal: ">= 1.13.0"
|
||||
|
||||
|
92
src/constants.cr
Normal file
92
src/constants.cr
Normal file
@ -0,0 +1,92 @@
|
||||
module Tartrazine
|
||||
# Short names for token types, keyed by the full token type name.
# Each value is the lower-cased capital letters of its key, which is what
# scripts/token_abbrevs.py prints. The HTML formatter's get_css_class
# indexes this table, so the values appear to end up as CSS class names.
#
# NOTE(review): the values are not unique. For example "cs" is used for
# CommentSingle and CommentSpecial, "nc" for NameClass and NameConstant,
# "o" for Operator and Other, and "lsd" for LiteralStringDouble,
# LiteralStringDelimiter and LiteralStringDoc. Distinct token types can
# therefore map to one class name; confirm whether that is intended.
Abbreviations = {
|
||||
"Background" => "b",
|
||||
"Text" => "t",
|
||||
"CommentSingle" => "cs",
|
||||
"CommentSpecial" => "cs",
|
||||
"NameVariable" => "nv",
|
||||
"Keyword" => "k",
|
||||
"NameFunction" => "nf",
|
||||
"Punctuation" => "p",
|
||||
"Operator" => "o",
|
||||
"LiteralNumberInteger" => "lni",
|
||||
"NameBuiltin" => "nb",
|
||||
"Name" => "n",
|
||||
"OperatorWord" => "ow",
|
||||
"LiteralStringSingle" => "lss",
|
||||
"Literal" => "l",
|
||||
"NameClass" => "nc",
|
||||
"CommentMultiline" => "cm",
|
||||
"LiteralStringRegex" => "lsr",
|
||||
"KeywordDeclaration" => "kd",
|
||||
"KeywordConstant" => "kc",
|
||||
"NameOther" => "no",
|
||||
"LiteralNumberFloat" => "lnf",
|
||||
"LiteralNumberHex" => "lnh",
|
||||
"LiteralStringDouble" => "lsd",
|
||||
"KeywordType" => "kt",
|
||||
"NameNamespace" => "nn",
|
||||
"NameAttribute" => "na",
|
||||
"KeywordReserved" => "kr",
|
||||
"CommentPreproc" => "cp",
|
||||
"KeywordNamespace" => "kn",
|
||||
"NameConstant" => "nc",
|
||||
"NameLabel" => "nl",
|
||||
"LiteralString" => "ls",
|
||||
"LiteralStringChar" => "lsc",
|
||||
"TextWhitespace" => "tw",
|
||||
"LiteralStringEscape" => "lse",
|
||||
"LiteralNumber" => "ln",
|
||||
"Other" => "o",
|
||||
"LiteralStringBoolean" => "lsb",
|
||||
"NameProperty" => "np",
|
||||
"Comment" => "c",
|
||||
"NameTag" => "nt",
|
||||
"LiteralStringOther" => "lso",
|
||||
"NameVariableGlobal" => "nvg",
|
||||
"NameBuiltinPseudo" => "nbp",
|
||||
"LiteralNumberBin" => "lnb",
|
||||
"KeywordPseudo" => "kp",
|
||||
"CommentPreprocFile" => "cpf",
|
||||
"LiteralStringAffix" => "lsa",
|
||||
"LiteralStringDelimiter" => "lsd",
|
||||
"LiteralNumberOct" => "lno",
|
||||
"Error" => "e",
|
||||
"Generic" => "g",
|
||||
"LiteralNumberIntegerLong" => "lnil",
|
||||
"NameDecorator" => "nd",
|
||||
"LiteralStringInterpol" => "lsi",
|
||||
"LiteralStringBacktick" => "lsb",
|
||||
"GenericPrompt" => "gp",
|
||||
"GenericOutput" => "go",
|
||||
"LiteralStringName" => "lsn",
|
||||
"LiteralStringHeredoc" => "lsh",
|
||||
"LiteralStringSymbol" => "lss",
|
||||
"NameVariableInstance" => "nvi",
|
||||
"LiteralOther" => "lo",
|
||||
"NameVariableClass" => "nvc",
|
||||
"NameOperator" => "no",
|
||||
"None" => "n",
|
||||
"LiteralStringDoc" => "lsd",
|
||||
"NameException" => "ne",
|
||||
"GenericSubheading" => "gs",
|
||||
"GenericStrong" => "gs",
|
||||
"GenericDeleted" => "gd",
|
||||
"GenericInserted" => "gi",
|
||||
"GenericHeading" => "gh",
|
||||
"NameEntity" => "ne",
|
||||
"NamePseudo" => "np",
|
||||
"CommentHashbang" => "ch",
|
||||
"TextPunctuation" => "tp",
|
||||
"NameVariableAnonymous" => "nva",
|
||||
"NameVariableMagic" => "nvm",
|
||||
"NameFunctionMagic" => "nfm",
|
||||
"GenericEmph" => "ge",
|
||||
"GenericUnderline" => "gu",
|
||||
"LiteralStringAtom" => "lsa",
|
||||
"LiteralDate" => "ld",
|
||||
"GenericError" => "ge",
|
||||
"TextSymbol" => "ts",
|
||||
"NameKeyword" => "nk",
|
||||
|
||||
}
|
||||
end
|
@ -1,5 +1,6 @@
|
||||
require "./tartrazine.cr"
|
||||
require "./constants.cr"
|
||||
require "./styles.cr"
|
||||
require "./tartrazine.cr"
|
||||
|
||||
module Tartrazine
|
||||
# This is the base class for all formatters.
|
||||
@ -14,7 +15,7 @@ module Tartrazine
|
||||
def get_style_defs(theme : Theme) : String
|
||||
output = String.build do |outp|
|
||||
theme.styles.each do |token, style|
|
||||
outp << ".#{token} {"
|
||||
outp << ".#{get_css_class(token, theme)} {"
|
||||
# These are set or nil
|
||||
outp << "color: #{style.color};" if style.color
|
||||
outp << "background-color: #{style.background};" if style.background
|
||||
@ -42,7 +43,7 @@ module Tartrazine
|
||||
outp << "<html><head><style>"
|
||||
outp << get_style_defs(theme)
|
||||
outp << "</style></head><body>"
|
||||
outp << "<pre class=\"Background\"><code class=\"Background\">"
|
||||
outp << "<pre class=\"#{get_css_class("Background", theme)}\"><code class=\"#{get_css_class("Background", theme)}\">"
|
||||
lexer.tokenize(text).each do |token|
|
||||
fragment = "<span class=\"#{get_css_class(token[:type], theme)}\">#{token[:value]}</span>"
|
||||
outp << fragment
|
||||
@ -54,19 +55,15 @@ module Tartrazine
|
||||
|
||||
# Given a token type, return the CSS class to use.
|
||||
def get_css_class(token, theme)
|
||||
return token if theme.styles.has_key?(token)
|
||||
return Abbreviations[token] if theme.styles.has_key?(token)
|
||||
|
||||
# Themes don't contain information for each specific
|
||||
# token type. However, they may contain information
|
||||
# for a parent style. Worst case, we go to the root
|
||||
# (Background) style.
|
||||
theme.style_parents(token).reverse.find { |parent|
|
||||
Abbreviations[theme.style_parents(token).reverse.find { |parent|
|
||||
theme.styles.has_key?(parent)
|
||||
}
|
||||
}]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
lexer = Tartrazine.lexer("crystal")
|
||||
theme = Tartrazine.theme("catppuccin-macchiato")
|
||||
puts Tartrazine::Html.new.format(File.read(ARGV[0]), lexer, theme)
|
||||
|
5
src/main.cr
Normal file
5
src/main.cr
Normal file
@ -0,0 +1,5 @@
|
||||
require "./**"
|
||||
|
||||
# Command-line entry point: render a Crystal source file as an HTML document.
#
# Usage: tartrazine FILE THEME
#
# FILE is read from disk and tokenized with the "crystal" lexer. THEME is
# handed to Tartrazine.theme. The formatted HTML is written to standard output.

# Both arguments are required; without this guard a missing one surfaces as
# a bare IndexError from ARGV[] instead of a usage hint.
abort "Usage: #{PROGRAM_NAME} FILE THEME" if ARGV.size < 2

source_path = ARGV[0]
theme_name = ARGV[1]

lexer = Tartrazine.lexer("crystal")
theme = Tartrazine.theme(theme_name)
puts Tartrazine::Html.new.format(File.read(source_path), lexer, theme)
|
32
src/rules.cr
32
src/rules.cr
@ -5,18 +5,19 @@ require "./actions"
|
||||
# state of the lexer.
|
||||
module Tartrazine
|
||||
# This rule matches via a regex pattern
|
||||
|
||||
class Rule
|
||||
property pattern : Regex = Re2.new ""
|
||||
property actions : Array(Action) = [] of Action
|
||||
property xml : String = "foo"
|
||||
|
||||
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
|
||||
tokens = [] of Token
|
||||
match = pattern.match(text, pos)
|
||||
# We don't match if the match doesn't move the cursor
|
||||
# because that causes infinite loops
|
||||
Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
|
||||
return false, pos, [] of Token if match.nil? || match.end == 0
|
||||
# Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
|
||||
tokens = [] of Token
|
||||
# Emit the tokens
|
||||
actions.each do |action|
|
||||
# Emit the token
|
||||
@ -28,7 +29,12 @@ module Tartrazine
|
||||
|
||||
def initialize(node : XML::Node, multiline, dotall, ignorecase)
|
||||
@xml = node.to_s
|
||||
@pattern = Re2.new(node["pattern"], multiline, dotall, ignorecase)
|
||||
@pattern = Re2.new(
|
||||
node["pattern"],
|
||||
multiline,
|
||||
dotall,
|
||||
ignorecase,
|
||||
anchored: true)
|
||||
add_actions(node)
|
||||
end
|
||||
|
||||
@ -80,4 +86,24 @@ module Tartrazine
|
||||
add_actions(node)
|
||||
end
|
||||
end
|
||||
|
||||
# Regex subclass that compiles its pattern through Regex::PCRE2.compile with
|
||||
# hand-picked LibPCRE2 flags. This is a hack to work around Crystal seeming
# to disallow regexes that are multiline but not dot_all.
|
||||
class Re2 < Regex
|
||||
# Placeholder values for instance variables. Presumably these are the ones
# Regex itself declares, given here because the initializer below sets only
# @re (TODO confirm).
@source = "fa"
|
||||
@options = Regex::Options::None
|
||||
@jit = true
|
||||
|
||||
# Compiles `pattern`. UTF, DUPNAMES and UCP are always set; MULTILINE,
# DOTALL, CASELESS and ANCHORED are added only when the matching argument
# is truthy. A compile error is raised as an Exception carrying the
# message passed to the block.
def initialize(pattern : String, multiline = false, dotall = false, ignorecase = false, anchored = false)
|
||||
flags = LibPCRE2::UTF | LibPCRE2::DUPNAMES |
|
||||
LibPCRE2::UCP
|
||||
flags |= LibPCRE2::MULTILINE if multiline
|
||||
flags |= LibPCRE2::DOTALL if dotall
|
||||
flags |= LibPCRE2::CASELESS if ignorecase
|
||||
flags |= LibPCRE2::ANCHORED if anchored
|
||||
@re = Regex::PCRE2.compile(pattern, flags) do |error_message|
|
||||
raise Exception.new(error_message)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -1,7 +1,9 @@
|
||||
require "sixteen"
|
||||
require "xml"
|
||||
|
||||
module Tartrazine
|
||||
# Looks up a theme by name.
#
# Names starting with "base16_" are resolved through Theme.from_base16 using
# the remainder of the name. Any other name is read as the Chroma XML file
# "styles/<name>.xml" and parsed with Theme.from_xml.
def self.theme(name : String) : Theme
  if name.starts_with? "base16_"
    # "base16_" is 7 characters long; everything after it is the palette name.
    Theme.from_base16(name[7..])
  else
    xml_path = File.join("styles", "#{name}.xml")
    Theme.from_xml(File.read(xml_path))
  end
end
|
||||
@ -27,6 +29,9 @@ module Tartrazine
|
||||
# anything
|
||||
property? complete : Bool = false
|
||||
|
||||
def initialize(@color = nil, @background = nil, @border = nil, @bold = nil, @italic = nil, @underline = nil)
|
||||
end
|
||||
|
||||
macro merge_prop(prop)
|
||||
new.{{prop}} = other.{{prop}}.nil? ? self.{{prop}} : other.{{prop}}
|
||||
end
|
||||
@ -78,6 +83,44 @@ module Tartrazine
|
||||
parents
|
||||
end
|
||||
|
||||
# Builds a Theme from the base16 palette that Sixteen returns for `name`.
#
# The token-to-colour assignments are adapted from
# https://github.com/mohd-akram/base16-pygments/
def self.from_base16(name : String) : Theme
  t = Sixteen.theme(name)
  theme = Theme.new
  theme.name = name

  # The root style is the only one that also carries a background colour.
  theme.styles["Background"] = Style.new(color: t.palette["base05"], background: t.palette["base00"])

  # Token type => palette slot used as its foreground colour. The order
  # matches the order in which the styles are added to the theme.
  foreground_slots = {
    "Text"                  => "base05",
    "Error"                 => "base08",
    "Comment"               => "base03",
    "CommentPreproc"        => "base0F",
    "CommentPreprocFile"    => "base0B",
    "Keyword"               => "base0E",
    "KeywordType"           => "base08",
    "NameAttribute"         => "base0D",
    "NameBuiltin"           => "base08",
    "NameBuiltinPseudo"     => "base08",
    "NameClass"             => "base0D",
    "NameConstant"          => "base09",
    "NameDecorator"         => "base09",
    "NameFunction"          => "base0D",
    "NameNamespace"         => "base0D",
    "NameTag"               => "base0E",
    "NameVariable"          => "base0D",
    "NameVariableInstance"  => "base08",
    "LiteralNumber"         => "base09",
    "Operator"              => "base0C",
    "OperatorWord"          => "base0E",
    "Literal"               => "base0B",
    "LiteralString"         => "base0B",
    "LiteralStringInterpol" => "base0F",
    "LiteralStringRegex"    => "base0C",
    "LiteralStringSymbol"   => "base09",
  }

  foreground_slots.each do |token_type, slot|
    theme.styles[token_type] = Style.new(color: t.palette[slot])
  end

  theme
end
|
||||
|
||||
# Load from a Chroma XML file
|
||||
def self.from_xml(xml : String) : Theme
|
||||
document = XML.parse(xml)
|
||||
|
@ -54,25 +54,36 @@ module Tartrazine
|
||||
|
||||
property state_stack = ["root"]
|
||||
|
||||
# Turn the text into a list of tokens.
|
||||
# Turn the text into a list of tokens. The `usingself` parameter
|
||||
# is true when the lexer is being used to tokenize a string
|
||||
# from a larger text that is already being tokenized.
|
||||
# So, when it's true, we don't modify the text.
|
||||
def tokenize(text, usingself = false) : Array(Token)
|
||||
@state_stack = ["root"]
|
||||
tokens = [] of Token
|
||||
pos = 0
|
||||
matched = false
|
||||
time = 0
|
||||
count = 0
|
||||
|
||||
# Respect the `ensure_nl` config option
|
||||
if text.size > 0 && text[-1] != '\n' && config[:ensure_nl] && !usingself
|
||||
text += "\n"
|
||||
end
|
||||
|
||||
# Loop through the text, applying rules
|
||||
while pos < text.size
|
||||
state = states[@state_stack.last]
|
||||
Log.trace { "Stack is #{@state_stack} State is #{state.name}, pos is #{pos}, text is #{text[pos..pos + 10]}" }
|
||||
state.rules.each do |rule|
|
||||
matched, new_pos, new_tokens = rule.match(text, pos, self)
|
||||
if matched
|
||||
# Move position forward, save the tokens,
|
||||
# tokenize from the new position
|
||||
Log.trace { "MATCHED: #{rule.xml}" }
|
||||
pos = new_pos
|
||||
tokens += new_tokens
|
||||
break # We go back to processing with current state
|
||||
break
|
||||
end
|
||||
Log.trace { "NOT MATCHED: #{rule.xml}" }
|
||||
end
|
||||
@ -175,25 +186,6 @@ module Tartrazine
|
||||
def self.lexer(name : String) : Lexer
|
||||
Lexer.from_xml(File.read("lexers/#{name}.xml"))
|
||||
end
|
||||
|
||||
# This is a hack to workaround that Crystal seems to disallow
|
||||
# having regexes multiline but not dot_all
|
||||
class Re2 < Regex
|
||||
@source = "fa"
|
||||
@options = Regex::Options::None
|
||||
@jit = true
|
||||
|
||||
def initialize(pattern : String, multiline = false, dotall = false, ignorecase = false)
|
||||
flags = LibPCRE2::UTF | LibPCRE2::DUPNAMES |
|
||||
LibPCRE2::UCP | LibPCRE2::ANCHORED
|
||||
flags |= LibPCRE2::MULTILINE if multiline
|
||||
flags |= LibPCRE2::DOTALL if dotall
|
||||
flags |= LibPCRE2::CASELESS if ignorecase
|
||||
@re = Regex::PCRE2.compile(pattern, flags) do |error_message|
|
||||
raise Exception.new(error_message)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# Convenience macros to parse XML
|
||||
|
Loading…
Reference in New Issue
Block a user