mirror of
https://github.com/ralsina/tartrazine.git
synced 2024-11-10 05:22:23 +00:00
Compare commits
No commits in common. "e0f697f1f97cf058d5431d2b513fdff851f3f2e6" and "a2394a7313443dd59135888715f6e7c13117977d" have entirely different histories.
e0f697f1f9
...
a2394a7313
@ -31,9 +31,6 @@ is a subset of Pygments'.
|
|||||||
|
|
||||||
Currently Tartrazine supports ... 241 languages.
|
Currently Tartrazine supports ... 241 languages.
|
||||||
|
|
||||||
It has 332 themes (64 from Chroma, the rest are base16 themes via
|
|
||||||
[Sixteen](https://github.com/ralsina/sixteen)
|
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
This will have a CLI tool that can be installed, but it's not
|
This will have a CLI tool that can be installed, but it's not
|
||||||
|
@ -1,24 +0,0 @@
|
|||||||
import sys
|
|
||||||
import string
|
|
||||||
|
|
||||||
# Run it as grep token lexers/* | python scripts/token_abbrevs.py
|
|
||||||
|
|
||||||
|
|
||||||
def abbr(line):
|
|
||||||
return "".join(c for c in line if c in string.ascii_uppercase).lower()
|
|
||||||
|
|
||||||
abbrevs = {}
|
|
||||||
tokens = set([])
|
|
||||||
for line in sys.stdin:
|
|
||||||
if "<token" not in line:
|
|
||||||
continue
|
|
||||||
line = line.strip()
|
|
||||||
line = line.split('<token ',1)[-1]
|
|
||||||
line = line.split('"')[1]
|
|
||||||
abbrevs[line] = abbr(line)
|
|
||||||
tokens.add(line)
|
|
||||||
|
|
||||||
print("Abbreviations: {")
|
|
||||||
for k, v in abbrevs.items():
|
|
||||||
print(f' "{k}" => "{v}",')
|
|
||||||
print("}")
|
|
@ -6,14 +6,11 @@ authors:
|
|||||||
|
|
||||||
targets:
|
targets:
|
||||||
tartrazine:
|
tartrazine:
|
||||||
main: src/main.cr
|
main: src/tartrazine.cr
|
||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
base58:
|
base58:
|
||||||
github: crystal-china/base58.cr
|
github: crystal-china/base58.cr
|
||||||
sixteen:
|
|
||||||
github: ralsina/sixteen
|
|
||||||
branch: main
|
|
||||||
|
|
||||||
crystal: ">= 1.13.0"
|
crystal: ">= 1.13.0"
|
||||||
|
|
||||||
|
@ -1,92 +0,0 @@
|
|||||||
module Tartrazine
|
|
||||||
Abbreviations = {
|
|
||||||
"Background" => "b",
|
|
||||||
"Text" => "t",
|
|
||||||
"CommentSingle" => "cs",
|
|
||||||
"CommentSpecial" => "cs",
|
|
||||||
"NameVariable" => "nv",
|
|
||||||
"Keyword" => "k",
|
|
||||||
"NameFunction" => "nf",
|
|
||||||
"Punctuation" => "p",
|
|
||||||
"Operator" => "o",
|
|
||||||
"LiteralNumberInteger" => "lni",
|
|
||||||
"NameBuiltin" => "nb",
|
|
||||||
"Name" => "n",
|
|
||||||
"OperatorWord" => "ow",
|
|
||||||
"LiteralStringSingle" => "lss",
|
|
||||||
"Literal" => "l",
|
|
||||||
"NameClass" => "nc",
|
|
||||||
"CommentMultiline" => "cm",
|
|
||||||
"LiteralStringRegex" => "lsr",
|
|
||||||
"KeywordDeclaration" => "kd",
|
|
||||||
"KeywordConstant" => "kc",
|
|
||||||
"NameOther" => "no",
|
|
||||||
"LiteralNumberFloat" => "lnf",
|
|
||||||
"LiteralNumberHex" => "lnh",
|
|
||||||
"LiteralStringDouble" => "lsd",
|
|
||||||
"KeywordType" => "kt",
|
|
||||||
"NameNamespace" => "nn",
|
|
||||||
"NameAttribute" => "na",
|
|
||||||
"KeywordReserved" => "kr",
|
|
||||||
"CommentPreproc" => "cp",
|
|
||||||
"KeywordNamespace" => "kn",
|
|
||||||
"NameConstant" => "nc",
|
|
||||||
"NameLabel" => "nl",
|
|
||||||
"LiteralString" => "ls",
|
|
||||||
"LiteralStringChar" => "lsc",
|
|
||||||
"TextWhitespace" => "tw",
|
|
||||||
"LiteralStringEscape" => "lse",
|
|
||||||
"LiteralNumber" => "ln",
|
|
||||||
"Other" => "o",
|
|
||||||
"LiteralStringBoolean" => "lsb",
|
|
||||||
"NameProperty" => "np",
|
|
||||||
"Comment" => "c",
|
|
||||||
"NameTag" => "nt",
|
|
||||||
"LiteralStringOther" => "lso",
|
|
||||||
"NameVariableGlobal" => "nvg",
|
|
||||||
"NameBuiltinPseudo" => "nbp",
|
|
||||||
"LiteralNumberBin" => "lnb",
|
|
||||||
"KeywordPseudo" => "kp",
|
|
||||||
"CommentPreprocFile" => "cpf",
|
|
||||||
"LiteralStringAffix" => "lsa",
|
|
||||||
"LiteralStringDelimiter" => "lsd",
|
|
||||||
"LiteralNumberOct" => "lno",
|
|
||||||
"Error" => "e",
|
|
||||||
"Generic" => "g",
|
|
||||||
"LiteralNumberIntegerLong" => "lnil",
|
|
||||||
"NameDecorator" => "nd",
|
|
||||||
"LiteralStringInterpol" => "lsi",
|
|
||||||
"LiteralStringBacktick" => "lsb",
|
|
||||||
"GenericPrompt" => "gp",
|
|
||||||
"GenericOutput" => "go",
|
|
||||||
"LiteralStringName" => "lsn",
|
|
||||||
"LiteralStringHeredoc" => "lsh",
|
|
||||||
"LiteralStringSymbol" => "lss",
|
|
||||||
"NameVariableInstance" => "nvi",
|
|
||||||
"LiteralOther" => "lo",
|
|
||||||
"NameVariableClass" => "nvc",
|
|
||||||
"NameOperator" => "no",
|
|
||||||
"None" => "n",
|
|
||||||
"LiteralStringDoc" => "lsd",
|
|
||||||
"NameException" => "ne",
|
|
||||||
"GenericSubheading" => "gs",
|
|
||||||
"GenericStrong" => "gs",
|
|
||||||
"GenericDeleted" => "gd",
|
|
||||||
"GenericInserted" => "gi",
|
|
||||||
"GenericHeading" => "gh",
|
|
||||||
"NameEntity" => "ne",
|
|
||||||
"NamePseudo" => "np",
|
|
||||||
"CommentHashbang" => "ch",
|
|
||||||
"TextPunctuation" => "tp",
|
|
||||||
"NameVariableAnonymous" => "nva",
|
|
||||||
"NameVariableMagic" => "nvm",
|
|
||||||
"NameFunctionMagic" => "nfm",
|
|
||||||
"GenericEmph" => "ge",
|
|
||||||
"GenericUnderline" => "gu",
|
|
||||||
"LiteralStringAtom" => "lsa",
|
|
||||||
"LiteralDate" => "ld",
|
|
||||||
"GenericError" => "ge",
|
|
||||||
"TextSymbol" => "ts",
|
|
||||||
"NameKeyword" => "nk",
|
|
||||||
}
|
|
||||||
end
|
|
@ -1,6 +1,5 @@
|
|||||||
require "./constants.cr"
|
|
||||||
require "./styles.cr"
|
|
||||||
require "./tartrazine.cr"
|
require "./tartrazine.cr"
|
||||||
|
require "./styles.cr"
|
||||||
|
|
||||||
module Tartrazine
|
module Tartrazine
|
||||||
# This is the base class for all formatters.
|
# This is the base class for all formatters.
|
||||||
@ -15,7 +14,7 @@ module Tartrazine
|
|||||||
def get_style_defs(theme : Theme) : String
|
def get_style_defs(theme : Theme) : String
|
||||||
output = String.build do |outp|
|
output = String.build do |outp|
|
||||||
theme.styles.each do |token, style|
|
theme.styles.each do |token, style|
|
||||||
outp << ".#{get_css_class(token, theme)} {"
|
outp << ".#{token} {"
|
||||||
# These are set or nil
|
# These are set or nil
|
||||||
outp << "color: #{style.color};" if style.color
|
outp << "color: #{style.color};" if style.color
|
||||||
outp << "background-color: #{style.background};" if style.background
|
outp << "background-color: #{style.background};" if style.background
|
||||||
@ -43,7 +42,7 @@ module Tartrazine
|
|||||||
outp << "<html><head><style>"
|
outp << "<html><head><style>"
|
||||||
outp << get_style_defs(theme)
|
outp << get_style_defs(theme)
|
||||||
outp << "</style></head><body>"
|
outp << "</style></head><body>"
|
||||||
outp << "<pre class=\"#{get_css_class("Background", theme)}\"><code class=\"#{get_css_class("Background", theme)}\">"
|
outp << "<pre class=\"Background\"><code class=\"Background\">"
|
||||||
lexer.tokenize(text).each do |token|
|
lexer.tokenize(text).each do |token|
|
||||||
fragment = "<span class=\"#{get_css_class(token[:type], theme)}\">#{token[:value]}</span>"
|
fragment = "<span class=\"#{get_css_class(token[:type], theme)}\">#{token[:value]}</span>"
|
||||||
outp << fragment
|
outp << fragment
|
||||||
@ -55,15 +54,19 @@ module Tartrazine
|
|||||||
|
|
||||||
# Given a token type, return the CSS class to use.
|
# Given a token type, return the CSS class to use.
|
||||||
def get_css_class(token, theme)
|
def get_css_class(token, theme)
|
||||||
return Abbreviations[token] if theme.styles.has_key?(token)
|
return token if theme.styles.has_key?(token)
|
||||||
|
|
||||||
# Themes don't contain information for each specific
|
# Themes don't contain information for each specific
|
||||||
# token type. However, they may contain information
|
# token type. However, they may contain information
|
||||||
# for a parent style. Worst case, we go to the root
|
# for a parent style. Worst case, we go to the root
|
||||||
# (Background) style.
|
# (Background) style.
|
||||||
Abbreviations[theme.style_parents(token).reverse.find { |parent|
|
theme.style_parents(token).reverse.find { |parent|
|
||||||
theme.styles.has_key?(parent)
|
theme.styles.has_key?(parent)
|
||||||
}]
|
}
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
lexer = Tartrazine.lexer("crystal")
|
||||||
|
theme = Tartrazine.theme("catppuccin-macchiato")
|
||||||
|
puts Tartrazine::Html.new.format(File.read(ARGV[0]), lexer, theme)
|
||||||
|
@ -1,5 +0,0 @@
|
|||||||
require "./**"
|
|
||||||
|
|
||||||
lexer = Tartrazine.lexer("crystal")
|
|
||||||
theme = Tartrazine.theme(ARGV[1])
|
|
||||||
puts Tartrazine::Html.new.format(File.read(ARGV[0]), lexer, theme)
|
|
32
src/rules.cr
32
src/rules.cr
@ -5,19 +5,18 @@ require "./actions"
|
|||||||
# state of the lexer.
|
# state of the lexer.
|
||||||
module Tartrazine
|
module Tartrazine
|
||||||
# This rule matches via a regex pattern
|
# This rule matches via a regex pattern
|
||||||
|
|
||||||
class Rule
|
class Rule
|
||||||
property pattern : Regex = Re2.new ""
|
property pattern : Regex = Re2.new ""
|
||||||
property actions : Array(Action) = [] of Action
|
property actions : Array(Action) = [] of Action
|
||||||
property xml : String = "foo"
|
property xml : String = "foo"
|
||||||
|
|
||||||
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
|
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
|
||||||
|
tokens = [] of Token
|
||||||
match = pattern.match(text, pos)
|
match = pattern.match(text, pos)
|
||||||
# We don't match if the match doesn't move the cursor
|
# We don't match if the match doesn't move the cursor
|
||||||
# because that causes infinite loops
|
# because that causes infinite loops
|
||||||
|
Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
|
||||||
return false, pos, [] of Token if match.nil? || match.end == 0
|
return false, pos, [] of Token if match.nil? || match.end == 0
|
||||||
# Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
|
|
||||||
tokens = [] of Token
|
|
||||||
# Emit the tokens
|
# Emit the tokens
|
||||||
actions.each do |action|
|
actions.each do |action|
|
||||||
# Emit the token
|
# Emit the token
|
||||||
@ -29,12 +28,7 @@ module Tartrazine
|
|||||||
|
|
||||||
def initialize(node : XML::Node, multiline, dotall, ignorecase)
|
def initialize(node : XML::Node, multiline, dotall, ignorecase)
|
||||||
@xml = node.to_s
|
@xml = node.to_s
|
||||||
@pattern = Re2.new(
|
@pattern = Re2.new(node["pattern"], multiline, dotall, ignorecase)
|
||||||
node["pattern"],
|
|
||||||
multiline,
|
|
||||||
dotall,
|
|
||||||
ignorecase,
|
|
||||||
anchored: true)
|
|
||||||
add_actions(node)
|
add_actions(node)
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -86,24 +80,4 @@ module Tartrazine
|
|||||||
add_actions(node)
|
add_actions(node)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# This is a hack to workaround that Crystal seems to disallow
|
|
||||||
# having regexes multiline but not dot_all
|
|
||||||
class Re2 < Regex
|
|
||||||
@source = "fa"
|
|
||||||
@options = Regex::Options::None
|
|
||||||
@jit = true
|
|
||||||
|
|
||||||
def initialize(pattern : String, multiline = false, dotall = false, ignorecase = false, anchored = false)
|
|
||||||
flags = LibPCRE2::UTF | LibPCRE2::DUPNAMES |
|
|
||||||
LibPCRE2::UCP
|
|
||||||
flags |= LibPCRE2::MULTILINE if multiline
|
|
||||||
flags |= LibPCRE2::DOTALL if dotall
|
|
||||||
flags |= LibPCRE2::CASELESS if ignorecase
|
|
||||||
flags |= LibPCRE2::ANCHORED if anchored
|
|
||||||
@re = Regex::PCRE2.compile(pattern, flags) do |error_message|
|
|
||||||
raise Exception.new(error_message)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
@ -1,9 +1,7 @@
|
|||||||
require "sixteen"
|
|
||||||
require "xml"
|
require "xml"
|
||||||
|
|
||||||
module Tartrazine
|
module Tartrazine
|
||||||
def self.theme(name : String) : Theme
|
def self.theme(name : String) : Theme
|
||||||
return Theme.from_base16(name[7..]) if name.starts_with? "base16_"
|
|
||||||
path = File.join("styles", "#{name}.xml")
|
path = File.join("styles", "#{name}.xml")
|
||||||
Theme.from_xml(File.read(path))
|
Theme.from_xml(File.read(path))
|
||||||
end
|
end
|
||||||
@ -29,9 +27,6 @@ module Tartrazine
|
|||||||
# anything
|
# anything
|
||||||
property? complete : Bool = false
|
property? complete : Bool = false
|
||||||
|
|
||||||
def initialize(@color = nil, @background = nil, @border = nil, @bold = nil, @italic = nil, @underline = nil)
|
|
||||||
end
|
|
||||||
|
|
||||||
macro merge_prop(prop)
|
macro merge_prop(prop)
|
||||||
new.{{prop}} = other.{{prop}}.nil? ? self.{{prop}} : other.{{prop}}
|
new.{{prop}} = other.{{prop}}.nil? ? self.{{prop}} : other.{{prop}}
|
||||||
end
|
end
|
||||||
@ -83,44 +78,6 @@ module Tartrazine
|
|||||||
parents
|
parents
|
||||||
end
|
end
|
||||||
|
|
||||||
# Load from a base16 theme name using Sixteen
|
|
||||||
def self.from_base16(name : String) : Theme
|
|
||||||
t = Sixteen.theme(name)
|
|
||||||
theme = Theme.new
|
|
||||||
theme.name = name
|
|
||||||
# The color assignments are adapted from
|
|
||||||
# https://github.com/mohd-akram/base16-pygments/
|
|
||||||
|
|
||||||
theme.styles["Background"] = Style.new(color: t.palette["base05"], background: t.palette["base00"])
|
|
||||||
theme.styles["Text"] = Style.new(color: t.palette["base05"])
|
|
||||||
theme.styles["Error"] = Style.new(color: t.palette["base08"])
|
|
||||||
theme.styles["Comment"] = Style.new(color: t.palette["base03"])
|
|
||||||
theme.styles["CommentPreproc"] = Style.new(color: t.palette["base0F"])
|
|
||||||
theme.styles["CommentPreprocFile"] = Style.new(color: t.palette["base0B"])
|
|
||||||
theme.styles["Keyword"] = Style.new(color: t.palette["base0E"])
|
|
||||||
theme.styles["KeywordType"] = Style.new(color: t.palette["base08"])
|
|
||||||
theme.styles["NameAttribute"] = Style.new(color: t.palette["base0D"])
|
|
||||||
theme.styles["NameBuiltin"] = Style.new(color: t.palette["base08"])
|
|
||||||
theme.styles["NameBuiltinPseudo"] = Style.new(color: t.palette["base08"])
|
|
||||||
theme.styles["NameClass"] = Style.new(color: t.palette["base0D"])
|
|
||||||
theme.styles["NameConstant"] = Style.new(color: t.palette["base09"])
|
|
||||||
theme.styles["NameDecorator"] = Style.new(color: t.palette["base09"])
|
|
||||||
theme.styles["NameFunction"] = Style.new(color: t.palette["base0D"])
|
|
||||||
theme.styles["NameNamespace"] = Style.new(color: t.palette["base0D"])
|
|
||||||
theme.styles["NameTag"] = Style.new(color: t.palette["base0E"])
|
|
||||||
theme.styles["NameVariable"] = Style.new(color: t.palette["base0D"])
|
|
||||||
theme.styles["NameVariableInstance"] = Style.new(color: t.palette["base08"])
|
|
||||||
theme.styles["LiteralNumber"] = Style.new(color: t.palette["base09"])
|
|
||||||
theme.styles["Operator"] = Style.new(color: t.palette["base0C"])
|
|
||||||
theme.styles["OperatorWord"] = Style.new(color: t.palette["base0E"])
|
|
||||||
theme.styles["Literal"] = Style.new(color: t.palette["base0B"])
|
|
||||||
theme.styles["LiteralString"] = Style.new(color: t.palette["base0B"])
|
|
||||||
theme.styles["LiteralStringInterpol"] = Style.new(color: t.palette["base0F"])
|
|
||||||
theme.styles["LiteralStringRegex"] = Style.new(color: t.palette["base0C"])
|
|
||||||
theme.styles["LiteralStringSymbol"] = Style.new(color: t.palette["base09"])
|
|
||||||
theme
|
|
||||||
end
|
|
||||||
|
|
||||||
# Load from a Chroma XML file
|
# Load from a Chroma XML file
|
||||||
def self.from_xml(xml : String) : Theme
|
def self.from_xml(xml : String) : Theme
|
||||||
document = XML.parse(xml)
|
document = XML.parse(xml)
|
||||||
|
@ -54,36 +54,25 @@ module Tartrazine
|
|||||||
|
|
||||||
property state_stack = ["root"]
|
property state_stack = ["root"]
|
||||||
|
|
||||||
# Turn the text into a list of tokens. The `usingself` parameter
|
# Turn the text into a list of tokens.
|
||||||
# is true when the lexer is being used to tokenize a string
|
|
||||||
# from a larger text that is already being tokenized.
|
|
||||||
# So, when it's true, we don't modify the text.
|
|
||||||
def tokenize(text, usingself = false) : Array(Token)
|
def tokenize(text, usingself = false) : Array(Token)
|
||||||
@state_stack = ["root"]
|
@state_stack = ["root"]
|
||||||
tokens = [] of Token
|
tokens = [] of Token
|
||||||
pos = 0
|
pos = 0
|
||||||
matched = false
|
matched = false
|
||||||
time = 0
|
|
||||||
count = 0
|
|
||||||
|
|
||||||
# Respect the `ensure_nl` config option
|
|
||||||
if text.size > 0 && text[-1] != '\n' && config[:ensure_nl] && !usingself
|
if text.size > 0 && text[-1] != '\n' && config[:ensure_nl] && !usingself
|
||||||
text += "\n"
|
text += "\n"
|
||||||
end
|
end
|
||||||
|
|
||||||
# Loop through the text, applying rules
|
|
||||||
while pos < text.size
|
while pos < text.size
|
||||||
state = states[@state_stack.last]
|
state = states[@state_stack.last]
|
||||||
Log.trace { "Stack is #{@state_stack} State is #{state.name}, pos is #{pos}, text is #{text[pos..pos + 10]}" }
|
Log.trace { "Stack is #{@state_stack} State is #{state.name}, pos is #{pos}, text is #{text[pos..pos + 10]}" }
|
||||||
state.rules.each do |rule|
|
state.rules.each do |rule|
|
||||||
matched, new_pos, new_tokens = rule.match(text, pos, self)
|
matched, new_pos, new_tokens = rule.match(text, pos, self)
|
||||||
if matched
|
if matched
|
||||||
# Move position forward, save the tokens,
|
|
||||||
# tokenize from the new position
|
|
||||||
Log.trace { "MATCHED: #{rule.xml}" }
|
Log.trace { "MATCHED: #{rule.xml}" }
|
||||||
pos = new_pos
|
pos = new_pos
|
||||||
tokens += new_tokens
|
tokens += new_tokens
|
||||||
break
|
break # We go back to processing with current state
|
||||||
end
|
end
|
||||||
Log.trace { "NOT MATCHED: #{rule.xml}" }
|
Log.trace { "NOT MATCHED: #{rule.xml}" }
|
||||||
end
|
end
|
||||||
@ -186,6 +175,25 @@ module Tartrazine
|
|||||||
def self.lexer(name : String) : Lexer
|
def self.lexer(name : String) : Lexer
|
||||||
Lexer.from_xml(File.read("lexers/#{name}.xml"))
|
Lexer.from_xml(File.read("lexers/#{name}.xml"))
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# This is a hack to workaround that Crystal seems to disallow
|
||||||
|
# having regexes multiline but not dot_all
|
||||||
|
class Re2 < Regex
|
||||||
|
@source = "fa"
|
||||||
|
@options = Regex::Options::None
|
||||||
|
@jit = true
|
||||||
|
|
||||||
|
def initialize(pattern : String, multiline = false, dotall = false, ignorecase = false)
|
||||||
|
flags = LibPCRE2::UTF | LibPCRE2::DUPNAMES |
|
||||||
|
LibPCRE2::UCP | LibPCRE2::ANCHORED
|
||||||
|
flags |= LibPCRE2::MULTILINE if multiline
|
||||||
|
flags |= LibPCRE2::DOTALL if dotall
|
||||||
|
flags |= LibPCRE2::CASELESS if ignorecase
|
||||||
|
@re = Regex::PCRE2.compile(pattern, flags) do |error_message|
|
||||||
|
raise Exception.new(error_message)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Convenience macros to parse XML
|
# Convenience macros to parse XML
|
||||||
|
Loading…
Reference in New Issue
Block a user