6 Commits

10 changed files with 96 additions and 206 deletions

1
.gitignore vendored
View File

@@ -6,4 +6,3 @@
chroma/ chroma/
pygments/ pygments/
shard.lock shard.lock
.vscode/

View File

@@ -1,7 +0,0 @@
# Every input that can affect the compiled binary.
# GNU make's $(wildcard) has no recursive "**": it behaves like a single "*",
# so "src/**/*.cr" only matches files exactly one directory below src/.
# The top-level src/*.cr files are therefore listed explicitly as well.
SOURCES := $(wildcard src/*.cr) $(wildcard src/**/*.cr) $(wildcard lexers/*xml) $(wildcard styles/*xml) shard.yml

# None of these targets creates a file named after itself.
.PHONY: build release static

# Development build.
build: $(SOURCES)
	shards build -Dstrict_multi_assign -Dno_number_autocast

# Optimised build.
release: $(SOURCES)
	shards build --release

# Optimised, statically linked build; the resulting binary is stripped.
static: $(SOURCES)
	shards build --release --static
	strip bin/tartrazine

View File

@@ -1,16 +0,0 @@
#!/bin/bash
# Builds static tartrazine binaries for linux/amd64 and linux/arm64 inside
# Docker and leaves them in bin/ as tartrazine-static-linux-<arch>.
set -e

# Set up qemu-user-static so containers for a foreign architecture can run
# on this host.
docker run --rm --privileged \
  multiarch/qemu-user-static \
  --reset -p yes

# build_static ARCH
#   Builds the builder image for linux/ARCH, runs `make static` inside it with
#   the working tree mounted at /app, and renames the produced binary.
build_static() {
  local arch="$1"
  # The platform is always passed explicitly: relying on the host default
  # would label a native build as amd64 even on a non-amd64 host.
  docker build . -f Dockerfile.static --platform "linux/${arch}" -t tartrazine-builder
  docker run -ti --rm -v "$PWD":/app --platform "linux/${arch}" --user="$UID" tartrazine-builder /bin/sh -c "cd /app && rm -rf lib shard.lock && make static"
  mv bin/tartrazine "bin/tartrazine-static-linux-${arch}"
}

build_static amd64
build_static arm64

View File

@@ -9,12 +9,13 @@ targets:
main: src/main.cr main: src/main.cr
dependencies: dependencies:
baked_file_system:
github: schovi/baked_file_system
base58: base58:
github: crystal-china/base58.cr github: crystal-china/base58.cr
sixteen: sixteen:
github: ralsina/sixteen github: ralsina/sixteen
branch: main
cre2:
git: "https://git.ralsina.me/ralsina/cre2.git"
crystal: ">= 1.13.0" crystal: ">= 1.13.0"

View File

@@ -1,9 +1,4 @@
require "./actions" require "xml"
require "./constants"
require "./formatter"
require "./rules"
require "./styles"
require "./tartrazine"
# These are Lexer actions. When a rule matches, it will # These are Lexer actions. When a rule matches, it will
# perform a list of actions. These actions can emit tokens # perform a list of actions. These actions can emit tokens
@@ -31,11 +26,12 @@ module Tartrazine
end end
# ameba:disable Metrics/CyclomaticComplexity # ameba:disable Metrics/CyclomaticComplexity
def emit(match : Regex::MatchData?, lexer : Lexer, match_group = 0) : Array(Token) def emit(match : MatchData,
lexer : Lexer, match_group = 0) : Array(Token)
case type case type
when "token" when "token"
raise Exception.new "Can't have a token without a match" if match.nil? raise Exception.new "Can't have a token without a match" if match.nil? || match[0].size == 0
[Token.new(type: xml["type"], value: match[match_group])] [Token.new(type: xml["type"], value: match[0])]
when "push" when "push"
states_to_push = xml.attributes.select { |attrib| states_to_push = xml.attributes.select { |attrib|
attrib.name == "state" attrib.name == "state"
@@ -68,35 +64,37 @@ module Tartrazine
when "bygroups" when "bygroups"
# FIXME: handle # FIXME: handle
# ><bygroups> # ><bygroups>
# <token type="Punctuation"/> # <token type="Punctuation"/>https://github.com/google/re2/wiki/Syntax
# None # None
# <token type="LiteralStringRegex"/> # <token type="LiteralStringRegex"/>
# #
# where that None means skipping a group # where that None means skipping a group
# #
raise Exception.new "Can't have a token without a match" if match.nil? raise Exception.new "Can't have a bygroups without a match" if match.nil? || match[0].size == 0
# Each group matches an action. If the group match is empty, # Each group matches an action. If the group match is empty,
# the action is skipped. # the action is skipped.
result = [] of Token result = [] of Token
@actions.each_with_index do |e, i| @actions.each_with_index do |e, i|
next if match[i + 1]?.nil? next if match[i].size == 0
result += e.emit(match, lexer, i + 1) result += e.emit(match, lexer, i)
end end
result result
when "using" when "using"
# Shunt to another lexer entirely # Shunt to another lexer entirely
return [] of Token if match.nil? return [] of Token if match.nil? || match[0].size == 0
lexer_name = xml["lexer"].downcase lexer_name = xml["lexer"].downcase
Log.trace { "to tokenize: #{match[match_group]}" } # Log.trace { "to tokenize: #{match[match_group]}" }
Tartrazine.lexer(lexer_name).tokenize(match[match_group], usingself: true) to_tokenize = match[match_group]
Tartrazine.lexer(lexer_name).tokenize(to_tokenize, usingself: true)
when "usingself" when "usingself"
# Shunt to another copy of this lexer # Shunt to another copy of this lexer
return [] of Token if match.nil? return [] of Token if match.nil? || match[0].size == 0
new_lexer = Lexer.from_xml(lexer.xml) new_lexer = Lexer.from_xml(lexer.xml)
Log.trace { "to tokenize: #{match[match_group]}" } # Log.trace { "to tokenize: #{match[match_group]}" }
new_lexer.tokenize(match[match_group], usingself: true) to_tokenize = match[match_group]
new_lexer.tokenize(to_tokenize, usingself: true)
when "combined" when "combined"
# Combine two states into one anonymous state # Combine two states into one anonymous state
states = xml.attributes.select { |attrib| states = xml.attributes.select { |attrib|

View File

@@ -1,10 +1,6 @@
require "./actions" require "./constants.cr"
require "./constants" require "./styles.cr"
require "./formatter" require "./tartrazine.cr"
require "./rules"
require "./styles"
require "./tartrazine"
require "colorize"
module Tartrazine module Tartrazine
# This is the base class for all formatters. # This is the base class for all formatters.
@@ -15,42 +11,30 @@ module Tartrazine
raise Exception.new("Not implemented") raise Exception.new("Not implemented")
end end
# ameba:disable Metrics/CyclomaticComplexity
def get_style_defs(theme : Theme) : String def get_style_defs(theme : Theme) : String
raise Exception.new("Not implemented")
end
end
class Ansi < Formatter
def format(text : String, lexer : Lexer, theme : Theme) : String
output = String.build do |outp| output = String.build do |outp|
lexer.tokenize(text).each do |token| theme.styles.each do |token, style|
outp << self.colorize(token[:value], token[:type], theme) outp << ".#{get_css_class(token, theme)} {"
# These are set or nil
outp << "color: #{style.color};" if style.color
outp << "background-color: #{style.background};" if style.background
outp << "border: 1px solid #{style.border};" if style.border
# These are true/false/nil
outp << "border: none;" if style.border == false
outp << "font-weight: bold;" if style.bold
outp << "font-weight: 400;" if style.bold == false
outp << "font-style: italic;" if style.italic
outp << "font-style: normal;" if style.italic == false
outp << "text-decoration: underline;" if style.underline
outp << "text-decoration: none;" if style.underline == false
outp << "}"
end end
end end
output output
end end
# Returns *text* wrapped in the terminal styling that *theme* assigns to
# *token*.
#
# Themes don't contain information for each specific token type, so when
# *token* has no style of its own, the reversed `style_parents` list is
# searched for the first parent that does. If neither the token nor any
# parent has a style, *text* is returned unchanged.
def colorize(text : String, token : String, theme : Theme) : String
  # Resolve which style key to use before looking anything up. Bailing out
  # as soon as the token itself is missing would skip the parent fallback.
  style_key = if theme.styles.has_key?(token)
                token
              else
                theme.style_parents(token).reverse.find { |parent|
                  theme.styles.has_key?(parent)
                }
              end
  return text if style_key.nil?

  s = theme.styles[style_key]
  colorized = text.colorize(s.color.try &.colorize)
  # Intentionally not setting background color
  colorized.mode(:bold) if s.bold
  colorized.mode(:italic) if s.italic
  colorized.mode(:underline) if s.underline
  colorized.to_s
end
end end
class Html < Formatter class Html < Formatter
@@ -69,31 +53,6 @@ module Tartrazine
output output
end end
# ameba:disable Metrics/CyclomaticComplexity
# Builds the CSS for *theme*: one `.<class> {...}` rule per entry in
# `theme.styles`, concatenated into a single string without separators.
# The class name for each entry comes from `get_css_class`.
def get_style_defs(theme : Theme) : String
  String.build do |css|
    theme.styles.each do |token_type, rule|
      css << ".#{get_css_class(token_type, theme)} {"

      # Colour-like properties: a declaration is written only when set.
      if rule.color
        css << "color: #{rule.color.try &.hex};"
      end
      if rule.background
        css << "background-color: #{rule.background.try &.hex};"
      end
      if rule.border
        css << "border: 1px solid #{rule.border.try &.hex};"
      end

      # Flag-like properties: a truthy value and an explicit `false` each
      # write their own declaration; nil writes nothing.
      if rule.border == false
        css << "border: none;"
      end

      if rule.bold
        css << "font-weight: bold;"
      elsif rule.bold == false
        css << "font-weight: 400;"
      end

      if rule.italic
        css << "font-style: italic;"
      elsif rule.italic == false
        css << "font-style: normal;"
      end

      if rule.underline
        css << "text-decoration: underline;"
      elsif rule.underline == false
        css << "text-decoration: none;"
      end

      css << "}"
    end
  end
end
# Given a token type, return the CSS class to use. # Given a token type, return the CSS class to use.
def get_css_class(token, theme) def get_css_class(token, theme)
return Abbreviations[token] if theme.styles.has_key?(token) return Abbreviations[token] if theme.styles.has_key?(token)

0
src/re2.cr Normal file
View File

View File

@@ -1,9 +1,5 @@
require "./actions" require "./actions"
require "./constants" # require "cre2"
require "./formatter"
require "./rules"
require "./styles"
require "./tartrazine"
# These are lexer rules. They match with the text being parsed # These are lexer rules. They match with the text being parsed
# and perform actions, either emitting tokens or changing the # and perform actions, either emitting tokens or changing the
@@ -11,8 +7,12 @@ require "./tartrazine"
module Tartrazine module Tartrazine
# This rule matches via a regex pattern # This rule matches via a regex pattern
# alias Regex = CRe2::Regex
# alias MatchData = CRe2::MatchDataLike | Regex::MatchData | Nil
alias MatchData = Regex::MatchData | Nil
class Rule class Rule
property pattern : Regex = Re2.new "" property pattern : Regex = Regex.new ""
property actions : Array(Action) = [] of Action property actions : Array(Action) = [] of Action
property xml : String = "foo" property xml : String = "foo"
@@ -20,7 +20,8 @@ module Tartrazine
match = pattern.match(text, pos) match = pattern.match(text, pos)
# We don't match if the match doesn't move the cursor # We don't match if the match doesn't move the cursor
# because that causes infinite loops # because that causes infinite loops
return false, pos, [] of Token if match.nil? || match.end == 0
return false, pos, [] of Token if match.nil?
# Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" } # Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
tokens = [] of Token tokens = [] of Token
# Emit the tokens # Emit the tokens
@@ -28,18 +29,17 @@ module Tartrazine
# Emit the token # Emit the token
tokens += action.emit(match, lexer) tokens += action.emit(match, lexer)
end end
Log.trace { "#{xml}, #{match.end}, #{tokens}" } # Log.trace { "#{xml}, #{match.end}, #{tokens}" }
return true, match.end, tokens return true, match[0].size, tokens
end end
def initialize(node : XML::Node, multiline, dotall, ignorecase) def initialize(node : XML::Node, multiline, dotall, ignorecase)
@xml = node.to_s @xml = node.to_s
@pattern = Re2.new( options = Regex::Options::ANCHORED
node["pattern"], options |= Regex::Options::MULTILINE if multiline
multiline, options |= Regex::Options::DOTALL if dotall
dotall, options |= Regex::Options::IGNORE_CASE if ignorecase
ignorecase, @pattern = Regex.new(node["pattern"], options)
anchored: true)
add_actions(node) add_actions(node)
end end
@@ -91,25 +91,4 @@ module Tartrazine
add_actions(node) add_actions(node)
end end
end end
# This is a hack to work around Crystal seemingly not allowing a regex
# to be multiline without also being dot_all.
#
# The pattern is compiled through Regex::PCRE2.compile with hand-picked
# LibPCRE2 flags instead of going through Regex::Options.
class Re2 < Regex
# Fixed placeholder values for these instance variables.
# NOTE(review): presumably these exist only so the parent's instance
# variables are initialized, since `initialize` below does not call
# `super` — confirm against the Regex implementation in use.
@source = "fa"
@options = Regex::Options::None
@jit = true
# Compiles *pattern* with PCRE2.
#
# UTF, DUPNAMES, UCP and NO_UTF_CHECK are always set; MULTILINE, DOTALL,
# CASELESS and ANCHORED are added only when the matching argument is
# truthy, each independently of the others.
#
# Raises Exception with the message handed to the compile block when
# compilation reports an error.
def initialize(pattern : String, multiline = false, dotall = false, ignorecase = false, anchored = false)
flags = LibPCRE2::UTF | LibPCRE2::DUPNAMES |
LibPCRE2::UCP
flags |= LibPCRE2::MULTILINE if multiline
flags |= LibPCRE2::DOTALL if dotall
flags |= LibPCRE2::CASELESS if ignorecase
flags |= LibPCRE2::ANCHORED if anchored
flags |= LibPCRE2::NO_UTF_CHECK
@re = Regex::PCRE2.compile(pattern, flags) do |error_message|
raise Exception.new(error_message)
end
end
end
end end

View File

@@ -1,23 +1,11 @@
require "./actions"
require "./constants"
require "./formatter"
require "./rules"
require "./styles"
require "./tartrazine"
require "sixteen" require "sixteen"
require "xml" require "xml"
module Tartrazine module Tartrazine
alias Color = Sixteen::Color
def self.theme(name : String) : Theme def self.theme(name : String) : Theme
return Theme.from_base16(name[7..]) if name.starts_with? "base16_" return Theme.from_base16(name[7..]) if name.starts_with? "base16_"
Theme.from_xml(ThemeFiles.get("/#{name}.xml").gets_to_end) path = File.join("styles", "#{name}.xml")
end Theme.from_xml(File.read(path))
# Container for the theme definitions in the "../styles" folder, which is
# given relative to this file's directory (__DIR__) and registered through
# BakedFileSystem's `bake_folder`.
# NOTE(review): presumably this embeds the files in the compiled binary —
# confirm against the baked_file_system documentation.
class ThemeFiles
extend BakedFileSystem
bake_folder "../styles", __DIR__
end
class Style class Style
@@ -31,9 +19,9 @@ module Tartrazine
# These properties are either set or nil # These properties are either set or nil
# (inherit from parent style) # (inherit from parent style)
property background : Color? property background : String?
property border : Color? property border : String?
property color : Color? property color : String?
# Styles are incomplete by default and inherit # Styles are incomplete by default and inherit
# from parents. If this is true, this style # from parents. If this is true, this style
@@ -103,33 +91,33 @@ module Tartrazine
# The color assignments are adapted from # The color assignments are adapted from
# https://github.com/mohd-akram/base16-pygments/ # https://github.com/mohd-akram/base16-pygments/
theme.styles["Background"] = Style.new(color: t["base05"], background: t["base00"]) theme.styles["Background"] = Style.new(color: t.palette["base05"], background: t.palette["base00"])
theme.styles["Text"] = Style.new(color: t["base05"]) theme.styles["Text"] = Style.new(color: t.palette["base05"])
theme.styles["Error"] = Style.new(color: t["base08"]) theme.styles["Error"] = Style.new(color: t.palette["base08"])
theme.styles["Comment"] = Style.new(color: t["base03"]) theme.styles["Comment"] = Style.new(color: t.palette["base03"])
theme.styles["CommentPreproc"] = Style.new(color: t["base0F"]) theme.styles["CommentPreproc"] = Style.new(color: t.palette["base0F"])
theme.styles["CommentPreprocFile"] = Style.new(color: t["base0B"]) theme.styles["CommentPreprocFile"] = Style.new(color: t.palette["base0B"])
theme.styles["Keyword"] = Style.new(color: t["base0E"]) theme.styles["Keyword"] = Style.new(color: t.palette["base0E"])
theme.styles["KeywordType"] = Style.new(color: t["base08"]) theme.styles["KeywordType"] = Style.new(color: t.palette["base08"])
theme.styles["NameAttribute"] = Style.new(color: t["base0D"]) theme.styles["NameAttribute"] = Style.new(color: t.palette["base0D"])
theme.styles["NameBuiltin"] = Style.new(color: t["base08"]) theme.styles["NameBuiltin"] = Style.new(color: t.palette["base08"])
theme.styles["NameBuiltinPseudo"] = Style.new(color: t["base08"]) theme.styles["NameBuiltinPseudo"] = Style.new(color: t.palette["base08"])
theme.styles["NameClass"] = Style.new(color: t["base0D"]) theme.styles["NameClass"] = Style.new(color: t.palette["base0D"])
theme.styles["NameConstant"] = Style.new(color: t["base09"]) theme.styles["NameConstant"] = Style.new(color: t.palette["base09"])
theme.styles["NameDecorator"] = Style.new(color: t["base09"]) theme.styles["NameDecorator"] = Style.new(color: t.palette["base09"])
theme.styles["NameFunction"] = Style.new(color: t["base0D"]) theme.styles["NameFunction"] = Style.new(color: t.palette["base0D"])
theme.styles["NameNamespace"] = Style.new(color: t["base0D"]) theme.styles["NameNamespace"] = Style.new(color: t.palette["base0D"])
theme.styles["NameTag"] = Style.new(color: t["base0E"]) theme.styles["NameTag"] = Style.new(color: t.palette["base0E"])
theme.styles["NameVariable"] = Style.new(color: t["base0D"]) theme.styles["NameVariable"] = Style.new(color: t.palette["base0D"])
theme.styles["NameVariableInstance"] = Style.new(color: t["base08"]) theme.styles["NameVariableInstance"] = Style.new(color: t.palette["base08"])
theme.styles["LiteralNumber"] = Style.new(color: t["base09"]) theme.styles["LiteralNumber"] = Style.new(color: t.palette["base09"])
theme.styles["Operator"] = Style.new(color: t["base0C"]) theme.styles["Operator"] = Style.new(color: t.palette["base0C"])
theme.styles["OperatorWord"] = Style.new(color: t["base0E"]) theme.styles["OperatorWord"] = Style.new(color: t.palette["base0E"])
theme.styles["Literal"] = Style.new(color: t["base0B"]) theme.styles["Literal"] = Style.new(color: t.palette["base0B"])
theme.styles["LiteralString"] = Style.new(color: t["base0B"]) theme.styles["LiteralString"] = Style.new(color: t.palette["base0B"])
theme.styles["LiteralStringInterpol"] = Style.new(color: t["base0F"]) theme.styles["LiteralStringInterpol"] = Style.new(color: t.palette["base0F"])
theme.styles["LiteralStringRegex"] = Style.new(color: t["base0C"]) theme.styles["LiteralStringRegex"] = Style.new(color: t.palette["base0C"])
theme.styles["LiteralStringSymbol"] = Style.new(color: t["base09"]) theme.styles["LiteralStringSymbol"] = Style.new(color: t.palette["base09"])
theme theme
end end
@@ -156,9 +144,9 @@ module Tartrazine
s.underline = true if style.includes?("underline") s.underline = true if style.includes?("underline")
s.underline = false if style.includes?("nounderline") s.underline = false if style.includes?("nounderline")
s.color = style.find(&.starts_with?("#")).try { |v| Color.new v.split("#").last } s.color = style.find(&.starts_with?("#")).try &.split("#").last
s.background = style.find(&.starts_with?("bg:#")).try { |v| Color.new v.split("#").last } s.background = style.find(&.starts_with?("bg:#")).try &.split("#").last
s.border = style.find(&.starts_with?("border:#")).try { |v| Color.new v.split("#").last } s.border = style.find(&.starts_with?("border:#")).try &.split("#").last
theme.styles[node["type"]] = s theme.styles[node["type"]] = s
end end

View File

@@ -1,10 +1,5 @@
require "./actions" require "./actions"
require "./constants"
require "./formatter"
require "./rules" require "./rules"
require "./styles"
require "./tartrazine"
require "baked_file_system"
require "base58" require "base58"
require "json" require "json"
require "log" require "log"
@@ -38,12 +33,6 @@ module Tartrazine
end end
end end
# Container for the lexer definitions in the "../lexers" folder, which is
# given relative to this file's directory (__DIR__) and registered through
# BakedFileSystem's `bake_folder`.
# NOTE(review): presumably this embeds the files in the compiled binary —
# confirm against the baked_file_system documentation.
class LexerFiles
extend BakedFileSystem
bake_folder "../lexers", __DIR__
end
# A token, the output of the tokenizer # A token, the output of the tokenizer
alias Token = NamedTuple(type: String, value: String) alias Token = NamedTuple(type: String, value: String)
@@ -83,22 +72,22 @@ module Tartrazine
# Loop through the text, applying rules # Loop through the text, applying rules
while pos < text.size while pos < text.size
state = states[@state_stack.last] state = states[@state_stack.last]
# Log.trace { "Stack is #{@state_stack} State is #{state.name}, pos is #{pos}, text is #{text[pos..pos + 10]}" } Log.trace { "Stack is #{@state_stack} State is #{state.name}, pos is #{pos}, text is #{text[pos..pos + 10]}" }
state.rules.each do |rule| state.rules.each do |rule|
matched, new_pos, new_tokens = rule.match(text, pos, self) matched, new_pos, new_tokens = rule.match(text, pos, self)
if matched if matched
# Move position forward, save the tokens, # Move position forward, save the tokens,
# tokenize from the new position # tokenize from the new position
# Log.trace { "MATCHED: #{rule.xml}" } Log.trace { "MATCHED: #{rule.xml}" }
pos = new_pos pos = new_pos
tokens += new_tokens tokens += new_tokens
break break
end end
# Log.trace { "NOT MATCHED: #{rule.xml}" } Log.trace { "NOT MATCHED: #{rule.xml}" }
end end
# If no rule matches, emit an error token # If no rule matches, emit an error token
unless matched unless matched
# Log.trace { "Error at #{pos}" } Log.trace { "Error at #{pos}" }
tokens << {type: "Error", value: "#{text[pos]}"} tokens << {type: "Error", value: "#{text[pos]}"}
pos += 1 pos += 1
end end
@@ -193,7 +182,7 @@ module Tartrazine
end end
def self.lexer(name : String) : Lexer def self.lexer(name : String) : Lexer
Lexer.from_xml(LexerFiles.get("/#{name}.xml").gets_to_end) Lexer.from_xml(File.read("lexers/#{name}.xml"))
end end
end end