mirror of https://github.com/ralsina/tartrazine.git
synced 2025-06-07 20:20:26 -03:00

Compare commits: 5c074344d5 ... 84ee7e6934 (17 commits)

Commits (SHA1):
84ee7e6934
89d212b71c
a92d2501f7
6b44bcb5ad
86a5894429
be12e0f4f1
96dcb7e15e
d1762f477a
f98f44365f
d0c2b1764a
e6a292ade0
4ced996f90
fd5af6ba3b
47237eecc3
a0ff4e0118
ece3d4163a
3180168261
Dockerfile.static (new file, 15 lines)
@@ -0,0 +1,15 @@
FROM --platform=${TARGETPLATFORM:-linux/amd64} alpine:3.20 AS build
RUN apk add --no-cache \
    crystal \
    shards \
    yaml-dev \
    yaml-static \
    openssl-dev \
    openssl-libs-static \
    libxml2-dev \
    libxml2-static \
    zlib-dev \
    zlib-static \
    xz-dev \
    xz-static \
    make
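The *-dev/*-static package pairs above are what let the Crystal compiler link yaml, openssl, libxml2, zlib and xz statically in the build stage. As a hedged sketch (the image tag is illustrative, not from this diff), the file would be driven with something like `docker build -f Dockerfile.static -t tartrazine-static .`; the `--platform=${TARGETPLATFORM:-linux/amd64}` form defaults to linux/amd64 when the builder supplies no TARGETPLATFORM.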
@@ -36,9 +36,7 @@ It has 332 themes (64 from Chroma, the rest are base16 themes via

## Installation

This will have a CLI tool that can be installed, but it's not
there yet.

This has a CLI but it's not generally usable.

## Usage

@@ -1,24 +1,46 @@
import sys
import string
import glob

# Run it as grep token lexers/* | python scripts/token_abbrevs.py

tokens = {"Highlight"}
abbrevs = {"Highlight": "hl"}

def abbr(line):
    return "".join(c for c in line if c in string.ascii_uppercase).lower()

abbrevs = {}
tokens = set([])
for line in sys.stdin:
    if "<token" not in line:
        continue
    line = line.strip()
    line = line.split('<token ', 1)[-1]
    line = line.split('"')[1]
    abbrevs[line] = abbr(line)
    tokens.add(line)

def check_abbrevs():
    if len(abbrevs) != len(tokens):
        print("Warning: Abbreviations are not unique")
        print(len(abbrevs), len(tokens))
        sys.exit(1)

print("Abbreviations: {")

# Processes all files in lexers looking for token names
for fname in glob.glob("lexers/*.xml"):
    with open(fname) as f:
        for line in f:
            if "<token" not in line:
                continue
            line = line.strip()
            line = line.split('<token ', 1)[-1]
            line = line.split('"')[1]
            abbrevs[line] = abbr(line)
            tokens.add(line)
check_abbrevs()

# Processes all files in styles looking for token names too
for fname in glob.glob("styles/*.xml"):
    with open(fname) as f:
        for line in f:
            if "<entry" not in line:
                continue
            line = line.strip()
            line = line.split('type=', 1)[-1]
            line = line.split('"')[1]
            abbrevs[line] = abbr(line)
            tokens.add(line)
check_abbrevs()

print("Abbreviations = {")
for k, v in abbrevs.items():
    print(f'    "{k}" => "{v}",')
print("}")
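The abbr() helper keeps only the uppercase letters of a token name and downcases them; that scheme produces the table in src/constants.cr below. A one-line standalone sketch of the same transformation in Crystal (not part of this diff):

puts "LiteralStringDouble".chars.select(&.uppercase?).join.downcase # => "lsd"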
@@ -1,5 +1,5 @@
name: tartrazine
version: 0.1.0
version: 0.1.1

authors:
  - Roberto Alsina <roberto.alsina@gmail.com>

@@ -15,6 +15,8 @@ dependencies:
    github: crystal-china/base58.cr
  sixteen:
    github: ralsina/sixteen
  docopt:
    github: chenkovsky/docopt.cr

crystal: ">= 1.13.0"

src/constants.cr (165 changed lines)
@@ -1,92 +1,99 @@
module Tartrazine
  Abbreviations = {
    "Background" => "b",
    "Text" => "t",
    "CodeLine" => "cl",
    "Comment" => "c",
    "CommentHashbang" => "ch",
    "CommentMultiline" => "cm",
    "CommentPreproc" => "cp",
    "CommentPreprocFile" => "cpf",
    "CommentSingle" => "cs",
    "CommentSpecial" => "cs",
    "NameVariable" => "nv",
    "Keyword" => "k",
    "NameFunction" => "nf",
    "Punctuation" => "p",
    "Operator" => "o",
    "LiteralNumberInteger" => "lni",
    "NameBuiltin" => "nb",
    "Name" => "n",
    "OperatorWord" => "ow",
    "LiteralStringSingle" => "lss",
    "Literal" => "l",
    "NameClass" => "nc",
    "CommentMultiline" => "cm",
    "LiteralStringRegex" => "lsr",
    "KeywordDeclaration" => "kd",
    "KeywordConstant" => "kc",
    "NameOther" => "no",
    "LiteralNumberFloat" => "lnf",
    "LiteralNumberHex" => "lnh",
    "LiteralStringDouble" => "lsd",
    "KeywordType" => "kt",
    "NameNamespace" => "nn",
    "NameAttribute" => "na",
    "KeywordReserved" => "kr",
    "CommentPreproc" => "cp",
    "KeywordNamespace" => "kn",
    "NameConstant" => "nc",
    "NameLabel" => "nl",
    "LiteralString" => "ls",
    "LiteralStringChar" => "lsc",
    "TextWhitespace" => "tw",
    "LiteralStringEscape" => "lse",
    "LiteralNumber" => "ln",
    "Other" => "o",
    "LiteralStringBoolean" => "lsb",
    "NameProperty" => "np",
    "Comment" => "c",
    "NameTag" => "nt",
    "LiteralStringOther" => "lso",
    "NameVariableGlobal" => "nvg",
    "NameBuiltinPseudo" => "nbp",
    "LiteralNumberBin" => "lnb",
    "KeywordPseudo" => "kp",
    "CommentPreprocFile" => "cpf",
    "LiteralStringAffix" => "lsa",
    "LiteralStringDelimiter" => "lsd",
    "LiteralNumberOct" => "lno",
    "Error" => "e",
    "Generic" => "g",
    "LiteralNumberIntegerLong" => "lnil",
    "NameDecorator" => "nd",
    "LiteralStringInterpol" => "lsi",
    "LiteralStringBacktick" => "lsb",
    "GenericPrompt" => "gp",
    "GenericOutput" => "go",
    "LiteralStringName" => "lsn",
    "LiteralStringHeredoc" => "lsh",
    "LiteralStringSymbol" => "lss",
    "NameVariableInstance" => "nvi",
    "LiteralOther" => "lo",
    "NameVariableClass" => "nvc",
    "NameOperator" => "no",
    "None" => "n",
    "LiteralStringDoc" => "lsd",
    "NameException" => "ne",
    "GenericSubheading" => "gs",
    "GenericStrong" => "gs",
    "GenericDeleted" => "gd",
    "GenericInserted" => "gi",
    "GenericHeading" => "gh",
    "NameEntity" => "ne",
    "NamePseudo" => "np",
    "CommentHashbang" => "ch",
    "TextPunctuation" => "tp",
    "NameVariableAnonymous" => "nva",
    "NameVariableMagic" => "nvm",
    "NameFunctionMagic" => "nfm",
    "GenericEmph" => "ge",
    "GenericUnderline" => "gu",
    "LiteralStringAtom" => "lsa",
    "LiteralDate" => "ld",
    "GenericError" => "ge",
    "TextSymbol" => "ts",
    "GenericHeading" => "gh",
    "GenericInserted" => "gi",
    "GenericOutput" => "go",
    "GenericPrompt" => "gp",
    "GenericStrong" => "gs",
    "GenericSubheading" => "gs",
    "GenericTraceback" => "gt",
    "GenericUnderline" => "gu",
    "Keyword" => "k",
    "KeywordConstant" => "kc",
    "KeywordDeclaration" => "kd",
    "KeywordNamespace" => "kn",
    "KeywordPseudo" => "kp",
    "KeywordReserved" => "kr",
    "KeywordType" => "kt",
    "LineHighlight" => "lh",
    "LineNumbers" => "ln",
    "LineNumbersTable" => "lnt",
    "LineTable" => "lt",
    "LineTableTD" => "lttd",
    "Literal" => "l",
    "LiteralDate" => "ld",
    "LiteralNumber" => "ln",
    "LiteralNumberBin" => "lnb",
    "LiteralNumberFloat" => "lnf",
    "LiteralNumberHex" => "lnh",
    "LiteralNumberInteger" => "lni",
    "LiteralNumberIntegerLong" => "lnil",
    "LiteralNumberOct" => "lno",
    "LiteralOther" => "lo",
    "LiteralString" => "ls",
    "LiteralStringAffix" => "lsa",
    "LiteralStringAtom" => "lsa",
    "LiteralStringBacktick" => "lsb",
    "LiteralStringBoolean" => "lsb",
    "LiteralStringChar" => "lsc",
    "LiteralStringDelimiter" => "lsd",
    "LiteralStringDoc" => "lsd",
    "LiteralStringDouble" => "lsd",
    "LiteralStringEscape" => "lse",
    "LiteralStringHeredoc" => "lsh",
    "LiteralStringInterpol" => "lsi",
    "LiteralStringName" => "lsn",
    "LiteralStringOther" => "lso",
    "LiteralStringRegex" => "lsr",
    "LiteralStringSingle" => "lss",
    "LiteralStringSymbol" => "lss",
    "Name" => "n",
    "NameAttribute" => "na",
    "NameBuiltin" => "nb",
    "NameBuiltinPseudo" => "nbp",
    "NameClass" => "nc",
    "NameConstant" => "nc",
    "NameDecorator" => "nd",
    "NameEntity" => "ne",
    "NameException" => "ne",
    "NameFunction" => "nf",
    "NameFunctionMagic" => "nfm",
    "NameKeyword" => "nk",
    "NameLabel" => "nl",
    "NameNamespace" => "nn",
    "NameOperator" => "no",
    "NameOther" => "no",
    "NameProperty" => "np",
    "NamePseudo" => "np",
    "NameTag" => "nt",
    "NameVariable" => "nv",
    "NameVariableAnonymous" => "nva",
    "NameVariableClass" => "nvc",
    "NameVariableGlobal" => "nvg",
    "NameVariableInstance" => "nvi",
    "NameVariableMagic" => "nvm",
    "None" => "n",
    "Operator" => "o",
    "OperatorWord" => "ow",
    "Other" => "o",
    "Punctuation" => "p",
    "Text" => "t",
    "TextPunctuation" => "tp",
    "TextSymbol" => "ts",
    "TextWhitespace" => "tw",
  }
end
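These abbreviations become the CSS class names emitted by the HTML formatter (prefixed with class_prefix, see src/formatters/html.cr below). Illustrative lookups:

Tartrazine::Abbreviations["KeywordType"]   # => "kt"
Tartrazine::Abbreviations["LineHighlight"] # => "lh"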
@@ -19,92 +19,4 @@ module Tartrazine
      raise Exception.new("Not implemented")
    end
  end

  class Ansi < Formatter
    def format(text : String, lexer : Lexer, theme : Theme) : String
      output = String.build do |outp|
        lexer.tokenize(text).each do |token|
          outp << self.colorize(token[:value], token[:type], theme)
        end
      end
      output
    end

    def colorize(text : String, token : String, theme : Theme) : String
      style = theme.styles.fetch(token, nil)
      return text if style.nil?
      if theme.styles.has_key?(token)
        s = theme.styles[token]
      else
        # Themes don't contain information for each specific
        # token type. However, they may contain information
        # for a parent style. Worst case, we go to the root
        # (Background) style.
        s = theme.styles[theme.style_parents(token).reverse.find { |parent|
          theme.styles.has_key?(parent)
        }]
      end
      colorized = text.colorize(s.color.try &.colorize)
      # Intentionally not setting background color
      colorized.mode(:bold) if s.bold
      colorized.mode(:italic) if s.italic
      colorized.mode(:underline) if s.underline
      colorized.to_s
    end
  end

  class Html < Formatter
    def format(text : String, lexer : Lexer, theme : Theme) : String
      output = String.build do |outp|
        outp << "<html><head><style>"
        outp << get_style_defs(theme)
        outp << "</style></head><body>"
        outp << "<pre class=\"#{get_css_class("Background", theme)}\"><code class=\"#{get_css_class("Background", theme)}\">"
        lexer.tokenize(text).each do |token|
          fragment = "<span class=\"#{get_css_class(token[:type], theme)}\">#{token[:value]}</span>"
          outp << fragment
        end
        outp << "</code></pre></body></html>"
      end
      output
    end

    # ameba:disable Metrics/CyclomaticComplexity
    def get_style_defs(theme : Theme) : String
      output = String.build do |outp|
        theme.styles.each do |token, style|
          outp << ".#{get_css_class(token, theme)} {"
          # These are set or nil
          outp << "color: #{style.color.try &.hex};" if style.color
          outp << "background-color: #{style.background.try &.hex};" if style.background
          outp << "border: 1px solid #{style.border.try &.hex};" if style.border

          # These are true/false/nil
          outp << "border: none;" if style.border == false
          outp << "font-weight: bold;" if style.bold
          outp << "font-weight: 400;" if style.bold == false
          outp << "font-style: italic;" if style.italic
          outp << "font-style: normal;" if style.italic == false
          outp << "text-decoration: underline;" if style.underline
          outp << "text-decoration: none;" if style.underline == false

          outp << "}"
        end
      end
      output
    end

    # Given a token type, return the CSS class to use.
    def get_css_class(token, theme)
      return Abbreviations[token] if theme.styles.has_key?(token)

      # Themes don't contain information for each specific
      # token type. However, they may contain information
      # for a parent style. Worst case, we go to the root
      # (Background) style.
      Abbreviations[theme.style_parents(token).reverse.find { |parent|
        theme.styles.has_key?(parent)
      }]
    end
  end
end
src/formatters/ansi.cr (new file, 37 lines)
@@ -0,0 +1,37 @@
require "../formatter"

module Tartrazine
  class Ansi < Formatter
    def format(text : String, lexer : Lexer, theme : Theme) : String
      output = String.build do |outp|
        lexer.tokenize(text).each do |token|
          outp << self.colorize(token[:value], token[:type], theme)
        end
      end
      output
    end

    def colorize(text : String, token : String, theme : Theme) : String
      style = theme.styles.fetch(token, nil)
      return text if style.nil?
      if theme.styles.has_key?(token)
        s = theme.styles[token]
      else
        # Themes don't contain information for each specific
        # token type. However, they may contain information
        # for a parent style. Worst case, we go to the root
        # (Background) style.
        s = theme.styles[theme.style_parents(token).reverse.find { |parent|
          theme.styles.has_key?(parent)
        }]
      end
      colorized = text.colorize
      s.color.try { |col| colorized = colorized.fore(col.colorize) }
      # Intentionally not setting background color
      colorized.mode(:bold) if s.bold
      colorized.mode(:italic) if s.italic
      colorized.mode(:underline) if s.underline
      colorized.to_s
    end
  end
end
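A minimal usage sketch for the extracted ANSI formatter (the "crystal" lexer name appears in src/main.cr below; the theme name is an assumption):

lexer = Tartrazine.lexer("crystal")
theme = Tartrazine.theme("default") # theme name assumed
puts Tartrazine::Ansi.new.format(File.read("src/main.cr"), lexer, theme)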
src/formatters/html.cr (new file, 127 lines)
@@ -0,0 +1,127 @@
require "../formatter"

module Tartrazine
  class Html < Formatter
    # property line_number_in_table : Bool = false
    # property with_classes : Bool = true
    property class_prefix : String = ""
    property highlight_lines : Array(Range(Int32, Int32)) = [] of Range(Int32, Int32)
    property line_number_id_prefix : String = "line-"
    property line_number_start : Int32 = 1
    property tab_width = 8
    property? line_numbers : Bool = false
    property? linkable_line_numbers : Bool = true
    property? standalone : Bool = false
    property? surrounding_pre : Bool = true
    property? wrap_long_lines : Bool = false

    def format(text : String, lexer : Lexer, theme : Theme) : String
      text = format_text(text, lexer, theme)
      if standalone?
        text = wrap_standalone(text, theme)
      end
      text
    end

    # Wrap text into a full HTML document, including the CSS for the theme
    def wrap_standalone(text, theme) : String
      output = String.build do |outp|
        outp << "<!DOCTYPE html><html><head><style>"
        outp << get_style_defs(theme)
        outp << "</style></head><body>"
        outp << text
        outp << "</body></html>"
      end
      output
    end

    def format_text(text : String, lexer : Lexer, theme : Theme) : String
      lines = group_tokens_in_lines(lexer.tokenize(text))
      output = String.build do |outp|
        if surrounding_pre?
          pre_style = wrap_long_lines? ? "style=\"white-space: pre-wrap; word-break: break-word;\"" : ""
          outp << "<pre class=\"#{get_css_class("Background", theme)}\" #{pre_style}>"
        end
        outp << "<code class=\"#{get_css_class("Background", theme)}\">"
        lines.each_with_index(offset: line_number_start - 1) do |line, i|
          line_label = line_numbers? ? "#{i + 1}".rjust(4).ljust(5) : ""
          line_class = highlighted?(i + 1) ? "class=\"#{get_css_class("LineHighlight", theme)}\"" : ""
          line_id = linkable_line_numbers? ? "id=\"#{line_number_id_prefix}#{i + 1}\"" : ""
          outp << "<span #{line_id} #{line_class}>#{line_label}</span>"
          line.each do |token|
            fragment = "<span class=\"#{get_css_class(token[:type], theme)}\">#{token[:value]}</span>"
            outp << fragment
          end
        end
        outp << "</code></pre>"
      end
      output
    end

    # ameba:disable Metrics/CyclomaticComplexity
    def get_style_defs(theme : Theme) : String
      output = String.build do |outp|
        theme.styles.each do |token, style|
          outp << ".#{get_css_class(token, theme)} {"
          # These are set or nil
          outp << "color: ##{style.color.try &.hex};" if style.color
          outp << "background-color: ##{style.background.try &.hex};" if style.background
          outp << "border: 1px solid ##{style.border.try &.hex};" if style.border

          # These are true/false/nil
          outp << "border: none;" if style.border == false
          outp << "font-weight: bold;" if style.bold
          outp << "font-weight: 400;" if style.bold == false
          outp << "font-style: italic;" if style.italic
          outp << "font-style: normal;" if style.italic == false
          outp << "text-decoration: underline;" if style.underline
          outp << "text-decoration: none;" if style.underline == false
          outp << "tab-size: #{tab_width};" if token == "Background"

          outp << "}"
        end
      end
      output
    end

    # Given a token type, return the CSS class to use.
    def get_css_class(token, theme)
      return class_prefix + Abbreviations[token] if theme.styles.has_key?(token)

      # Themes don't contain information for each specific
      # token type. However, they may contain information
      # for a parent style. Worst case, we go to the root
      # (Background) style.
      class_prefix + Abbreviations[theme.style_parents(token).reverse.find { |parent|
        theme.styles.has_key?(parent)
      }]
    end

    def highlighted?(line : Int) : Bool
      highlight_lines.any?(&.includes?(line))
    end

    def group_tokens_in_lines(tokens : Array(Token)) : Array(Array(Token))
      split_tokens = [] of Token
      tokens.each do |token|
        if token[:value].includes?("\n")
          values = token[:value].split("\n")
          values.each_with_index do |value, index|
            value += "\n" if index < values.size - 1
            split_tokens << {type: token[:type], value: value}
          end
        else
          split_tokens << token
        end
      end
      lines = [Array(Token).new]
      split_tokens.each do |token|
        lines.last << token
        if token[:value].includes?("\n")
          lines << Array(Token).new
        end
      end
      lines
    end
  end
end
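To make group_tokens_in_lines concrete, a sketch with invented token values:

tokens = [{type: "Text", value: "a\nb"}, {type: "Keyword", value: "def"}]
# First pass splits on newlines: {Text, "a\n"}, {Text, "b"}, {Keyword, "def"}
# Second pass starts a new line after each token that ends in "\n":
# [[{Text, "a\n"}], [{Text, "b"}, {Keyword, "def"}]]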
src/formatters/json.cr (new file, 11 lines)
@@ -0,0 +1,11 @@
require "../formatter"

module Tartrazine
  class Json < Formatter
    property name = "json"

    def format(text : String, lexer : Lexer, _theme : Theme) : String
      lexer.tokenize(text).to_json
    end
  end
end
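Since Token is NamedTuple(type: String, value: String) (see src/lexer.cr below), to_json yields an array of two-field objects; an assumed example of the output shape:

[{"type":"Keyword","value":"def"},{"type":"TextWhitespace","value":" "}]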
src/lexer.cr (new file, 180 lines)
@@ -0,0 +1,180 @@
module Tartrazine
  class LexerFiles
    extend BakedFileSystem

    bake_folder "../lexers", __DIR__
  end

  # This implements a lexer for Pygments RegexLexers as expressed
  # in Chroma's XML serialization.
  #
  # For explanations on what actions and states do
  # the Pygments documentation is a good place to start.
  # https://pygments.org/docs/lexerdevelopment/
  class Lexer
    property config = {
      name:             "",
      aliases:          [] of String,
      filenames:        [] of String,
      mime_types:       [] of String,
      priority:         0.0,
      case_insensitive: false,
      dot_all:          false,
      not_multiline:    false,
      ensure_nl:        false,
    }
    property xml : String = ""

    property states = {} of String => State

    property state_stack = ["root"]

    # Turn the text into a list of tokens. The `usingself` parameter
    # is true when the lexer is being used to tokenize a string
    # from a larger text that is already being tokenized.
    # So, when it's true, we don't modify the text.
    def tokenize(text, usingself = false) : Array(Token)
      @state_stack = ["root"]
      tokens = [] of Token
      pos = 0
      matched = false

      # Respect the `ensure_nl` config option
      if text.size > 0 && text[-1] != '\n' && config[:ensure_nl] && !usingself
        text += "\n"
      end

      # Loop through the text, applying rules
      while pos < text.size
        state = states[@state_stack.last]
        # Log.trace { "Stack is #{@state_stack} State is #{state.name}, pos is #{pos}, text is #{text[pos..pos + 10]}" }
        state.rules.each do |rule|
          matched, new_pos, new_tokens = rule.match(text, pos, self)
          if matched
            # Move position forward, save the tokens,
            # tokenize from the new position
            # Log.trace { "MATCHED: #{rule.xml}" }
            pos = new_pos
            tokens += new_tokens
            break
          end
          # Log.trace { "NOT MATCHED: #{rule.xml}" }
        end
        # If no rule matches, emit an error token
        unless matched
          # Log.trace { "Error at #{pos}" }
          tokens << {type: "Error", value: "#{text[pos]}"}
          pos += 1
        end
      end
      Lexer.collapse_tokens(tokens)
    end

    # Collapse consecutive tokens of the same type for easier comparison
    # and smaller output
    def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token)
      result = [] of Tartrazine::Token
      tokens = tokens.reject { |token| token[:value] == "" }
      tokens.each do |token|
        if result.empty?
          result << token
          next
        end
        last = result.last
        if last[:type] == token[:type]
          new_token = {type: last[:type], value: last[:value] + token[:value]}
          result.pop
          result << new_token
        else
          result << token
        end
      end
      result
    end

    # ameba:disable Metrics/CyclomaticComplexity
    def self.from_xml(xml : String) : Lexer
      l = Lexer.new
      l.xml = xml
      lexer = XML.parse(xml).first_element_child
      if lexer
        config = lexer.children.find { |node|
          node.name == "config"
        }
        if config
          l.config = {
            name:             xml_to_s(config, name) || "",
            aliases:          xml_to_a(config, _alias) || [] of String,
            filenames:        xml_to_a(config, filename) || [] of String,
            mime_types:       xml_to_a(config, mime_type) || [] of String,
            priority:         xml_to_f(config, priority) || 0.0,
            not_multiline:    xml_to_s(config, not_multiline) == "true",
            dot_all:          xml_to_s(config, dot_all) == "true",
            case_insensitive: xml_to_s(config, case_insensitive) == "true",
            ensure_nl:        xml_to_s(config, ensure_nl) == "true",
          }
        end

        rules = lexer.children.find { |node|
          node.name == "rules"
        }
        if rules
          # Rules contains states 🤷
          rules.children.select { |node|
            node.name == "state"
          }.each do |state_node|
            state = State.new
            state.name = state_node["name"]
            if l.states.has_key?(state.name)
              raise Exception.new("Duplicate state: #{state.name}")
            else
              l.states[state.name] = state
            end
            # And states contain rules 🤷
            state_node.children.select { |node|
              node.name == "rule"
            }.each do |rule_node|
              case rule_node["pattern"]?
              when nil
                if rule_node.first_element_child.try &.name == "include"
                  rule = IncludeStateRule.new(rule_node)
                else
                  rule = UnconditionalRule.new(rule_node)
                end
              else
                rule = Rule.new(rule_node,
                  multiline: !l.config[:not_multiline],
                  dotall: l.config[:dot_all],
                  ignorecase: l.config[:case_insensitive])
              end
              state.rules << rule
            end
          end
        end
      end
      l
    end
  end

  # A Lexer state. A state has a name and a list of rules.
  # The state machine has a state stack containing references
  # to states to decide which rules to apply.
  class State
    property name : String = ""
    property rules = [] of Rule

    def +(other : State)
      new_state = State.new
      new_state.name = Random.base58(8)
      new_state.rules = rules + other.rules
      new_state
    end
  end

  # A token, the output of the tokenizer
  alias Token = NamedTuple(type: String, value: String)

  def self.lexer(name : String) : Lexer
    Lexer.from_xml(LexerFiles.get("/#{name}.xml").gets_to_end)
  end
end
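For reference, a hedged sketch of the XML shape from_xml consumes. The config, rules, state, rule and include elements plus the pattern attribute all appear in this diff; the <token type="..."/> form is inferred from scripts/token_abbrevs.py and is an assumption:

# Minimal sketch; element details beyond those named above are assumed.
xml = <<-XML
  <lexer>
    <config>
      <name>mini</name>
      <filename>*.mini</filename>
    </config>
    <rules>
      <state name="root">
        <rule pattern="\\s+"><token type="TextWhitespace"/></rule>
        <rule pattern="\\w+"><token type="Name"/></rule>
      </state>
    </rules>
  </lexer>
  XML
lexer = Tartrazine::Lexer.from_xml(xml)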
src/main.cr (31 changed lines)
@@ -1,5 +1,34 @@
require "./**"

HELP = <<-HELP
  tartrazine: a syntax highlighting tool

  Usage:

    tartrazine FILE -f html [-t theme][--standalone][--line-numbers]
                            [-l lexer] [-o output][--css]
    tartrazine FILE -f terminal [-t theme][-l lexer][-o output]
    tartrazine FILE -f json [-o output]
    tartrazine --list-themes
    tartrazine --list-lexers

    -f <formatter>    Format to use (html, terminal, json)
    -t <theme>        Theme to use (see --list-themes)
    -l <lexer>        Lexer (language) to use (see --list-lexers)
    -o <output>       Output file (default: stdout)
    --standalone      Generate a standalone HTML file
    --css             Generate a CSS file for the theme
    --line-numbers    Include line numbers in the output
  HELP

lexer = Tartrazine.lexer("crystal")
theme = Tartrazine.theme(ARGV[1])
puts Tartrazine::Html.new.format(File.read(ARGV[0]), lexer, theme)
formatter = Tartrazine::Json.new
# formatter.standalone = true
# formatter.class_prefix = "hl-"
# formatter.line_number_id_prefix = "ln-"
# formatter.line_numbers = true
# formatter.highlight_lines = [3..7, 20..30]
# formatter.linkable_line_numbers = false
# formatter.wrap_long_lines = false
puts formatter.format(File.read(ARGV[0]), lexer, theme)
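Per the HELP text, an invocation would look like `tartrazine src/main.cr -f json -t default -o out.json` (file and theme names illustrative). Note that in this commit the code after the heredoc still reads ARGV positionally, so the docopt grammar documents the intended interface rather than the current one.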
@@ -3,7 +3,7 @@ require "./constants"
require "./formatter"
require "./rules"
require "./styles"
require "./tartrazine"
require "./lexer"

# These are lexer rules. They match with the text being parsed
# and perform actions, either emitting tokens or changing the
@@ -11,8 +11,16 @@ module Tartrazine
  alias Color = Sixteen::Color

  def self.theme(name : String) : Theme
    return Theme.from_base16(name[7..]) if name.starts_with? "base16_"
    Theme.from_xml(ThemeFiles.get("/#{name}.xml").gets_to_end)
    begin
      return Theme.from_base16(name)
    rescue ex : Exception
      raise ex unless ex.message.try &.includes? "Theme not found"
    end
    begin
      return Theme.from_xml(ThemeFiles.get("/#{name}.xml").gets_to_end)
    rescue
      raise Exception.new("Theme #{name} not found")
    end
  end

  class ThemeFiles
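The new lookup order is: explicit base16_ prefix, then any base16 theme by bare name, then the bundled XML themes, with a clear error if all three miss. A sketch (theme names assumed to exist):

Tartrazine.theme("base16_monokai") # explicit base16 prefix still works
Tartrazine.theme("monokai")        # tried as a base16 theme first
Tartrazine.theme("github")         # falls through to the bundled XML themes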
@@ -104,6 +112,7 @@ module Tartrazine
    # https://github.com/mohd-akram/base16-pygments/

    theme.styles["Background"] = Style.new(color: t["base05"], background: t["base00"])
    theme.styles["LineHighlight"] = Style.new(color: t["base0D"], background: t["base01"])
    theme.styles["Text"] = Style.new(color: t["base05"])
    theme.styles["Error"] = Style.new(color: t["base08"])
    theme.styles["Comment"] = Style.new(color: t["base03"])
@@ -162,7 +171,31 @@ module Tartrazine

        theme.styles[node["type"]] = s
      end
      # We really want a LineHighlight class
      if !theme.styles.has_key?("LineHighlight")
        theme.styles["LineHighlight"] = Style.new
        theme.styles["LineHighlight"].background = make_highlight_color(theme.styles["Background"].background)
      end
      theme
    end

    # If the color is dark, make it brighter and vice versa
    def self.make_highlight_color(base_color)
      # FIXME: do a proper luminance adjustment in the color class
      return nil if base_color.nil?
      color = Color.new(base_color.hex)
      if base_color.light?
        color.r = [(base_color.r - 40), 255].min.to_u8
        color.g = [(base_color.g - 40), 255].min.to_u8
        color.b = [(base_color.b - 40), 255].min.to_u8
      else
        color.r = [(base_color.r + 40), 255].min.to_u8
        color.g = [(base_color.g + 40), 255].min.to_u8
        color.b = [(base_color.b + 40), 255].min.to_u8
      end
      # Bug in color, setting rgb doesn't update hex
      color.hex = "#{color.r.to_s(16)}#{color.g.to_s(16)}#{color.b.to_s(16)}"
      color
    end
  end
end
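As a concrete instance of the ±40 channel adjustment (values illustrative): a dark #282828 background has 0x28 = 40 per channel, so 40 + 40 = 80 = 0x50, and the synthesized LineHighlight background comes out near #505050.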
@@ -12,189 +12,9 @@ require "xml"

module Tartrazine
  extend self
  VERSION = "0.1.0"
  VERSION = "0.1.1"

  Log = ::Log.for("tartrazine")

  # This implements a lexer for Pygments RegexLexers as expressed
  # in Chroma's XML serialization.
  #
  # For explanations on what actions and states do
  # the Pygments documentation is a good place to start.
  # https://pygments.org/docs/lexerdevelopment/

  # A Lexer state. A state has a name and a list of rules.
  # The state machine has a state stack containing references
  # to states to decide which rules to apply.
  class State
    property name : String = ""
    property rules = [] of Rule

    def +(other : State)
      new_state = State.new
      new_state.name = Random.base58(8)
      new_state.rules = rules + other.rules
      new_state
    end
  end

  class LexerFiles
    extend BakedFileSystem

    bake_folder "../lexers", __DIR__
  end

  # A token, the output of the tokenizer
  alias Token = NamedTuple(type: String, value: String)

  class Lexer
    property config = {
      name:             "",
      aliases:          [] of String,
      filenames:        [] of String,
      mime_types:       [] of String,
      priority:         0.0,
      case_insensitive: false,
      dot_all:          false,
      not_multiline:    false,
      ensure_nl:        false,
    }
    property xml : String = ""

    property states = {} of String => State

    property state_stack = ["root"]

    # Turn the text into a list of tokens. The `usingself` parameter
    # is true when the lexer is being used to tokenize a string
    # from a larger text that is already being tokenized.
    # So, when it's true, we don't modify the text.
    def tokenize(text, usingself = false) : Array(Token)
      @state_stack = ["root"]
      tokens = [] of Token
      pos = 0
      matched = false

      # Respect the `ensure_nl` config option
      if text.size > 0 && text[-1] != '\n' && config[:ensure_nl] && !usingself
        text += "\n"
      end

      # Loop through the text, applying rules
      while pos < text.size
        state = states[@state_stack.last]
        # Log.trace { "Stack is #{@state_stack} State is #{state.name}, pos is #{pos}, text is #{text[pos..pos + 10]}" }
        state.rules.each do |rule|
          matched, new_pos, new_tokens = rule.match(text, pos, self)
          if matched
            # Move position forward, save the tokens,
            # tokenize from the new position
            # Log.trace { "MATCHED: #{rule.xml}" }
            pos = new_pos
            tokens += new_tokens
            break
          end
          # Log.trace { "NOT MATCHED: #{rule.xml}" }
        end
        # If no rule matches, emit an error token
        unless matched
          # Log.trace { "Error at #{pos}" }
          tokens << {type: "Error", value: "#{text[pos]}"}
          pos += 1
        end
      end
      Lexer.collapse_tokens(tokens)
    end

    # Collapse consecutive tokens of the same type for easier comparison
    # and smaller output
    def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token)
      result = [] of Tartrazine::Token
      tokens = tokens.reject { |token| token[:value] == "" }
      tokens.each do |token|
        if result.empty?
          result << token
          next
        end
        last = result.last
        if last[:type] == token[:type]
          new_token = {type: last[:type], value: last[:value] + token[:value]}
          result.pop
          result << new_token
        else
          result << token
        end
      end
      result
    end

    # ameba:disable Metrics/CyclomaticComplexity
    def self.from_xml(xml : String) : Lexer
      l = Lexer.new
      l.xml = xml
      lexer = XML.parse(xml).first_element_child
      if lexer
        config = lexer.children.find { |node|
          node.name == "config"
        }
        if config
          l.config = {
            name:             xml_to_s(config, name) || "",
            aliases:          xml_to_a(config, _alias) || [] of String,
            filenames:        xml_to_a(config, filename) || [] of String,
            mime_types:       xml_to_a(config, mime_type) || [] of String,
            priority:         xml_to_f(config, priority) || 0.0,
            not_multiline:    xml_to_s(config, not_multiline) == "true",
            dot_all:          xml_to_s(config, dot_all) == "true",
            case_insensitive: xml_to_s(config, case_insensitive) == "true",
            ensure_nl:        xml_to_s(config, ensure_nl) == "true",
          }
        end

        rules = lexer.children.find { |node|
          node.name == "rules"
        }
        if rules
          # Rules contains states 🤷
          rules.children.select { |node|
            node.name == "state"
          }.each do |state_node|
            state = State.new
            state.name = state_node["name"]
            if l.states.has_key?(state.name)
              raise Exception.new("Duplicate state: #{state.name}")
            else
              l.states[state.name] = state
            end
            # And states contain rules 🤷
            state_node.children.select { |node|
              node.name == "rule"
            }.each do |rule_node|
              case rule_node["pattern"]?
              when nil
                if rule_node.first_element_child.try &.name == "include"
                  rule = IncludeStateRule.new(rule_node)
                else
                  rule = UnconditionalRule.new(rule_node)
                end
              else
                rule = Rule.new(rule_node,
                  multiline: !l.config[:not_multiline],
                  dotall: l.config[:dot_all],
                  ignorecase: l.config[:case_insensitive])
              end
              state.rules << rule
            end
          end
        end
      end
      l
    end
  end

  def self.lexer(name : String) : Lexer
    Lexer.from_xml(LexerFiles.get("/#{name}.xml").gets_to_end)
  end
end

# Convenience macros to parse XML