Merge branch 'main' of github.com:ralsina/tartrazine

This commit is contained in:
Roberto Alsina 2024-08-12 10:11:03 -03:00
commit 78ddc69937
18 changed files with 1427 additions and 165 deletions

View File

@ -1,5 +1,5 @@
build: $(wildcard src/**/*.cr) $(wildcard lexers/*xml) $(wildcard styles/*xml) shard.yml
shards build -Dstrict_multi_assign -Dno_number_autocast
shards build -Dstrict_multi_assign -Dno_number_autocast -d --error-trace
release: $(wildcard src/**/*.cr) $(wildcard lexers/*xml) $(wildcard styles/*xml) shard.yml
shards build --release
static: $(wildcard src/**/*.cr) $(wildcard lexers/*xml) $(wildcard styles/*xml) shard.yml

View File

@ -31,12 +31,21 @@ is a subset of Pygments'.
Currently Tartrazine supports ... 241 languages.
It has 332 themes (64 from Chroma, the rest are base16 themes via
It has 331 themes (63 from Chroma, the rest are base16 themes via
[Sixteen](https://github.com/ralsina/sixteen)
## Installation
This has a CLI but it's not generally usable.
From prebuilt binaries:
Each release provides statically-linked binaries that should
work on any Linux. Get them from the [releases page](https://github.com/ralsina/tartrazine/releases) and put them in your PATH.
To build from source:
1. Clone this repo
2. Run `make` to build the `tartrazine` binary
3. Copy the binary somewhere in your PATH.
## Usage
@ -60,4 +69,4 @@ puts Tartrazine::Html.new.format(File.read(ARGV[0]), lexer, theme)
## Contributors
- [Roberto Alsina](https://github.com/ralsina) - creator and maintainer
- [Roberto Alsina](https://github.com/ralsina) - creator and maintainer

10
TODO.md
View File

@ -2,6 +2,10 @@
## TODO
* Implement styles
* Implement formatters
* Implement lexer loader that respects aliases, etc
* ✅ Implement styles
* ✅ Implement formatters
* ✅ Implement CLI
* ✅ Implement lexer loader that respects aliases
* ✅ Implement lexer loader by file extension
* ✅ Add --line-numbers to terminal formatter
* Implement lexer loader by mime type

54
scripts/lexer_metadata.py Normal file
View File

@ -0,0 +1,54 @@
# This script parses the metadata of all the lexers and generates
# a datafile (src/constants/lexers.cr) with all the information so we
# don't have to instantiate all the lexers to get the information.
#
# Run it from the repository root: both the "lexers/*.xml" glob and the
# output path are relative to the current directory.
import glob
from collections import defaultdict

# Lowercased lexer name or alias -> lexer file name (without extension).
lexer_by_name = {}
# Lowercased mime type -> lexer file name. If several lexers declare the
# same mime type, the last one processed wins.
lexer_by_mimetype = {}
# Lowercased filename glob pattern -> set of lexer file names.
lexer_by_filename = defaultdict(set)


def _tag_text(line):
    """Return the lowercased text between the first '>' and the next '<'."""
    return line.split(">")[1].split("<")[0].lower()


for fname in glob.glob("lexers/*.xml"):
    aliases = set()
    mimetypes = set()
    filenames = set()
    print(fname)
    with open(fname) as f:
        lexer_name = fname.split("/")[-1].split(".")[0]
        for line in f:
            # Only the <config> header carries metadata; stop once it ends.
            if "</config" in line:
                break
            if "<filename>" in line:
                filenames.add(_tag_text(line))
            if "<mime_type>" in line:
                mimetypes.add(_tag_text(line))
            if "<alias>" in line:
                aliases.add(_tag_text(line))
            # The lexer's own name is registered as one more alias.
            if "<name>" in line:
                aliases.add(_tag_text(line))
    for alias in aliases:
        # NOTE(review): this compares the alias with the lexer it is already
        # mapped to, not with `lexer_name`, so an alias equal to its current
        # lexer's name can be silently re-mapped. Confirm this is intended.
        if alias in lexer_by_name and alias != lexer_by_name[alias]:
            raise Exception(f"Alias {alias} already in use by {lexer_by_name[alias]}")
        lexer_by_name[alias] = lexer_name
    for mimetype in mimetypes:
        lexer_by_mimetype[mimetype] = lexer_name
    for filename in filenames:
        lexer_by_filename[filename].add(lexer_name)

with open("src/constants/lexers.cr", "w") as f:
    f.write("module Tartrazine\n")
    f.write(" LEXERS_BY_NAME = {\n")
    for k, v in lexer_by_name.items():
        f.write(f'"{k}" => "{v}", \n')
    f.write("}\n")
    f.write(" LEXERS_BY_MIMETYPE = {\n")
    for k, v in lexer_by_mimetype.items():
        f.write(f'"{k}" => "{v}", \n')
    f.write("}\n")
    f.write(" LEXERS_BY_FILENAME = {\n")
    for k, v in lexer_by_filename.items():
        # Build the Crystal array literal outside the f-string: quotes and
        # backslashes inside an f-string expression only parse on Python 3.12+.
        # Sorting keeps the generated file stable from one run to the next.
        lexer_list = str(sorted(v)).replace("'", '"')
        f.write(f'"{k}" => {lexer_list}, \n')
    f.write("}\n")
    f.write("end\n")

View File

@ -1,3 +1,10 @@
# Script to generate abbreviations for tokens. Parses all lexers
# and styles files to find all token names and generate a unique
# abbreviation for each one. The abbreviations are generated by
# taking the uppercase letters of the token name and converting
# them to lowercase. If the abbreviation is not unique, the script
# will print a warning and exit.
import sys
import string
import glob
@ -40,7 +47,9 @@ for fname in glob.glob("styles/*.xml"):
tokens.add(line)
check_abbrevs()
print("Abbreviations = {")
for k, v in abbrevs.items():
print(f' "{k}" => "{v}",')
print("}")
# Emit the abbreviation table as a Crystal constant, one entry per token
# name, in alphabetical order of the token names.
with open("src/constants/token_abbrevs.cr", "w") as outf:
    outf.write("module Tartrazine\n")
    outf.write(" Abbreviations = {\n")
    for token_name, abbrev in sorted(abbrevs.items()):
        outf.write(f' "{token_name}" => "{abbrev}",\n')
    outf.write(" }\nend\n")

View File

@ -1,5 +1,5 @@
name: tartrazine
version: 0.1.1
version: 0.2.0
authors:
- Roberto Alsina <roberto.alsina@gmail.com>

View File

@ -1,5 +1,4 @@
require "./actions"
require "./constants"
require "./formatter"
require "./rules"
require "./styles"

1160
src/constants/lexers.cr Normal file

File diff suppressed because it is too large Load Diff

View File

@ -22,6 +22,7 @@ module Tartrazine
"GenericSubheading" => "gs",
"GenericTraceback" => "gt",
"GenericUnderline" => "gu",
"Highlight" => "hl",
"Keyword" => "k",
"KeywordConstant" => "kc",
"KeywordDeclaration" => "kd",

View File

@ -1,5 +1,4 @@
require "./actions"
require "./constants"
require "./formatter"
require "./rules"
require "./styles"

View File

@ -2,10 +2,16 @@ require "../formatter"
module Tartrazine
class Ansi < Formatter
property? line_numbers : Bool = false
def format(text : String, lexer : Lexer, theme : Theme) : String
output = String.build do |outp|
lexer.tokenize(text).each do |token|
outp << self.colorize(token[:value], token[:type], theme)
lexer.group_tokens_in_lines(lexer.tokenize(text)).each_with_index do |line, i|
label = line_numbers? ? "#{i + 1}".rjust(4).ljust(5) : ""
outp << label
line.each do |token|
outp << colorize(token[:value], token[:type], theme)
end
end
end
output

View File

@ -1,3 +1,4 @@
require "../constants/token_abbrevs.cr"
require "../formatter"
module Tartrazine
@ -36,10 +37,10 @@ module Tartrazine
end
def format_text(text : String, lexer : Lexer, theme : Theme) : String
lines = group_tokens_in_lines(lexer.tokenize(text))
lines = lexer.group_tokens_in_lines(lexer.tokenize(text))
output = String.build do |outp|
if surrounding_pre?
pre_style= wrap_long_lines? ? "style=\"white-space: pre-wrap; word-break: break-word;\"" : ""
pre_style = wrap_long_lines? ? "style=\"white-space: pre-wrap; word-break: break-word;\"" : ""
outp << "<pre class=\"#{get_css_class("Background", theme)}\" #{pre_style}>"
end
outp << "<code class=\"#{get_css_class("Background", theme)}\">"
@ -47,7 +48,7 @@ module Tartrazine
line_label = line_numbers? ? "#{i + 1}".rjust(4).ljust(5) : ""
line_class = highlighted?(i + 1) ? "class=\"#{get_css_class("LineHighlight", theme)}\"" : ""
line_id = linkable_line_numbers? ? "id=\"#{line_number_id_prefix}#{i + 1}\"" : ""
outp << "<span #{line_id} #{line_class}>#{line_label}</span>"
outp << "<span #{line_id} #{line_class} style=\"user-select: none;\">#{line_label} </span>"
line.each do |token|
fragment = "<span class=\"#{get_css_class(token[:type], theme)}\">#{token[:value]}</span>"
outp << fragment
@ -100,28 +101,5 @@ module Tartrazine
def highlighted?(line : Int) : Bool
highlight_lines.any?(&.includes?(line))
end
# Split a flat token stream into one array of tokens per line.
#
# A token whose value contains newlines is broken into one piece per
# line; every piece except the last keeps a trailing "\n", and each
# such piece closes the line it is appended to.
def group_tokens_in_lines(tokens : Array(Token)) : Array(Array(Token))
  lines = [Array(Token).new]
  tokens.each do |token|
    text = token[:value]
    unless text.includes?("\n")
      lines.last << token
      next
    end
    segments = text.split("\n")
    final_index = segments.size - 1
    segments.each_with_index do |segment, idx|
      if idx < final_index
        # This piece ends a line: restore its newline and open a new line.
        lines.last << {type: token[:type], value: segment + "\n"}
        lines << Array(Token).new
      else
        lines.last << {type: token[:type], value: segment}
      end
    end
  end
  lines
end
end
end

View File

@ -1,3 +1,5 @@
require "./constants/lexers"
module Tartrazine
class LexerFiles
extend BakedFileSystem
@ -5,6 +7,36 @@ module Tartrazine
bake_folder "../lexers", __DIR__
end
# Build the lexer for a language name, or guess it from a file name.
#
# * With neither argument, the "plaintext" lexer is used.
# * With a name other than "autodetect", the name is looked up
#   (case-insensitively) in LEXERS_BY_NAME.
# * Otherwise the base name of `filename` is matched against the glob
#   patterns in LEXERS_BY_FILENAME: no match falls back to "plaintext",
#   one match is used, and several matches raise an exception.
#
# FIXME: support mimetypes
def self.lexer(name : String? = nil, filename : String? = nil) : Lexer
  lexer_file_name =
    if name.nil? && filename.nil?
      LEXERS_BY_NAME["plaintext"]
    elsif name && name != "autodetect"
      LEXERS_BY_NAME[name.downcase]
    else
      # Guess by filename
      basename = File.basename(filename.to_s)
      matches = Set(String).new
      LEXERS_BY_FILENAME.each do |pattern, lexer_names|
        matches.concat(lexer_names) if File.match?(pattern, basename)
      end
      if matches.empty?
        LEXERS_BY_NAME["plaintext"]
      elsif matches.size == 1
        matches.first
      else
        raise Exception.new("Multiple lexers match the filename: #{matches.to_a.join(", ")}")
      end
    end
  Lexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
end
# Return the keys of LEXERS_BY_NAME (the names a lexer can be requested
# by), sorted alphabetically.
def self.lexers : Array(String)
  LEXERS_BY_NAME.keys.sort
end
# This implements a lexer for Pygments RegexLexers as expressed
# in Chroma's XML serialization.
#
@ -92,6 +124,30 @@ module Tartrazine
result
end
# Group tokens into lines, splitting them when a newline is found.
#
# A token whose value contains newlines is broken into one piece per
# line; every piece except the last keeps a trailing "\n", and each
# such piece closes the line it is appended to.
def group_tokens_in_lines(tokens : Array(Token)) : Array(Array(Token))
  lines = [Array(Token).new]
  tokens.each do |token|
    text = token[:value]
    unless text.includes?("\n")
      lines.last << token
      next
    end
    segments = text.split("\n")
    final_index = segments.size - 1
    segments.each_with_index do |segment, idx|
      if idx < final_index
        # This piece ends a line: restore its newline and open a new line.
        lines.last << {type: token[:type], value: segment + "\n"}
        lines << Array(Token).new
      else
        lines.last << {type: token[:type], value: segment}
      end
    end
  end
  lines
end
# ameba:disable Metrics/CyclomaticComplexity
def self.from_xml(xml : String) : Lexer
l = Lexer.new
@ -173,8 +229,4 @@ module Tartrazine
# A token, the output of the tokenizer
alias Token = NamedTuple(type: String, value: String)
def self.lexer(name : String) : Lexer
Lexer.from_xml(LexerFiles.get("/#{name}.xml").gets_to_end)
end
end

View File

@ -1,34 +1,95 @@
require "docopt"
require "./**"
HELP = <<-HELP
tartrazine: a syntax highlighting tool
Usage:
tartrazine (-h, --help)
tartrazine FILE -f html [-t theme][--standalone][--line-numbers]
[-l lexer] [-o output][--css]
tartrazine FILE -f terminal [-t theme][-l lexer][-o output]
[-l lexer][-o output]
tartrazine -f html -t theme --css
tartrazine FILE -f terminal [-t theme][-l lexer][--line-numbers]
[-o output]
tartrazine FILE -f json [-o output]
tartrazine --list-themes
tartrazine --list-lexers
tartrazine --list-formatters
tartrazine --version
-f <formatter> Format to use (html, terminal, json)
-t <theme> Theme to use (see --list-themes)
-l <lexer> Lexer (language) to use (see --list-lexers)
-o <output> Output file (default: stdout)
--standalone Generate a standalone HTML file
--css Generate a CSS file for the theme
--line-numbers Include line numbers in the output
Options:
-f <formatter> Format to use (html, terminal, json)
-t <theme> Theme to use, see --list-themes [default: default-dark]
-l <lexer> Lexer (language) to use, see --list-lexers [default: autodetect]
-o <output> Output file. Default is stdout.
--standalone Generate a standalone HTML file, which includes
all style information. If not given, it will generate just
a HTML fragment ready to include in your own page.
--css Generate a CSS file for the theme called <theme>.css
--line-numbers Include line numbers in the output
-h, --help Show this screen
-v, --version Show version number
HELP
lexer = Tartrazine.lexer("crystal")
theme = Tartrazine.theme(ARGV[1])
formatter = Tartrazine::Json.new
# formatter.standalone = true
# formatter.class_prefix = "hl-"
# formatter.line_number_id_prefix = "ln-"
# formatter.line_numbers = true
# formatter.highlight_lines = [3..7, 20..30]
# formatter.linkable_line_numbers = false
# formatter.wrap_long_lines = false
puts formatter.format(File.read(ARGV[0]), lexer, theme)
# Parse the command line against the usage text in HELP.
options = Docopt.docopt(HELP, ARGV)

# Handle version manually
if options["--version"]
  puts "tartrazine #{Tartrazine::VERSION}"
  exit 0
end

# The --list-* flags print their list and exit without reading any input.
if options["--list-themes"]
  puts Tartrazine.themes.join("\n")
  exit 0
end

if options["--list-lexers"]
  puts Tartrazine.lexers.join("\n")
  exit 0
end

if options["--list-formatters"]
  puts "html\njson\nterminal"
  exit 0
end

if options["-f"]
  # `formatter` starts as the formatter's name and is then replaced by
  # the formatter object selected below.
  formatter = options["-f"].as(String)
  case formatter
  when "html"
    formatter = Tartrazine::Html.new
    formatter.standalone = options["--standalone"] != nil
    formatter.line_numbers = options["--line-numbers"] != nil
  when "terminal"
    formatter = Tartrazine::Ansi.new
    formatter.line_numbers = options["--line-numbers"] != nil
  when "json"
    formatter = Tartrazine::Json.new
  else
    puts "Invalid formatter: #{formatter}"
    exit 1
  end

  theme = Tartrazine.theme(options["-t"].as(String))

  # With --css, only the theme's stylesheet is written (to <theme>.css);
  # no input file is read.
  if formatter.is_a?(Tartrazine::Html) && options["--css"]
    File.open("#{options["-t"].as(String)}.css", "w") do |outf|
      outf.puts formatter.get_style_defs(theme)
    end
    exit 0
  end

  lexer = Tartrazine.lexer(name: options["-l"].as(String), filename: options["FILE"].as(String))

  # File.read opens, reads and closes the file; the previous
  # File.open(...).gets_to_end never closed the handle it opened.
  input = File.read(options["FILE"].as(String))
  output = formatter.format(input, lexer, theme)

  if options["-o"].nil?
    puts output
  else
    File.open(options["-o"].as(String), "w") do |outf|
      outf.puts output
    end
  end
end

View File

@ -1,5 +1,4 @@
require "./actions"
require "./constants"
require "./formatter"
require "./rules"
require "./styles"

View File

@ -1,5 +1,4 @@
require "./actions"
require "./constants"
require "./formatter"
require "./rules"
require "./styles"
@ -10,22 +9,34 @@ require "xml"
module Tartrazine
alias Color = Sixteen::Color
class ThemeFiles
extend BakedFileSystem
bake_folder "../styles", __DIR__
end
def self.theme(name : String) : Theme
begin
return Theme.from_base16(name)
return Theme.from_base16(name)
rescue ex : Exception
raise ex unless ex.message.try &.includes? "Theme not found"
end
begin
return Theme.from_xml(ThemeFiles.get("/#{name}.xml").gets_to_end)
Theme.from_xml(ThemeFiles.get("/#{name}.xml").gets_to_end)
rescue
raise Exception.new("Theme #{name} not found")
end
end
class ThemeFiles
extend BakedFileSystem
bake_folder "../styles", __DIR__
# Return the sorted, de-duplicated list of theme names. A theme name is
# the base name (up to the first dot) of a file baked into ThemeFiles or
# into Sixteen::DataFiles.
def self.themes
  stem = ->(path : String) { path.split("/").last.split(".").first }
  names = ThemeFiles.files.map { |file| stem.call(file.path) }
  names.concat(Sixteen::DataFiles.files.map { |file| stem.call(file.path) })
  names.uniq.sort
end
class Style
@ -111,7 +122,7 @@ module Tartrazine
# The color assignments are adapted from
# https://github.com/mohd-akram/base16-pygments/
theme.styles["Background"] = Style.new(color: t["base05"], background: t["base00"])
theme.styles["Background"] = Style.new(color: t["base05"], background: t["base00"], bold: true)
theme.styles["LineHighlight"] = Style.new(color: t["base0D"], background: t["base01"])
theme.styles["Text"] = Style.new(color: t["base05"])
theme.styles["Error"] = Style.new(color: t["base08"])
@ -175,27 +186,22 @@ module Tartrazine
if !theme.styles.has_key?("LineHighlight")
theme.styles["LineHighlight"] = Style.new
theme.styles["LineHighlight"].background = make_highlight_color(theme.styles["Background"].background)
theme.styles["LineHighlight"].bold = true
end
theme
end
# If the color is dark, make it brighter, and vice versa
def self.make_highlight_color(base_color)
# FIXME: do a proper luminance adjustment in the color class
return nil if base_color.nil?
color = Color.new(base_color.hex)
if base_color.light?
color.r = [(base_color.r - 40), 255].min.to_u8
color.g = [(base_color.g - 40), 255].min.to_u8
color.b = [(base_color.b - 40), 255].min.to_u8
else
color.r = [(base_color.r + 40), 255].min.to_u8
color.g = [(base_color.g + 40), 255].min.to_u8
color.b = [(base_color.b + 40), 255].min.to_u8
if base_color.nil?
# No base color to derive from: fall back to a neutral mid-gray
return Color.new(127, 127, 127)
end
if base_color.dark?
base_color.lighter(0.2)
else
base_color.darker(0.2)
end
# Bug in color, setting rgb doesn't update hex
color.hex = "#{color.r.to_s(16)}#{color.g.to_s(16)}#{color.b.to_s(16)}"
color
end
end
end

View File

@ -1,5 +1,4 @@
require "./actions"
require "./constants"
require "./formatter"
require "./rules"
require "./styles"
@ -12,7 +11,7 @@ require "xml"
module Tartrazine
extend self
VERSION = "0.1.1"
VERSION = "0.2.0"
Log = ::Log.for("tartrazine")
end

View File

@ -1,74 +0,0 @@
<style name="base16-snazzy">
<entry type="Other" style="#e2e4e5"/>
<entry type="Error" style="#ff5c57"/>
<entry type="Background" style="bg:#282a36"/>
<entry type="Keyword" style="#ff6ac1"/>
<entry type="KeywordConstant" style="#ff6ac1"/>
<entry type="KeywordDeclaration" style="#ff5c57"/>
<entry type="KeywordNamespace" style="#ff6ac1"/>
<entry type="KeywordPseudo" style="#ff6ac1"/>
<entry type="KeywordReserved" style="#ff6ac1"/>
<entry type="KeywordType" style="#9aedfe"/>
<entry type="Name" style="#e2e4e5"/>
<entry type="NameAttribute" style="#57c7ff"/>
<entry type="NameBuiltin" style="#ff5c57"/>
<entry type="NameBuiltinPseudo" style="#e2e4e5"/>
<entry type="NameClass" style="#f3f99d"/>
<entry type="NameConstant" style="#ff9f43"/>
<entry type="NameDecorator" style="#ff9f43"/>
<entry type="NameEntity" style="#e2e4e5"/>
<entry type="NameException" style="#e2e4e5"/>
<entry type="NameFunction" style="#57c7ff"/>
<entry type="NameLabel" style="#ff5c57"/>
<entry type="NameNamespace" style="#e2e4e5"/>
<entry type="NameOther" style="#e2e4e5"/>
<entry type="NameTag" style="#ff6ac1"/>
<entry type="NameVariable" style="#ff5c57"/>
<entry type="NameVariableClass" style="#ff5c57"/>
<entry type="NameVariableGlobal" style="#ff5c57"/>
<entry type="NameVariableInstance" style="#ff5c57"/>
<entry type="Literal" style="#e2e4e5"/>
<entry type="LiteralDate" style="#e2e4e5"/>
<entry type="LiteralString" style="#5af78e"/>
<entry type="LiteralStringBacktick" style="#5af78e"/>
<entry type="LiteralStringChar" style="#5af78e"/>
<entry type="LiteralStringDoc" style="#5af78e"/>
<entry type="LiteralStringDouble" style="#5af78e"/>
<entry type="LiteralStringEscape" style="#5af78e"/>
<entry type="LiteralStringHeredoc" style="#5af78e"/>
<entry type="LiteralStringInterpol" style="#5af78e"/>
<entry type="LiteralStringOther" style="#5af78e"/>
<entry type="LiteralStringRegex" style="#5af78e"/>
<entry type="LiteralStringSingle" style="#5af78e"/>
<entry type="LiteralStringSymbol" style="#5af78e"/>
<entry type="LiteralNumber" style="#ff9f43"/>
<entry type="LiteralNumberBin" style="#ff9f43"/>
<entry type="LiteralNumberFloat" style="#ff9f43"/>
<entry type="LiteralNumberHex" style="#ff9f43"/>
<entry type="LiteralNumberInteger" style="#ff9f43"/>
<entry type="LiteralNumberIntegerLong" style="#ff9f43"/>
<entry type="LiteralNumberOct" style="#ff9f43"/>
<entry type="Operator" style="#ff6ac1"/>
<entry type="OperatorWord" style="#ff6ac1"/>
<entry type="Punctuation" style="#e2e4e5"/>
<entry type="Comment" style="#78787e"/>
<entry type="CommentHashbang" style="#78787e"/>
<entry type="CommentMultiline" style="#78787e"/>
<entry type="CommentSingle" style="#78787e"/>
<entry type="CommentSpecial" style="#78787e"/>
<entry type="CommentPreproc" style="#78787e"/>
<entry type="Generic" style="#e2e4e5"/>
<entry type="GenericDeleted" style="#ff5c57"/>
<entry type="GenericEmph" style="underline #e2e4e5"/>
<entry type="GenericError" style="#ff5c57"/>
<entry type="GenericHeading" style="bold #e2e4e5"/>
<entry type="GenericInserted" style="bold #e2e4e5"/>
<entry type="GenericOutput" style="#43454f"/>
<entry type="GenericPrompt" style="#e2e4e5"/>
<entry type="GenericStrong" style="italic #e2e4e5"/>
<entry type="GenericSubheading" style="bold #e2e4e5"/>
<entry type="GenericTraceback" style="#e2e4e5"/>
<entry type="GenericUnderline" style="underline"/>
<entry type="Text" style="#e2e4e5"/>
<entry type="TextWhitespace" style="#e2e4e5"/>
</style>