require "xml"
module Tartrazine
VERSION = "0.1.0"
# This implements a lexer for Pygments RegexLexers as expressed
# in Chroma's XML serialization.
#
# For an explanation of what emitters, transformers, etc. do,
# the Pygments documentation is a good place to start:
# https://pygments.org/docs/lexerdevelopment/
# A named lexer state: an ordered list of the rules declared inside
# one <state> element.
class State
  # Value of the state's "name" attribute.
  property name = ""
  # Rules in the order they were appended.
  property rules : Array(Rule) = Array(Rule).new
end
# A single rule inside a State.
class Rule
  # Compiled regex of the rule. Stays nil for rules that have no pattern
  # and for rules whose pattern failed to compile.
  property pattern : Regex? = nil
  # Emitters attached to this rule, in document order.
  property emitters : Array(Emitter) = [] of Emitter
  # Transformers attached to this rule, in document order.
  # (The element type was previously misspelled as `TRansformer`, which
  # names no class and stops the file from compiling.)
  property transformers : Array(Transformer) = [] of Transformer
end
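# This rule includes another state. In the XML it is a <rule> element
# without a "pattern" attribute that has an <include> child, like this:
#
#   <rule>
#     <include state="other-state"/>
#   </rule>
#
# The name given in the "state" attribute is stored in `state`.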
class IncludeStateRule < Rule
  # Name of the state to include; left empty when the rule element has
  # no <include> child.
  property state = ""
end
# Holds one emitter element of a rule.
class Emitter
  # Kind of emitter. NOTE(review): nothing visible in this file assigns
  # it, so it keeps its empty default here.
  property type = ""
  # Serialized XML of the element this emitter was created from.
  property xml = ""
end
# Holds one transformer element of a rule.
class Transformer
  # Serialized XML of the element this transformer was created from.
  # The space before the colon is required: without it Crystal does not
  # read `xml : String` as a type declaration and the `property` macro
  # cannot process it.
  property xml : String = ""
end
# A lexer definition loaded from Chroma's XML serialization: the metadata
# found in the <config> element plus the named states found under <rules>.
class Lexer
  # Lexer metadata. Each field keeps its empty default when the XML has
  # no <config> element or lacks the corresponding child element.
  property config = {
    name:       "",
    aliases:    [] of String,
    filenames:  [] of String,
    mime_types: [] of String,
    priority:   0.0,
  }

  # States keyed by name. When a name is declared more than once, the
  # first declaration is the one kept.
  property states = {} of String => State

  # Builds a Lexer from the XML text of a lexer definition.
  #
  # Returns a Lexer with default config and no states when the document
  # has no root element. Problems that do not prevent loading (duplicate
  # state names, patterns that fail to compile) are reported with `puts`
  # and loading continues.
  def self.from_xml(xml : String) : Lexer
    l = Lexer.new
    lexer = XML.parse(xml).first_element_child
    return l unless lexer

    config = lexer.children.find { |n| n.name == "config" }
    if config
      l.config = {
        name:       xml_to_s(config, name) || "",
        aliases:    xml_to_a(config, _alias) || [] of String,
        filenames:  xml_to_a(config, filename) || [] of String,
        mime_types: xml_to_a(config, mime_type) || [] of String,
        priority:   xml_to_f(config, priority) || 0.0,
      }
    end

    # The <rules> element contains the states...
    rules = lexer.children.find { |n| n.name == "rules" }
    return l unless rules

    lexer_name = l.config[:name]
    rules.children.select { |n| n.name == "state" }.each do |state_node|
      state_name = state_node["name"]
      if l.states.has_key?(state_name)
        # The first declaration wins. Skip the duplicate entirely: its
        # rules would only be parsed into a State that is thrown away.
        puts "Duplicate state: #{state_name}"
        next
      end

      state = State.new
      state.name = state_name
      l.states[state_name] = state

      # ...and each state contains the rules.
      state_node.children.select { |n| n.name == "rule" }.each do |rule_node|
        state.rules << parse_rule(rule_node, lexer_name)
      end
    end
    l
  end

  # Builds the Rule for one <rule> element. *lexer_name* is only used to
  # label the message printed when the pattern does not compile.
  private def self.parse_rule(rule_node : XML::Node, lexer_name : String) : Rule
    if pattern = rule_node["pattern"]?
      # A rule with a "pattern" attribute is a pattern rule.
      rule = Rule.new
      begin
        rule.pattern = /#{pattern}/
      rescue ex : Exception
        # Keep the rule, with a nil pattern, instead of aborting the load.
        puts "Bad regex in #{lexer_name}: #{ex}"
      end
    else
      # A rule without a pattern includes another state.
      rule = IncludeStateRule.new
      include_node = rule_node.children.find { |n| n.name == "include" }
      rule.state = include_node["state"] if include_node
    end

    # A rule may carry emitters and transformers as child elements; each
    # recognised child is stored as its serialized XML. Children with any
    # other name are ignored.
    rule_node.children.each do |node|
      next unless node.element?
      case node.name
      when "pop", "push", "include", "multi", "combine"
        transformer = Transformer.new
        transformer.xml = node.to_s
        rule.transformers << transformer
      when "bygroups", "combined", "mutators", "token", "using", "usingbygroup", "usingself"
        emitter = Emitter.new
        emitter.xml = node.to_s
        rule.emitters << emitter
      end
    end
    rule
  end
end
end
# Run every lexer definition under lexers/ through the parser. The
# resulting Lexer objects are not kept.
Dir["lexers/*.xml"].each do |path|
  Tartrazine::Lexer.from_xml(File.read(path))
end
# Convenience macros to parse XML
# Expands to an expression giving the text content of the first child of
# *node* whose element name is *name*, or nil when there is no such child.
# Leading underscores in *name* are dropped before comparing, so a
# keyword can be passed as e.g. `_alias`.
macro xml_to_s(node, name)
  {{node}}.children.find { |child| child.name == {{name.stringify}}.lstrip("_") }.try(&.content.to_s)
end
# Expands to an expression giving the content of the first child of
# *node* named *name*, converted with `to_f`, or nil when there is no
# such child. Leading underscores in *name* are dropped before comparing.
macro xml_to_f(node, name)
  ({{node}}.children.find { |child| child.name == {{name.stringify}}.lstrip("_") }.try { |found| found.content.to_s.to_f })
end
# Expands to an expression giving the text contents of every child of
# *node* named *name*, in document order; an empty array when there are
# none. Leading underscores in *name* are dropped before comparing.
macro xml_to_a(node, name)
  {{node}}.children.select { |child| child.name == {{name.stringify}}.lstrip("_") }.map(&.content.to_s)
end