2024-08-02 17:03:39 -03:00
|
|
|
require "xml"
|
|
|
|
|
|
|
|
module Tartrazine
|
|
|
|
VERSION = "0.1.0"
|
|
|
|
|
2024-08-02 17:23:40 -03:00
|
|
|
# A named lexer state together with the rules that belong to it.
class State
  # Name of the state; empty until assigned.
  property name : String = ""

  # Rules of this state, in the order they were appended.
  property rules : Array(Rule) = Array(Rule).new
end
|
|
|
|
|
|
|
|
# A single lexer rule. It may carry a regular expression; subclasses can add
# further data.
class Rule
  # Regular expression of the rule, or `nil` when none has been set.
  property pattern : Regex | Nil = nil
end
|
|
|
|
|
|
|
|
# A rule that includes another state, referenced by name.
#
# What "including" a state means for lexing is not implemented here yet; in
# the XML it looks like this:
#
#       <rule>
#         <include state="interp"/>
#       </rule>
#     </state>
#     <state name="interp">
#       <rule pattern="\$\(\(">
#         <token type="Keyword"/>
#         ...
class IncludeStateRule < Rule
  # Name of the state to include; empty until assigned.
  property state : String = ""
end
|
|
|
|
|
2024-08-02 20:32:15 -03:00
|
|
|
# Placeholder for something that emits tokens; for now it only carries a
# type name.
class Emitter
  # Type name of the emitter; empty until assigned.
  property type : String = ""
end
|
|
|
|
|
2024-08-02 17:03:39 -03:00
|
|
|
# One lexer definition: its metadata (`config`) plus its states keyed by name.
# Instances are built from an XML description with `Lexer.from_xml`.
class Lexer
  # Lexer metadata. Every field starts out empty/zero and is replaced as a
  # whole by `from_xml` when the document has a <config> element.
  property config = {
    name:       "",
    aliases:    [] of String,
    filenames:  [] of String,
    mime_types: [] of String,
    priority:   0,
  }

  # Parsed states, indexed by state name.
  property states = {} of String => State

  # Builds a `Lexer` from the text of an XML lexer definition.
  #
  # Reads the <config> child of the root element and every <state> found
  # under its <rules> child. If the document has no root element, or a
  # section is missing, the corresponding defaults are kept.
  #
  # The element children of each rule (emitters / transformers) are only
  # dumped with `p!` for now; nothing is stored for them.
  def self.from_xml(xml : String) : Lexer
    l = Lexer.new
    root = XML.parse(xml).first_element_child
    return l unless root

    if meta = root.children.find { |n| n.name == "config" }
      l.config = {
        name:       xml_to_s(meta, name) || "",
        aliases:    xml_to_a(meta, _alias) || [] of String,
        filenames:  xml_to_a(meta, filename) || [] of String,
        mime_types: xml_to_a(meta, mime_type) || [] of String,
        priority:   xml_to_i(meta, priority) || 0,
      }
    end

    rules_root = root.children.find { |n| n.name == "rules" }
    return l unless rules_root

    # <rules> contains <state> elements...
    rules_root.children.each do |state_node|
      next unless state_node.name == "state"

      state = State.new
      state.name = state_node["name"]
      if l.states.has_key?(state.name)
        # The first definition wins. This state's rules are still parsed
        # below, but the state itself is not registered.
        puts "Duplicate state: #{state.name}"
      else
        l.states[state.name] = state
      end

      # ...and each <state> contains <rule> elements.
      state_node.children.each do |rule_node|
        next unless rule_node.name == "rule"

        rule =
          if source = rule_node["pattern"]?
            # Pattern rule: compile the attribute into a regex.
            Rule.new.tap { |r| r.pattern = /#{source}/ }
          else
            # No pattern: the rule includes another state by name.
            included = IncludeStateRule.new
            if include_node = rule_node.children.find { |n| n.name == "include" }
              included.state = include_node["state"]
            end
            included
          end
        state.rules << rule

        # A rule may have an emitter and may have a transformer. Emitters
        # emit tokens, transformers act on the state stack. Transformers
        # come last, and both kinds are optional.
        rule_node.children.each do |node|
          next unless node.element?

          # NOTE: `p!` prints the expression text, so the arguments below
          # must stay spelled exactly like this to keep the output stable.
          if {"pop", "push", "include", "multi", "combine"}.includes?(node.name)
            p! "transformer", node.to_s
          else
            p! "emitter", node.to_s
          end
        end
      end
    end

    l
  end
end
|
|
|
|
end
|
|
|
|
|
2024-08-02 20:32:15 -03:00
|
|
|
# Read the Ada lexer definition from disk and parse it when this file runs.
ada_xml = File.read("lexers/ada.xml")
l = Tartrazine::Lexer.from_xml(ada_xml)
|
2024-08-02 17:09:05 -03:00
|
|
|
|
|
|
|
# Convenience macros to parse XML
|
|
|
|
# Expands to the text content of the first child of *node* whose element name
# is *name*, or `nil` when there is no such child.
#
# *name* is given as a bare identifier. Leading underscores are stripped
# before comparing, so `_alias` looks up <alias>.
macro xml_to_s(node, name)
  {{node}}.children.find { |child| child.name == {{name.stringify}}.lstrip("_") }.try(&.content.to_s)
end
|
|
|
|
|
|
|
|
# Expands to the content of the first child of *node* named *name*, converted
# with `String#to_i`, or `nil` when there is no such child. `to_i` raises if
# the content is not a valid integer.
#
# *name* is given as a bare identifier. Leading underscores are stripped
# before comparing.
macro xml_to_i(node, name)
  {{node}}.children.find { |child| child.name == {{name.stringify}}.lstrip("_") }.try(&.content.to_s.to_i)
end
|
|
|
|
|
|
|
|
# Expands to an array with the text content of every child of *node* whose
# element name is *name*, in document order. The array is empty when there is
# no such child.
#
# *name* is given as a bare identifier. Leading underscores are stripped
# before comparing, so `_alias` collects every <alias>.
macro xml_to_a(node, name)
  {{node}}.children.select { |child| child.name == {{name.stringify}}.lstrip("_") }.map(&.content.to_s)
end
|