This commit is contained in:
Roberto Alsina 2024-08-03 06:37:15 -03:00
parent d47c946e8b
commit b82a535928

View File

@ -18,6 +18,24 @@ module Tartrazine
property pattern : Regex = Regex.new "" property pattern : Regex = Regex.new ""
property emitters : Array(Emitter) = [] of Emitter property emitters : Array(Emitter) = [] of Emitter
property transformers : Array(Transformer) = [] of Transformer property transformers : Array(Transformer) = [] of Transformer
# Tries this rule against `text` at character offset `pos`.
#
# Returns a tuple of the position from which lexing should continue and the
# tokens produced by this rule's emitters. When the pattern does not match
# starting exactly at `pos`, the position is returned unchanged together
# with an empty token list.
def match(text, pos) : Tuple(Int32, Array(Token))
  match = pattern.match(text, pos)
  # `Regex#match(str, pos)` scans forward from `pos`, so a hit that begins
  # later in the text has to be rejected here; accepting it would silently
  # skip the characters in between.
  return pos, [] of Token if match.nil? || match.begin != pos
  # Collect the tokens of every emitter, in declaration order
  tokens = emitters.flat_map(&.emit(match))
  # Let every transformer update the lexer state
  transformers.each(&.transform)
  # Continue lexing right after the matched text
  return match.end, tokens
end
end end
# This rule includes another state like this: # This rule includes another state like this:
@ -32,6 +50,11 @@ module Tartrazine
class IncludeStateRule < Rule class IncludeStateRule < Rule
property state : String = "" property state : String = ""
# Placeholder matcher for an included state: prints the name of the state
# to standard output and reports no progress (same position, no tokens).
def match(text, pos) : Tuple(Int32, Array(Token))
  puts "Including state #{state}"
  {pos, Array(Token).new}
end
end end
class Emitter class Emitter
@ -44,9 +67,10 @@ module Tartrazine
def emit(match : Regex::MatchData) : Array(Token) def emit(match : Regex::MatchData) : Array(Token)
case type case type
when "token" when "token"
return [Token.new(type: xml["type"], value: match[0])] [Token.new(type: xml["type"], value: match[0])]
else
raise Exception.new("Unknown emitter type: #{type}")
end end
[] of Token
end end
end end
@ -82,28 +106,16 @@ module Tartrazine
state = states[state_stack.last] state = states[state_stack.last]
matched = false matched = false
state.rules.each do |rule| state.rules.each do |rule|
case rule new_pos, new_tokens = rule.match(text, pos)
when Rule # A normal regex rule pos = new_pos
match = rule.pattern.match(text, pos) tokens += new_tokens
matched = true
# We are matched, move post to after the match break # We go back to processing with current state
next if match.nil? end
matched = true # If no rule matches, emit an error token
pos = match.end if !matched
tokens << {type: "Error", value: ""}
# Emit the tokens pos += 1
rule.emitters.each do |emitter|
# Emit the token
tokens += emitter.emit(match)
end
# Transform the state
rule.transformers.each do |transformer|
transformer.transform
end
when IncludeStateRule
# TODO: something
end
# TODO: Emit error if no rule matched
end end
end end
tokens tokens
@ -159,7 +171,7 @@ module Tartrazine
rule_node.children.each do |node| rule_node.children.each do |node|
next unless node.element? next unless node.element?
case node.name case node.name
when "pop", "push", "include", "multi", "combine" when "pop", "push", "multi", "combine" # "include",
transformer = Transformer.new transformer = Transformer.new
transformer.type = node.name transformer.type = node.name
transformer.xml = node.to_s transformer.xml = node.to_s
@ -188,6 +200,9 @@ end
# Parse some plaintext # Parse some plaintext
puts lexers["plaintext"].tokenize("Hello, world!\n") puts lexers["plaintext"].tokenize("Hello, world!\n")
# Now some bash
puts lexers["Bash"].tokenize("echo 'Hello, world!'\n")
# Convenience macros to parse XML # Convenience macros to parse XML
macro xml_to_s(node, name) macro xml_to_s(node, name)
{{node}}.children.find{|n| n.name == "{{name}}".lstrip("_")}.try &.content.to_s {{node}}.children.find{|n| n.name == "{{name}}".lstrip("_")}.try &.content.to_s