This commit is contained in:
2024-08-04 19:47:54 -03:00
parent e7c2053222
commit ab263ac26f
2 changed files with 85 additions and 83 deletions

80
src/rules.cr Normal file
View File

@ -0,0 +1,80 @@
# These are lexer rules. They match with the text being parsed
# and perform actions, either emitting tokens or changing the
# state of the lexer.
module Tartrazine
  # A lexer rule that matches input against a regex pattern and,
  # on success, runs its emitters to produce tokens.
  class Rule
    property pattern : Regex = Regex.new ""
    property emitters : Array(Emitter) = [] of Emitter
    # Raw XML this rule was parsed from; kept around for debugging.
    property xml : String = "foo"

    # Try to match `text` at byte offset `pos`.
    # Returns {matched?, new position, tokens emitted}.
    def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
      tokens = [] of Token
      match = pattern.match(text, pos)
      # We don't match if the match doesn't move the cursor
      # because that causes infinite loops.
      # NOTE(review): `match.end == 0` only rejects an empty match at
      # position 0; an empty match at pos > 0 has `end == pos`, which
      # passes this guard — confirm that is intended.
      return false, pos, [] of Token if match.nil? || match.end == 0
      # Run every emitter against the match, accumulating their tokens.
      emitters.each do |emitter|
        tokens += emitter.emit(match, lexer)
      end
      return true, match.end, tokens
    end

    # Build the rule from its lexer-definition XML node. `flags` are
    # the regex options applied when compiling `pattern`.
    def initialize(node : XML::Node, flags)
      @xml = node.to_s
      @pattern = Regex.new(node["pattern"], flags)
      add_emitters(node)
    end

    # Create an Emitter for every element child of `node`.
    # FIX: block parameter renamed from `node`, which shadowed the
    # method argument of the same name.
    def add_emitters(node : XML::Node)
      node.children.each do |child|
        next unless child.element?
        @emitters << Emitter.new(child.name, child)
      end
    end
  end

  # This rule includes another state. If any of the rules of the
  # included state matches, this rule matches.
  class IncludeStateRule < Rule
    # Name of the lexer state whose rules are tried in order.
    property state : String = ""

    # Delegate to each rule of the included state; the first rule
    # that matches wins and its result is returned unchanged.
    def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
      lexer.states[state].rules.each do |rule|
        matched, new_pos, new_tokens = rule.match(text, pos, lexer)
        return true, new_pos, new_tokens if matched
      end
      return false, pos, [] of Token
    end

    def initialize(node : XML::Node)
      @xml = node.to_s
      # The <include> child names the state to pull rules from.
      include_node = node.children.find { |n| n.name == "include" }
      @state = include_node["state"] if include_node
      add_emitters(node)
    end
  end

  # This rule always matches, unconditionally, without consuming
  # input (the returned position equals `pos`).
  class Always < Rule
    def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
      tokens = [] of Token
      # No MatchData exists for an unconditional match, so emitters
      # receive nil.
      emitters.each do |emitter|
        tokens += emitter.emit(nil, lexer)
      end
      return true, pos, tokens
    end

    def initialize(node : XML::Node)
      @xml = node.to_s
      add_emitters(node)
    end
  end
end