This commit is contained in:
Roberto Alsina 2024-08-04 19:47:54 -03:00
parent e7c2053222
commit ab263ac26f
2 changed files with 85 additions and 83 deletions

80
src/rules.cr Normal file
View File

@ -0,0 +1,80 @@
# These are lexer rules. They are matched against the text being parsed
# and perform actions: either emitting tokens or changing the
# state of the lexer.
module Tartrazine
# A lexer rule driven by a regular expression.
#
# The rule is built from an XML rule node: the node's `pattern`
# attribute becomes the regex and every element child becomes an
# `Emitter`. `match` runs the regex at a given position and, when it
# succeeds, returns the tokens produced by the emitters.
class Rule
  # Regex tested against the text; replaced in `initialize`.
  property pattern : Regex = Regex.new ""
  # Emitters run, in order, on every successful match.
  property emitters : Array(Emitter) = [] of Emitter
  # XML text of the node this rule was built from (`node.to_s`).
  # Appears to be kept for the commented-out debug output below.
  property xml : String = "foo"

  # Tries the pattern against *text* starting at *pos*.
  #
  # Returns `{matched, new_pos, tokens}`. On failure that is
  # `{false, pos, []}`; on success *new_pos* is `match.end` and
  # *tokens* holds everything the emitters produced, in emitter order.
  def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
    match = pattern.match(text, pos)
    # Reject a missing match, and a match that ends at index 0: a match
    # that does not move the cursor can cause infinite loops.
    # NOTE(review): only an end index of 0 is rejected, not every
    # match with `match.end == pos` -- confirm that this is intended.
    # pp! match, pattern.inspect, text, pos
    return false, pos, [] of Token if match.nil? || match.end == 0

    # Collect the emitted tokens in place instead of building a new
    # array with `+=` for every emitter.
    tokens = [] of Token
    emitters.each do |emitter|
      tokens.concat(emitter.emit(match, lexer))
    end
    # p! xml, match.end, tokens
    return true, match.end, tokens
  end

  # Builds the rule from an XML rule *node*: stores the node's text,
  # compiles its `pattern` attribute with the given regex *flags*,
  # and creates one emitter per element child.
  def initialize(node : XML::Node, flags)
    @xml = node.to_s
    @pattern = Regex.new(node["pattern"], flags)
    add_emitters(node)
  end

  # Appends an `Emitter` for every element child of *node*;
  # non-element children are skipped.
  def add_emitters(node : XML::Node)
    # The block parameter has its own name so that it no longer
    # shadows the method's *node* parameter.
    node.children.each do |child|
      next unless child.element?
      @emitters << Emitter.new(child.name, child)
    end
  end
end
# A rule that delegates to the rules of another lexer state, named by
# the `state` attribute of its `include` child node. It matches as
# soon as one of that state's rules does.
class IncludeStateRule < Rule
  # Name of the state whose rules are tried.
  property state : String = ""

  # Tries each rule of the included state, in order, at *pos* and
  # returns the result of the first one that matches; returns
  # `{false, pos, []}` when none of them does.
  def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
    # puts "Including state #{state} from #{lexer.state_stack.last}"
    lexer.states[state].rules.each do |candidate|
      outcome = candidate.match(text, pos, lexer)
      # p! xml, outcome[1], outcome[2] if outcome[0]
      return outcome if outcome[0]
    end
    {false, pos, [] of Token}
  end

  # Builds the rule from an XML rule *node*: stores the node's text,
  # reads the target state from the first child named "include"
  # (when there is one), and creates one emitter per element child.
  def initialize(node : XML::Node)
    @xml = node.to_s
    if include_node = node.children.find { |child| child.name == "include" }
      @state = include_node["state"]
    end
    add_emitters(node)
  end
end
# A rule without a pattern: it matches unconditionally, leaves the
# position where it was, and returns whatever its emitters produce.
class Always < Rule
  # Always returns `{true, pos, tokens}`. The emitters are called with
  # `nil` in place of a regex match, and their results are joined in
  # emitter order.
  def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
    produced = emitters.reduce([] of Token) do |acc, emitter|
      acc + emitter.emit(nil, lexer)
    end
    {true, pos, produced}
  end

  # Builds the rule from an XML rule *node*: stores the node's text
  # and creates one emitter per element child.
  def initialize(node : XML::Node)
    @xml = node.to_s
    add_emitters(node)
  end
end
end

View File

@ -1,7 +1,7 @@
require "base58" require "base58"
require "json" require "json"
require "xml" require "xml"
require "./rules"
module Tartrazine module Tartrazine
VERSION = "0.1.0" VERSION = "0.1.0"
@ -23,67 +23,6 @@ module Tartrazine
end end
end end
# A lexer rule that tests a regex pattern against the text and, on a
# successful match, collects the tokens produced by its emitters.
class Rule
# Regex tested against the text.
property pattern : Regex = Regex.new ""
# Emitters run, in order, on every successful match.
property emitters : Array(Emitter) = [] of Emitter
# XML text of the rule; "foo" is only the placeholder default.
property xml : String = "foo"
# Tries the pattern against `text` starting at `pos` and returns
# `{matched, new_pos, tokens}`: `{false, pos, []}` on failure,
# `{true, match.end, tokens}` on success.
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
tokens = [] of Token
match = pattern.match(text, pos)
# We don't match if the match doesn't move the cursor
# because that causes infinite loops
# NOTE(review): the check below only rejects a match that ends at
# index 0, not every match with `match.end == pos` -- confirm.
# pp! match, pattern.inspect, text, pos
return false, pos, [] of Token if match.nil? || match.end == 0
# Emit the tokens
emitters.each do |emitter|
# Each emitter returns an array of tokens; append it to the result
tokens += emitter.emit(match, lexer)
end
# p! xml, match.end, tokens
return true, match.end, tokens
end
end
# This rule includes another state like this:
# <rule>
# <include state="interp"/>
# </rule>
# </state>
# <state name="interp">
# <rule pattern="\$\(\(">
# <token type="Keyword"/>
# ...
# If any rule of the included state matches, this rule matches and
# returns that rule's position and tokens.
class IncludeStateRule < Rule
# Name of the state whose rules are tried.
property state : String = ""
# Tries each rule of the included state, in order, at `pos`; returns
# the first successful result, or `{false, pos, []}` when none matches.
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
# puts "Including state #{state} from #{lexer.state_stack.last}"
lexer.states[state].rules.each do |rule|
matched, new_pos, new_tokens = rule.match(text, pos, lexer)
# p! xml, new_pos, new_tokens if matched
return true, new_pos, new_tokens if matched
end
return false, pos, [] of Token
end
end
# These rules look like this:
# <rule>
# <pop depth="1"/>
# </rule>
# They match, don't move pos, probably alter
# the stack, probably not generate tokens
class Always < Rule
# Always returns `{true, pos, tokens}`; the emitters are called with
# `nil` in place of a regex match.
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
tokens = [] of Token
emitters.each do |emitter|
tokens += emitter.emit(nil, lexer)
end
return true, pos, tokens
end
end
class Emitter class Emitter
property type : String property type : String
@ -274,36 +213,19 @@ module Tartrazine
case rule_node["pattern"]? case rule_node["pattern"]?
when nil when nil
if rule_node.first_element_child.try &.name == "include" if rule_node.first_element_child.try &.name == "include"
rule = IncludeStateRule.new rule = IncludeStateRule.new(rule_node)
rule.xml = rule_node.to_s
include_node = rule_node.children.find { |n| n.name == "include" }
rule.state = include_node["state"] if include_node
state.rules << rule
else else
rule = Always.new rule = Always.new(rule_node)
rule.xml = rule_node.to_s
state.rules << rule
end end
else else
flags = Regex::Options::ANCHORED flags = Regex::Options::ANCHORED
flags |= Regex::Options::MULTILINE unless l.config[:not_multiline] flags |= Regex::Options::MULTILINE unless l.config[:not_multiline]
flags |= Regex::Options::IGNORE_CASE if l.config[:case_insensitive] flags |= Regex::Options::IGNORE_CASE if l.config[:case_insensitive]
flags |= Regex::Options::DOTALL if l.config[:dot_all] flags |= Regex::Options::DOTALL if l.config[:dot_all]
rule = Rule.new rule = Rule.new(rule_node, flags)
rule.xml = rule_node.to_s end
rule.pattern = Regex.new(rule_node["pattern"], flags)
state.rules << rule state.rules << rule
end end
next if rule.nil?
# Rules contain maybe an emitter and maybe a transformer
# emitters emit tokens, transformers do things to
# the state stack.
rule_node.children.each do |node|
next unless node.element?
rule.emitters << Emitter.new(node.name, node)
end
end
end end
end end
end end