From ab263ac26fcbe0b3673762c56d93c24661ce6549 Mon Sep 17 00:00:00 2001
From: Roberto Alsina
Date: Sun, 4 Aug 2024 19:47:54 -0300
Subject: [PATCH] refactor

---
 src/rules.cr      | 80 ++++++++++++++++++++++++++++++++++++++++++
 src/tartrazine.cr | 88 +++--------------------------------------------
 2 files changed, 85 insertions(+), 83 deletions(-)
 create mode 100644 src/rules.cr

diff --git a/src/rules.cr b/src/rules.cr
new file mode 100644
index 0000000..a318f3e
--- /dev/null
+++ b/src/rules.cr
@@ -0,0 +1,80 @@
+# These are lexer rules. They match with the text being parsed
+# and perform actions, either emitting tokens or changing the
+# state of the lexer.
+module Tartrazine
+  # This rule matches via a regex pattern
+  class Rule
+    property pattern : Regex = Regex.new ""
+    property emitters : Array(Emitter) = [] of Emitter
+    property xml : String = "foo"
+
+    # Tries `pattern` against `text` at `pos`. Returns whether it matched,
+    # the cursor after the match and the tokens produced by the emitters.
+    def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
+      tokens = [] of Token
+      match = pattern.match(text, pos)
+      # Intent: reject a match that doesn't move the cursor (infinite loops).
+      # NOTE(review): `match.end == 0` only rejects an empty match ending at
+      # offset 0, not one at a later `pos` - confirm whether `== pos` was meant.
+      return false, pos, [] of Token if match.nil? || match.end == 0
+      emitters.each do |emitter|
+        tokens.concat(emitter.emit(match, lexer))
+      end
+      return true, match.end, tokens
+    end
+
+    def initialize(node : XML::Node, flags)
+      @xml = node.to_s
+      @pattern = Regex.new(node["pattern"], flags)
+      add_emitters(node)
+    end
+
+    # Adds one Emitter for every element child of the rule's XML node.
+    def add_emitters(node : XML::Node)
+      node.children.each do |child|
+        next unless child.element?
+        @emitters << Emitter.new(child.name, child)
+      end
+    end
+  end
+
+  # This rule includes another state. If any of the rules of the
+  # included state matches, this rule matches.
+  class IncludeStateRule < Rule
+    property state : String = ""
+
+    # Tries each rule of the included state, in order, at `pos` and
+    # returns the result of the first one that matches.
+    def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
+      lexer.states[state].rules.each do |rule|
+        matched, new_pos, new_tokens = rule.match(text, pos, lexer)
+        return true, new_pos, new_tokens if matched
+      end
+      return false, pos, [] of Token
+    end
+
+    def initialize(node : XML::Node)
+      @xml = node.to_s
+      include_node = node.children.find { |n| n.name == "include" }
+      @state = include_node["state"] if include_node
+      add_emitters(node)
+    end
+  end
+
+  # This rule always matches, unconditionally
+  class Always < Rule
+    # Runs every emitter with a `nil` match; the cursor stays at `pos`.
+    def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
+      tokens = [] of Token
+      emitters.each do |emitter|
+        tokens.concat(emitter.emit(nil, lexer))
+      end
+      return true, pos, tokens
+    end
+
+    def initialize(node : XML::Node)
+      @xml = node.to_s
+      add_emitters(node)
+    end
+  end
+end
diff --git a/src/tartrazine.cr b/src/tartrazine.cr
index 1b46c3b..1621462 100644
--- a/src/tartrazine.cr
+++ b/src/tartrazine.cr
@@ -1,7 +1,7 @@
 require "base58"
 require "json"
 require "xml"
-
+require "./rules"
 module Tartrazine
   VERSION = "0.1.0"
 
@@ -23,67 +23,6 @@ module Tartrazine
     end
   end
 
-  class Rule
-    property pattern : Regex = Regex.new ""
-    property emitters : Array(Emitter) = [] of Emitter
-    property xml : String = "foo"
-
-    def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
-      tokens = [] of Token
-      match = pattern.match(text, pos)
-      # We don't match if the match doesn't move the cursor
-      # because that causes infinite loops
-      # pp! match, pattern.inspect, text, pos
-      return false, pos, [] of Token if match.nil? || match.end == 0
-      # Emit the tokens
-      emitters.each do |emitter|
-        # Emit the token
-        tokens += emitter.emit(match, lexer)
-      end
-      # p!
xml, match.end, tokens - return true, match.end, tokens - end - end - - # This rule includes another state like this: - # - # - # - # - # - # - # - # ... - - class IncludeStateRule < Rule - property state : String = "" - - def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token)) - # puts "Including state #{state} from #{lexer.state_stack.last}" - lexer.states[state].rules.each do |rule| - matched, new_pos, new_tokens = rule.match(text, pos, lexer) - # p! xml, new_pos, new_tokens if matched - return true, new_pos, new_tokens if matched - end - return false, pos, [] of Token - end - end - - # These rules look like this: - # - # - # - # They match, don't move pos, probably alter - # the stack, probably not generate tokens - class Always < Rule - def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token)) - tokens = [] of Token - emitters.each do |emitter| - tokens += emitter.emit(nil, lexer) - end - return true, pos, tokens - end - end class Emitter property type : String @@ -274,35 +213,18 @@ module Tartrazine case rule_node["pattern"]? when nil if rule_node.first_element_child.try &.name == "include" - rule = IncludeStateRule.new - rule.xml = rule_node.to_s - include_node = rule_node.children.find { |n| n.name == "include" } - rule.state = include_node["state"] if include_node - state.rules << rule + rule = IncludeStateRule.new(rule_node) else - rule = Always.new - rule.xml = rule_node.to_s - state.rules << rule + rule = Always.new(rule_node) end else flags = Regex::Options::ANCHORED flags |= Regex::Options::MULTILINE unless l.config[:not_multiline] flags |= Regex::Options::IGNORE_CASE if l.config[:case_insensitive] flags |= Regex::Options::DOTALL if l.config[:dot_all] - rule = Rule.new - rule.xml = rule_node.to_s - rule.pattern = Regex.new(rule_node["pattern"], flags) - state.rules << rule - end - - next if rule.nil? - # Rules contain maybe an emitter and maybe a transformer - # emitters emit tokens, transformers do things to - # the state stack. 
- rule_node.children.each do |node| - next unless node.element? - rule.emitters << Emitter.new(node.name, node) + rule = Rule.new(rule_node, flags) end + state.rules << rule end end end