Mirror of https://github.com/ralsina/tartrazine.git, synced 2024-11-10 05:22:23 +00:00
refactor
This commit is contained in:
parent e7c2053222
commit ab263ac26f
src/rules.cr (new file, 80 lines)

@@ -0,0 +1,80 @@
# These are lexer rules. They match with the text being parsed
# and perform actions, either emitting tokens or changing the
# state of the lexer.
module Tartrazine
  # This rule matches via a regex pattern
  class Rule
    property pattern : Regex = Regex.new ""
    property emitters : Array(Emitter) = [] of Emitter
    property xml : String = "foo"

    def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
      tokens = [] of Token
      match = pattern.match(text, pos)
      # We don't match if the match doesn't move the cursor
      # because that causes infinite loops
      # pp! match, pattern.inspect, text, pos
      return false, pos, [] of Token if match.nil? || match.end == 0
      # Emit the tokens
      emitters.each do |emitter|
        # Emit the token
        tokens += emitter.emit(match, lexer)
      end
      # p! xml, match.end, tokens
      return true, match.end, tokens
    end

    def initialize(node : XML::Node, flags)
      @xml = node.to_s
      @pattern = Regex.new(node["pattern"], flags)
      add_emitters(node)
    end

    def add_emitters(node : XML::Node)
      node.children.each do |node|
        next unless node.element?
        @emitters << Emitter.new(node.name, node)
      end
    end
  end
  # This rule includes another state. If any of the rules of the
  # included state matches, this rule matches.
  class IncludeStateRule < Rule
    property state : String = ""

    def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
      # puts "Including state #{state} from #{lexer.state_stack.last}"
      lexer.states[state].rules.each do |rule|
        matched, new_pos, new_tokens = rule.match(text, pos, lexer)
        # p! xml, new_pos, new_tokens if matched
        return true, new_pos, new_tokens if matched
      end
      return false, pos, [] of Token
    end

    def initialize(node : XML::Node)
      @xml = node.to_s
      include_node = node.children.find { |n| n.name == "include" }
      @state = include_node["state"] if include_node
      add_emitters(node)
    end
  end
  # This rule always matches, unconditionally
  class Always < Rule
    def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
      tokens = [] of Token
      emitters.each do |emitter|
        tokens += emitter.emit(nil, lexer)
      end
      return true, pos, tokens
    end

    def initialize(node : XML::Node)
      @xml = node.to_s
      add_emitters(node)
    end
  end
end
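
Taken together, these three classes cover the three rule shapes that the loader in the main file distinguishes below. A quick usage sketch in Crystal follows; the XML fragments, variable names, and the commented match call are illustrative only, not code from this commit:

require "xml"

# Hypothetical sketch: build one rule of each kind from the XML shapes
# the loader below distinguishes.

# A pattern rule: matches via regex, then emits through its emitters.
pattern_node = XML.parse(%(<rule pattern="\\d+"><token type="Number"/></rule>)).first_element_child.not_nil!
rule = Tartrazine::Rule.new(pattern_node, Regex::Options::ANCHORED)

# An include rule: delegates to all rules of another state.
inc_node = XML.parse(%(<rule><include state="interp"/></rule>)).first_element_child.not_nil!
inc = Tartrazine::IncludeStateRule.new(inc_node)
inc.state # => "interp"

# An "always" rule: no pattern, matches unconditionally; used for stack
# actions such as <rule><pop depth="1"/></rule>.
pop_node = XML.parse(%(<rule><pop depth="1"/></rule>)).first_element_child.not_nil!
always = Tartrazine::Always.new(pop_node)

# Matching needs a lexer for the emitters, so with some `lexer` in scope:
#   matched, new_pos, tokens = rule.match("123 abc", 0, lexer)
# Per the comment in Rule#match, a match that doesn't move the cursor is
# rejected, which is what prevents infinite loops while lexing.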
@@ -1,7 +1,7 @@
 require "base58"
 require "json"
 require "xml"
-
+require "./rules"
 module Tartrazine
   VERSION = "0.1.0"
@@ -23,67 +23,6 @@ module Tartrazine
     end
   end
-
-  class Rule
-    property pattern : Regex = Regex.new ""
-    property emitters : Array(Emitter) = [] of Emitter
-    property xml : String = "foo"
-
-    def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
-      tokens = [] of Token
-      match = pattern.match(text, pos)
-      # We don't match if the match doesn't move the cursor
-      # because that causes infinite loops
-      # pp! match, pattern.inspect, text, pos
-      return false, pos, [] of Token if match.nil? || match.end == 0
-      # Emit the tokens
-      emitters.each do |emitter|
-        # Emit the token
-        tokens += emitter.emit(match, lexer)
-      end
-      # p! xml, match.end, tokens
-      return true, match.end, tokens
-    end
-  end
-
-  # This rule includes another state like this:
-  # <rule>
-  #   <include state="interp"/>
-  # </rule>
-  # </state>
-  # <state name="interp">
-  #   <rule pattern="\$\(\(">
-  #     <token type="Keyword"/>
-  # ...
-
-  class IncludeStateRule < Rule
-    property state : String = ""
-
-    def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
-      # puts "Including state #{state} from #{lexer.state_stack.last}"
-      lexer.states[state].rules.each do |rule|
-        matched, new_pos, new_tokens = rule.match(text, pos, lexer)
-        # p! xml, new_pos, new_tokens if matched
-        return true, new_pos, new_tokens if matched
-      end
-      return false, pos, [] of Token
-    end
-  end
-
-  # These rules look like this:
-  # <rule>
-  #   <pop depth="1"/>
-  # </rule>
-  # They match, don't move pos, probably alter
-  # the stack, probably not generate tokens
-  class Always < Rule
-    def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
-      tokens = [] of Token
-      emitters.each do |emitter|
-        tokens += emitter.emit(nil, lexer)
-      end
-      return true, pos, tokens
-    end
-  end
-
   class Emitter
     property type : String
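
The rule classes moved to rules.cr depend on the Emitter class, which stays behind in this file. Its body is elided by the hunk; judging only from the call sites (Emitter.new(node.name, node), and emitter.emit(match, lexer) returning an Array(Token)), a compatible skeleton would be roughly the following. This is an assumed sketch, not the actual implementation:

# Assumed skeleton only; the real Emitter continues below this hunk,
# and Token is defined elsewhere in this file.
class Emitter
  property type : String

  # Built from an XML element's name ("token", "push", "pop", ...) and
  # the element itself, which carries attributes such as type= or depth=.
  def initialize(@type : String, @xml : XML::Node)
  end

  # Receives the MatchData from Rule#match (nil for Always rules) and the
  # lexer; returns tokens to append. Stack actions would mutate the lexer
  # here rather than emit anything.
  def emit(match, lexer) : Array(Token)
    [] of Token # placeholder; the real dispatch on @type is elided
  end
end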
@@ -274,35 +213,18 @@ module Tartrazine
       case rule_node["pattern"]?
       when nil
         if rule_node.first_element_child.try &.name == "include"
-          rule = IncludeStateRule.new
-          rule.xml = rule_node.to_s
-          include_node = rule_node.children.find { |n| n.name == "include" }
-          rule.state = include_node["state"] if include_node
-          state.rules << rule
+          rule = IncludeStateRule.new(rule_node)
         else
-          rule = Always.new
-          rule.xml = rule_node.to_s
-          state.rules << rule
+          rule = Always.new(rule_node)
         end
       else
         flags = Regex::Options::ANCHORED
         flags |= Regex::Options::MULTILINE unless l.config[:not_multiline]
         flags |= Regex::Options::IGNORE_CASE if l.config[:case_insensitive]
         flags |= Regex::Options::DOTALL if l.config[:dot_all]
-        rule = Rule.new
-        rule.xml = rule_node.to_s
-        rule.pattern = Regex.new(rule_node["pattern"], flags)
-        state.rules << rule
-      end
-
-      next if rule.nil?
-      # Rules contain maybe an emitter and maybe a transformer
-      # emitters emit tokens, transformers do things to
-      # the state stack.
-      rule_node.children.each do |node|
-        next unless node.element?
-        rule.emitters << Emitter.new(node.name, node)
+        rule = Rule.new(rule_node, flags)
       end
+      state.rules << rule
     end
   end
 end
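
For orientation, the loop that actually calls Rule#match lives elsewhere in the lexer and is untouched by this commit. A hypothetical driver, written using only the names visible above (lexer.states, lexer.state_stack, rules), might look like this sketch:

# Hypothetical driver (not from this commit): try the current state's
# rules at pos; on a match, collect tokens and jump to the new position.
pos = 0
tokens = [] of Token
while pos < text.size
  matched = false
  lexer.states[lexer.state_stack.last].rules.each do |rule|
    ok, new_pos, new_tokens = rule.match(text, pos, lexer)
    next unless ok
    tokens += new_tokens
    pos = new_pos
    matched = true
    break
  end
  # Rule#match refuses matches that don't move the cursor, so pos advances
  # on success; if nothing matched at all, skip one character to avoid
  # stalling.
  pos += 1 unless matched
end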