Files
tartrazine/src/rules.cr

103 lines
3.2 KiB
Crystal

require "./actions"
require "./bytes_regex"
require "./formatter"
require "./lexer"
require "./rules"
require "./styles"
# These are lexer rules. They match with the text being parsed
# and perform actions, either emitting tokens or changing the
# state of the lexer.
module Tartrazine
# This rule matches via a regex pattern
alias Regex = BytesRegex::Regex
alias Match = BytesRegex::Match
alias MatchData = Array(Match)
class Rule
property pattern : Regex = Regex.new ""
property actions : Array(Action) = [] of Action
property xml : String = "foo"
def match(text : Bytes, pos, lexer) : Tuple(Bool, Int32, Array(Token))
match = pattern.match(text, pos)
# We don't match if the match doesn't move the cursor
# because that causes infinite loops
return false, pos, [] of Token if match.empty? || match[0].size == 0
# p! match, String.new(text[pos..pos+20])
# Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
tokens = [] of Token
# Emit the tokens
actions.each do |action|
# Emit the token
tokens += action.emit(match, lexer)
end
Log.trace { "#{xml}, #{pos + match[0].size}, #{tokens}" }
return true, pos + match[0].size, tokens
end
def initialize(node : XML::Node, multiline, dotall, ignorecase)
@xml = node.to_s
pattern = node["pattern"]
# flags = Regex::Options::ANCHORED
# MULTILINE implies DOTALL which we don't want, so we
# use in-pattern flag (?m) instead
# flags |= Regex::Options::MULTILINE if multiline
pattern = "(?m)" + pattern if multiline
# flags |= Regex::Options::DOTALL if dotall
# flags |= Regex::Options::IGNORE_CASE if ignorecase
@pattern = Regex.new(pattern, multiline, dotall, ignorecase, true)
add_actions(node)
end
def add_actions(node : XML::Node)
node.children.each do |child|
next unless child.element?
@actions << Action.new(child.name, child)
end
end
end
# This rule includes another state. If any of the rules of the
# included state matches, this rule matches.
class IncludeStateRule < Rule
property state : String = ""
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
Log.trace { "Including state #{state} from #{lexer.state_stack.last}" }
lexer.states[state].rules.each do |rule|
matched, new_pos, new_tokens = rule.match(text, pos, lexer)
Log.trace { "#{xml}, #{new_pos}, #{new_tokens}" } if matched
return true, new_pos, new_tokens if matched
end
return false, pos, [] of Token
end
def initialize(node : XML::Node)
@xml = node.to_s
include_node = node.children.find { |child|
child.name == "include"
}
@state = include_node["state"] if include_node
add_actions(node)
end
end
# This rule always matches, unconditionally
class UnconditionalRule < Rule
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
tokens = [] of Token
actions.each do |action|
tokens += action.emit([] of Match, lexer)
end
return true, pos, tokens
end
def initialize(node : XML::Node)
@xml = node.to_s
add_actions(node)
end
end
end