This commit is contained in:
Roberto Alsina 2024-08-06 23:34:14 -03:00
parent 0c86e91b0b
commit e0f697f1f9
2 changed files with 31 additions and 22 deletions

View File

@ -5,18 +5,19 @@ require "./actions"
# state of the lexer.
module Tartrazine
# This rule matches via a regex pattern
class Rule
property pattern : Regex = Re2.new ""
property actions : Array(Action) = [] of Action
property xml : String = "foo"
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
tokens = [] of Token
match = pattern.match(text, pos)
# We don't match if the match doesn't move the cursor
# because that causes infinite loops
Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
return false, pos, [] of Token if match.nil? || match.end == 0
# Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
tokens = [] of Token
# Emit the tokens
actions.each do |action|
# Emit the token
@ -28,7 +29,12 @@ module Tartrazine
def initialize(node : XML::Node, multiline, dotall, ignorecase)
@xml = node.to_s
@pattern = Re2.new(node["pattern"], multiline, dotall, ignorecase)
@pattern = Re2.new(
node["pattern"],
multiline,
dotall,
ignorecase,
anchored: true)
add_actions(node)
end
@ -80,4 +86,24 @@ module Tartrazine
add_actions(node)
end
end
# This is a hack to workaround that Crystal seems to disallow
# having regexes multiline but not dot_all
class Re2 < Regex
@source = "fa"
@options = Regex::Options::None
@jit = true
def initialize(pattern : String, multiline = false, dotall = false, ignorecase = false, anchored = false)
flags = LibPCRE2::UTF | LibPCRE2::DUPNAMES |
LibPCRE2::UCP
flags |= LibPCRE2::MULTILINE if multiline
flags |= LibPCRE2::DOTALL if dotall
flags |= LibPCRE2::CASELESS if ignorecase
flags |= LibPCRE2::ANCHORED if anchored
@re = Regex::PCRE2.compile(pattern, flags) do |error_message|
raise Exception.new(error_message)
end
end
end
end

View File

@ -63,6 +63,8 @@ module Tartrazine
tokens = [] of Token
pos = 0
matched = false
time = 0
count = 0
# Respect the `ensure_nl` config option
if text.size > 0 && text[-1] != '\n' && config[:ensure_nl] && !usingself
@ -184,25 +186,6 @@ module Tartrazine
def self.lexer(name : String) : Lexer
Lexer.from_xml(File.read("lexers/#{name}.xml"))
end
# This is a hack to workaround that Crystal seems to disallow
# having regexes multiline but not dot_all
class Re2 < Regex
@source = "fa"
@options = Regex::Options::None
@jit = true
def initialize(pattern : String, multiline = false, dotall = false, ignorecase = false)
flags = LibPCRE2::UTF | LibPCRE2::DUPNAMES |
LibPCRE2::UCP | LibPCRE2::ANCHORED
flags |= LibPCRE2::MULTILINE if multiline
flags |= LibPCRE2::DOTALL if dotall
flags |= LibPCRE2::CASELESS if ignorecase
@re = Regex::PCRE2.compile(pattern, flags) do |error_message|
raise Exception.new(error_message)
end
end
end
end
# Convenience macros to parse XML