More tests pass

This commit is contained in:
Roberto Alsina 2024-08-13 20:09:36 -03:00
parent a704c59fa9
commit d2b61fdc6c
2 changed files with 30 additions and 10 deletions

View File

@ -26,6 +26,10 @@ module Onigmo
# Stores the three arguments directly in the instance variables @begin, @end
# and @value (Crystal's `@ivar` parameter shorthand), so the body is empty.
# NOTE(review): presumably the constructor of Onigmo::Match, with begin/end as
# the match bounds and value as the matched text — the class header is outside
# this hunk; confirm.
def initialize(@begin, @end, @value)
end
# Returns @value unchanged as this object's string form.
# NOTE(review): assumes @value is already a String — its type is not declared
# in this hunk; confirm.
def to_s
@value
end
end
class Regex
@ -38,18 +42,31 @@ module Onigmo
end
def match(str : String, offset = 0)
# The offset argument is a character index, but Onigmo expects a byte index
offset = str.char_index_to_byte_index(offset)
if offset.nil?
raise Exception.new "Invalid offset"
end
region = LibOnigmo.search(@re, str.to_unsafe, offset, str.bytesize)
result = [] of Match?
num_regs = LibOnigmo.num_regs(region)
if num_regs > 0
(0...num_regs).each do |i|
b = LibOnigmo.pos(region, i)
e = b + LibOnigmo.len(region, i)
if b == -1 || e == -1
pos = LibOnigmo.pos(region, i)
l = LibOnigmo.len(region, i)
if pos == -1 || l == -1
result << nil
else
b = str.byte_index_to_char_index(pos)
e = str.byte_index_to_char_index(pos + l)
# p! pos, l, b, e, str[pos..]
if b.nil? || e.nil?
raise Exception.new "Invalid substring"
end
v = str[b...e]
result << Match.new(b, e, v)
result << Match.new(b, b + v.size, v)
end
end
else

View File

@ -15,16 +15,18 @@ module Tartrazine
class Rule
property pattern : Regex = Regex.new ""
property pattern2 : ::Regex = ::Regex.new ""
property actions : Array(Action) = [] of Action
property xml : String = "foo"
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
match = pattern.match(text, pos)
match2 = pattern2.match(text, pos)
# We don't match if the match doesn't move the cursor
# because that causes infinite loops
# The `match.begin > pos` is the same as the ANCHORED option
return false, pos, [] of Token if match.empty? || match[0].nil? || match[0].try { |m| m.begin > pos }
# p! match.map(&.value), text[pos..pos + 20]
# p! match.map(&.to_s), match2, text[pos-1..pos + 20],"----------------------"
# Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
tokens = [] of Token
# Emit the tokens
@ -33,21 +35,22 @@ module Tartrazine
tokens += action.emit(match, lexer)
end
# Log.trace { "#{xml}, #{match[0].end}, #{tokens}" }
return true, match[0].as(Onigmo::Match).end, tokens
return true, pos + match[0].as(Onigmo::Match).value.size, tokens
end
# Builds a rule from an XML node.
#
# Keeps the node's serialized form in @xml, reads the node's "pattern"
# attribute, and compiles it twice: once into @pattern and once into
# @pattern2. Finally hands the node to add_actions (defined outside this hunk).
#
# node       - XML::Node carrying a "pattern" attribute.
# multiline  - when truthy, prefixes the pattern with "(?m)" and sets MULTILINE.
# dotall     - when truthy, sets DOTALL in the flags used for @pattern2.
# ignorecase - when truthy, sets IGNORE_CASE in the flags used for @pattern2.
#
# All three booleans are also passed positionally to Regex.new for @pattern.
def initialize(node : XML::Node, multiline, dotall, ignorecase)
@xml = node.to_s
pattern = node["pattern"]
# flags = Regex::Options::ANCHORED
# flags = Regex::Options::NO_UTF_CHECK
# Flags for the top-level ::Regex built below; starts from NO_UTF_CHECK only.
flags = ::Regex::Options::NO_UTF_CHECK
# MULTILINE implies DOTALL which we don't want, so we
# use in-pattern flag (?m) instead
# flags |= Regex::Options::MULTILINE if multiline
# NOTE(review): the next line still sets MULTILINE in `flags` (used for
# @pattern2) even though the comment above says to avoid it — confirm intended.
flags |= ::Regex::Options::MULTILINE if multiline
pattern = "(?m)" + pattern if multiline
# flags |= Regex::Options::DOTALL if dotall
# flags |= Regex::Options::IGNORE_CASE if ignorecase
flags |= ::Regex::Options::DOTALL if dotall
flags |= ::Regex::Options::IGNORE_CASE if ignorecase
# NOTE(review): unqualified `Regex` presumably resolves to a project-local,
# Onigmo-backed class rather than the standard library one — confirm.
@pattern = Regex.new(pattern, ignorecase, multiline, dotall)
# Same pattern string (including any "(?m)" prefix) compiled with the
# top-level ::Regex and the flags assembled above.
@pattern2 = ::Regex.new(pattern, flags)
add_actions(node)
end