More tests pass

This commit is contained in:
2024-08-13 20:09:36 -03:00
parent a704c59fa9
commit d2b61fdc6c
2 changed files with 30 additions and 10 deletions

View File

@@ -26,6 +26,10 @@ module Onigmo
# Creates a match record; the three arguments are stored directly in the
# instance variables @begin, @end and @value.
def initialize(@begin, @end, @value) def initialize(@begin, @end, @value)
end end
# Returns @value, the value this match was constructed with.
def to_s
@value
end
end end
class Regex class Regex
@@ -38,18 +42,31 @@ module Onigmo
end end
def match(str : String, offset = 0) def match(str : String, offset = 0)
# The offset argument is a character index, but Onigmo expects a byte index
offset = str.char_index_to_byte_index(offset)
if offset.nil?
raise Exception.new "Invalid offset"
end
region = LibOnigmo.search(@re, str.to_unsafe, offset, str.bytesize) region = LibOnigmo.search(@re, str.to_unsafe, offset, str.bytesize)
result = [] of Match? result = [] of Match?
num_regs = LibOnigmo.num_regs(region) num_regs = LibOnigmo.num_regs(region)
if num_regs > 0 if num_regs > 0
(0...num_regs).each do |i| (0...num_regs).each do |i|
b = LibOnigmo.pos(region, i) pos = LibOnigmo.pos(region, i)
e = b + LibOnigmo.len(region, i) l = LibOnigmo.len(region, i)
if b == -1 || e == -1 if pos == -1 || l == -1
result << nil result << nil
else else
b = str.byte_index_to_char_index(pos)
e = str.byte_index_to_char_index(pos + l)
# p! pos, l, b, e, str[pos..]
if b.nil? || e.nil?
raise Exception.new "Invalid substring"
end
v = str[b...e] v = str[b...e]
result << Match.new(b, e, v) result << Match.new(b, b + v.size, v)
end end
end end
else else

View File

@@ -15,16 +15,18 @@ module Tartrazine
class Rule class Rule
property pattern : Regex = Regex.new "" property pattern : Regex = Regex.new ""
property pattern2 : ::Regex = ::Regex.new ""
property actions : Array(Action) = [] of Action property actions : Array(Action) = [] of Action
property xml : String = "foo" property xml : String = "foo"
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token)) def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
match = pattern.match(text, pos) match = pattern.match(text, pos)
match2 = pattern2.match(text, pos)
# We don't match if the match doesn't move the cursor # We don't match if the match doesn't move the cursor
# because that causes infinite loops # because that causes infinite loops
# The `match.begin > pos` is the same as the ANCHORED option # The `match.begin > pos` is the same as the ANCHORED option
return false, pos, [] of Token if match.empty? || match[0].nil? || match[0].try { |m| m.begin > pos } return false, pos, [] of Token if match.empty? || match[0].nil? || match[0].try { |m| m.begin > pos }
# p! match.map(&.value), text[pos..pos + 20] # p! match.map(&.to_s), match2, text[pos-1..pos + 20],"----------------------"
# Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" } # Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
tokens = [] of Token tokens = [] of Token
# Emit the tokens # Emit the tokens
@@ -33,21 +35,22 @@ module Tartrazine
tokens += action.emit(match, lexer) tokens += action.emit(match, lexer)
end end
# Log.trace { "#{xml}, #{match[0].end}, #{tokens}" } # Log.trace { "#{xml}, #{match[0].end}, #{tokens}" }
return true, match[0].as(Onigmo::Match).end, tokens return true, pos + match[0].as(Onigmo::Match).value.size, tokens
end end
# Builds a rule from an XML node: stores the node's string form in @xml,
# reads the node's "pattern" attribute (prefixing it with "(?m)" when
# `multiline` is set), compiles it into @pattern, and finally calls
# add_actions(node).
# NOTE(review): the new side of this diff also compiles the same pattern a
# second time into @pattern2 with ::Regex and option flags — presumably a
# debugging aid for comparing the two engines; confirm before relying on it.
def initialize(node : XML::Node, multiline, dotall, ignorecase) def initialize(node : XML::Node, multiline, dotall, ignorecase)
@xml = node.to_s @xml = node.to_s
pattern = node["pattern"] pattern = node["pattern"]
# flags = Regex::Options::ANCHORED # flags = Regex::Options::ANCHORED
# flags = Regex::Options::NO_UTF_CHECK flags = ::Regex::Options::NO_UTF_CHECK
# MULTILINE implies DOTALL which we don't want, so we # MULTILINE implies DOTALL which we don't want, so we
# use in-pattern flag (?m) instead # use in-pattern flag (?m) instead
# flags |= Regex::Options::MULTILINE if multiline flags |= ::Regex::Options::MULTILINE if multiline
pattern = "(?m)" + pattern if multiline pattern = "(?m)" + pattern if multiline
# flags |= Regex::Options::DOTALL if dotall flags |= ::Regex::Options::DOTALL if dotall
# flags |= Regex::Options::IGNORE_CASE if ignorecase flags |= ::Regex::Options::IGNORE_CASE if ignorecase
@pattern = Regex.new(pattern, ignorecase, multiline, dotall) @pattern = Regex.new(pattern, ignorecase, multiline, dotall)
@pattern2 = ::Regex.new(pattern, flags)
add_actions(node) add_actions(node)
end end