mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-06-07 20:20:26 -03:00
More tests pass
This commit is contained in:
parent
a704c59fa9
commit
d2b61fdc6c
@ -26,6 +26,10 @@ module Onigmo
|
||||
|
||||
def initialize(@begin, @end, @value)
|
||||
end
|
||||
|
||||
def to_s
|
||||
@value
|
||||
end
|
||||
end
|
||||
|
||||
class Regex
|
||||
@ -38,18 +42,31 @@ module Onigmo
|
||||
end
|
||||
|
||||
def match(str : String, offset = 0)
|
||||
# The offset argument is a character index, but Onigmo expects a byte index
|
||||
offset = str.char_index_to_byte_index(offset)
|
||||
if offset.nil?
|
||||
raise Exception.new "Invalid offset"
|
||||
end
|
||||
|
||||
region = LibOnigmo.search(@re, str.to_unsafe, offset, str.bytesize)
|
||||
result = [] of Match?
|
||||
num_regs = LibOnigmo.num_regs(region)
|
||||
if num_regs > 0
|
||||
(0...num_regs).each do |i|
|
||||
b = LibOnigmo.pos(region, i)
|
||||
e = b + LibOnigmo.len(region, i)
|
||||
if b == -1 || e == -1
|
||||
pos = LibOnigmo.pos(region, i)
|
||||
l = LibOnigmo.len(region, i)
|
||||
if pos == -1 || l == -1
|
||||
result << nil
|
||||
else
|
||||
b = str.byte_index_to_char_index(pos)
|
||||
e = str.byte_index_to_char_index(pos + l)
|
||||
# p! pos, l, b, e, str[pos..]
|
||||
if b.nil? || e.nil?
|
||||
raise Exception.new "Invalid substring"
|
||||
end
|
||||
|
||||
v = str[b...e]
|
||||
result << Match.new(b, e, v)
|
||||
result << Match.new(b, b + v.size, v)
|
||||
end
|
||||
end
|
||||
else
|
||||
|
15
src/rules.cr
15
src/rules.cr
@ -15,16 +15,18 @@ module Tartrazine
|
||||
|
||||
class Rule
|
||||
property pattern : Regex = Regex.new ""
|
||||
property pattern2 : ::Regex = ::Regex.new ""
|
||||
property actions : Array(Action) = [] of Action
|
||||
property xml : String = "foo"
|
||||
|
||||
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
|
||||
match = pattern.match(text, pos)
|
||||
match2 = pattern2.match(text, pos)
|
||||
# We don't match if the match doesn't move the cursor
|
||||
# because that causes infinite loops
|
||||
# The `match.begin > pos` is the same as the ANCHORED option
|
||||
return false, pos, [] of Token if match.empty? || match[0].nil? || match[0].try { |m| m.begin > pos }
|
||||
# p! match.map(&.value), text[pos..pos + 20]
|
||||
# p! match.map(&.to_s), match2, text[pos-1..pos + 20],"----------------------"
|
||||
# Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
|
||||
tokens = [] of Token
|
||||
# Emit the tokens
|
||||
@ -33,21 +35,22 @@ module Tartrazine
|
||||
tokens += action.emit(match, lexer)
|
||||
end
|
||||
# Log.trace { "#{xml}, #{match[0].end}, #{tokens}" }
|
||||
return true, match[0].as(Onigmo::Match).end, tokens
|
||||
return true, pos + match[0].as(Onigmo::Match).value.size, tokens
|
||||
end
|
||||
|
||||
def initialize(node : XML::Node, multiline, dotall, ignorecase)
|
||||
@xml = node.to_s
|
||||
pattern = node["pattern"]
|
||||
# flags = Regex::Options::ANCHORED
|
||||
# flags = Regex::Options::NO_UTF_CHECK
|
||||
flags = ::Regex::Options::NO_UTF_CHECK
|
||||
# MULTILINE implies DOTALL which we don't want, so we
|
||||
# use in-pattern flag (?m) instead
|
||||
# flags |= Regex::Options::MULTILINE if multiline
|
||||
flags |= ::Regex::Options::MULTILINE if multiline
|
||||
pattern = "(?m)" + pattern if multiline
|
||||
# flags |= Regex::Options::DOTALL if dotall
|
||||
# flags |= Regex::Options::IGNORE_CASE if ignorecase
|
||||
flags |= ::Regex::Options::DOTALL if dotall
|
||||
flags |= ::Regex::Options::IGNORE_CASE if ignorecase
|
||||
@pattern = Regex.new(pattern, ignorecase, multiline, dotall)
|
||||
@pattern2 = ::Regex.new(pattern, flags)
|
||||
add_actions(node)
|
||||
end
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user