mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-08-02 13:59:51 +00:00
More tests pass
This commit is contained in:
@@ -26,6 +26,10 @@ module Onigmo
|
|||||||
|
|
||||||
def initialize(@begin, @end, @value)
|
def initialize(@begin, @end, @value)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def to_s
|
||||||
|
@value
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class Regex
|
class Regex
|
||||||
@@ -38,18 +42,31 @@ module Onigmo
|
|||||||
end
|
end
|
||||||
|
|
||||||
def match(str : String, offset = 0)
|
def match(str : String, offset = 0)
|
||||||
|
# The offset argument is a character index, but Onigmo expects a byte index
|
||||||
|
offset = str.char_index_to_byte_index(offset)
|
||||||
|
if offset.nil?
|
||||||
|
raise Exception.new "Invalid offset"
|
||||||
|
end
|
||||||
|
|
||||||
region = LibOnigmo.search(@re, str.to_unsafe, offset, str.bytesize)
|
region = LibOnigmo.search(@re, str.to_unsafe, offset, str.bytesize)
|
||||||
result = [] of Match?
|
result = [] of Match?
|
||||||
num_regs = LibOnigmo.num_regs(region)
|
num_regs = LibOnigmo.num_regs(region)
|
||||||
if num_regs > 0
|
if num_regs > 0
|
||||||
(0...num_regs).each do |i|
|
(0...num_regs).each do |i|
|
||||||
b = LibOnigmo.pos(region, i)
|
pos = LibOnigmo.pos(region, i)
|
||||||
e = b + LibOnigmo.len(region, i)
|
l = LibOnigmo.len(region, i)
|
||||||
if b == -1 || e == -1
|
if pos == -1 || l == -1
|
||||||
result << nil
|
result << nil
|
||||||
else
|
else
|
||||||
|
b = str.byte_index_to_char_index(pos)
|
||||||
|
e = str.byte_index_to_char_index(pos + l)
|
||||||
|
# p! pos, l, b, e, str[pos..]
|
||||||
|
if b.nil? || e.nil?
|
||||||
|
raise Exception.new "Invalid substring"
|
||||||
|
end
|
||||||
|
|
||||||
v = str[b...e]
|
v = str[b...e]
|
||||||
result << Match.new(b, e, v)
|
result << Match.new(b, b + v.size, v)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
|
15
src/rules.cr
15
src/rules.cr
@@ -15,16 +15,18 @@ module Tartrazine
|
|||||||
|
|
||||||
class Rule
|
class Rule
|
||||||
property pattern : Regex = Regex.new ""
|
property pattern : Regex = Regex.new ""
|
||||||
|
property pattern2 : ::Regex = ::Regex.new ""
|
||||||
property actions : Array(Action) = [] of Action
|
property actions : Array(Action) = [] of Action
|
||||||
property xml : String = "foo"
|
property xml : String = "foo"
|
||||||
|
|
||||||
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
|
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
|
||||||
match = pattern.match(text, pos)
|
match = pattern.match(text, pos)
|
||||||
|
match2 = pattern2.match(text, pos)
|
||||||
# We don't match if the match doesn't move the cursor
|
# We don't match if the match doesn't move the cursor
|
||||||
# because that causes infinite loops
|
# because that causes infinite loops
|
||||||
# The `match.begin > pos` is the same as the ANCHORED option
|
# The `match.begin > pos` is the same as the ANCHORED option
|
||||||
return false, pos, [] of Token if match.empty? || match[0].nil? || match[0].try { |m| m.begin > pos }
|
return false, pos, [] of Token if match.empty? || match[0].nil? || match[0].try { |m| m.begin > pos }
|
||||||
# p! match.map(&.value), text[pos..pos + 20]
|
# p! match.map(&.to_s), match2, text[pos-1..pos + 20],"----------------------"
|
||||||
# Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
|
# Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
|
||||||
tokens = [] of Token
|
tokens = [] of Token
|
||||||
# Emit the tokens
|
# Emit the tokens
|
||||||
@@ -33,21 +35,22 @@ module Tartrazine
|
|||||||
tokens += action.emit(match, lexer)
|
tokens += action.emit(match, lexer)
|
||||||
end
|
end
|
||||||
# Log.trace { "#{xml}, #{match[0].end}, #{tokens}" }
|
# Log.trace { "#{xml}, #{match[0].end}, #{tokens}" }
|
||||||
return true, match[0].as(Onigmo::Match).end, tokens
|
return true, pos + match[0].as(Onigmo::Match).value.size, tokens
|
||||||
end
|
end
|
||||||
|
|
||||||
def initialize(node : XML::Node, multiline, dotall, ignorecase)
|
def initialize(node : XML::Node, multiline, dotall, ignorecase)
|
||||||
@xml = node.to_s
|
@xml = node.to_s
|
||||||
pattern = node["pattern"]
|
pattern = node["pattern"]
|
||||||
# flags = Regex::Options::ANCHORED
|
# flags = Regex::Options::ANCHORED
|
||||||
# flags = Regex::Options::NO_UTF_CHECK
|
flags = ::Regex::Options::NO_UTF_CHECK
|
||||||
# MULTILINE implies DOTALL which we don't want, so we
|
# MULTILINE implies DOTALL which we don't want, so we
|
||||||
# use in-pattern flag (?m) instead
|
# use in-pattern flag (?m) instead
|
||||||
# flags |= Regex::Options::MULTILINE if multiline
|
flags |= ::Regex::Options::MULTILINE if multiline
|
||||||
pattern = "(?m)" + pattern if multiline
|
pattern = "(?m)" + pattern if multiline
|
||||||
# flags |= Regex::Options::DOTALL if dotall
|
flags |= ::Regex::Options::DOTALL if dotall
|
||||||
# flags |= Regex::Options::IGNORE_CASE if ignorecase
|
flags |= ::Regex::Options::IGNORE_CASE if ignorecase
|
||||||
@pattern = Regex.new(pattern, ignorecase, multiline, dotall)
|
@pattern = Regex.new(pattern, ignorecase, multiline, dotall)
|
||||||
|
@pattern2 = ::Regex.new(pattern, flags)
|
||||||
add_actions(node)
|
add_actions(node)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user