tartrazine/src/onigmo.cr

86 lines
2.4 KiB
Crystal
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Low-level bindings to the `onigwrap_*` C functions. Links against the system
# `onigmo` library plus the `onigwrap.o` object expected in the `onigmo`
# directory next to this source file.
@[Link("onigmo")]
@[Link(ldflags: "#{__DIR__}/onigmo/onigwrap.o")]
lib LibOnigmo
# Opaque handle to a compiled regular expression (returned by `create`).
type Regex = Pointer(Void)
# Opaque handle to the result of a search (returned by `search`).
type Region = Pointer(Void)
# Compiles a pattern given as a byte pointer plus length. The three option
# arguments are integers; `Onigmo::Regex` passes 1 for enabled, 0 for disabled.
fun create = onigwrap_create(pattern : LibC::Char*, len : UInt32,
ignoreCase : Int32,
multiline : Int32,
dotall : Int32) : Regex
# Releases a handle obtained from `create`.
fun free = onigwrap_free(re : Regex)
# Releases a handle obtained from `search`.
fun region_free = onigwrap_region_free(region : Region)
# Runs the regex against `str`. `Onigmo::Regex#match` passes `offset` as a
# byte offset into the string and `length` as the string's size in bytes.
fun search = onigwrap_search(re : Regex, str : LibC::Char*, offset : UInt32, length : UInt32) : Region
# Number of registers stored in the region.
fun num_regs = onigwrap_num_regs(region : Region) : Int32
# Position and length of register `index`. `Onigmo::Regex#match` treats these
# as byte values and treats -1 from either one as "no match for this register".
fun pos = onigwrap_pos(region : Region, index : Int32) : Int32
fun len = onigwrap_len(region : Region, index : Int32) : Int32
end
module Onigmo
# A single matched span of a string.
#
# `begin` and `end` are the start and end indices of the span and `value`
# is the matched text itself.
class Match
  property begin : Int32
  property end : Int32
  property value : String

  def initialize(@begin, @end, @value)
  end

  # Writes the matched text to *io*.
  #
  # String interpolation, `puts` and `IO#<<` all go through this overload, so
  # without it a `Match` would print as the default object representation
  # instead of the matched text.
  def to_s(io : IO) : Nil
    io << @value
  end

  # Returns the matched text.
  def to_s : String
    @value
  end
end
# A compiled Onigmo regular expression.
#
# Wraps the native handle returned by `LibOnigmo.create`; the handle is
# released in `finalize`.
class Regex
  # Compiles *pattern*. The boolean options are forwarded to the native
  # wrapper as 1 (enabled) or 0 (disabled).
  def initialize(@pattern : String, @ignorecase = false, @multiline = false, @dotall = false)
    @re = LibOnigmo.create(@pattern.to_unsafe, @pattern.bytesize, @ignorecase ? 1 : 0, @multiline ? 1 : 0, @dotall ? 1 : 0)
  end

  # Releases the native regex handle.
  def finalize
    LibOnigmo.free(@re)
  end

  # Searches *str* starting at character index *offset*.
  #
  # Returns an array with one entry per register reported by the native
  # search: a `Match` whose `begin`/`end` are character indices into *str*,
  # or `nil` when the register's position or length is -1. The array is
  # empty when no registers are reported.
  #
  # Raises `Exception` ("Invalid offset") when *offset* is not a valid
  # character index of *str*, and `Exception` ("Invalid substring") when a
  # reported byte position cannot be converted to a character index.
  def match(str : String, offset = 0)
    # The offset argument is a character index, but Onigmo expects a byte index
    byte_offset = str.char_index_to_byte_index(offset) || raise Exception.new("Invalid offset")

    region = LibOnigmo.search(@re, str.to_unsafe, byte_offset, str.bytesize)
    begin
      result = [] of Match?
      # An empty range (no registers) simply yields an empty result.
      (0...LibOnigmo.num_regs(region)).each do |i|
        pos = LibOnigmo.pos(region, i)
        len = LibOnigmo.len(region, i)
        if pos == -1 || len == -1
          result << nil
          next
        end

        # The native side reports byte offsets; callers get character indices.
        first = str.byte_index_to_char_index(pos) || raise Exception.new("Invalid substring")
        last = str.byte_index_to_char_index(pos + len) || raise Exception.new("Invalid substring")
        result << Match.new(first, last, str[first...last])
      end
      result
    ensure
      # Always release the native region, including on the empty-result path
      # and when an exception is raised above.
      LibOnigmo.region_free(region)
    end
  end
end
end
# pattern = "\\w"
# str = "α"
# re = Onigmo::Regex.new(pattern, false, false, false)
# p! re.match(str)