tartrazine/src/bytes_regex.cr

74 lines
1.9 KiB
Crystal
Raw Normal View History

module BytesRegex
extend self
# A PCRE2-backed regular expression that is matched against raw `Bytes`
# rather than against a `String`.
#
# The compiled pattern and one match-data block are created in the
# constructor and released in `#finalize`. The same match-data block is
# reused by every call to `#match`.
class Regex
  # Compiles *pattern* with PCRE2.
  #
  # `UTF`, `UCP` and `NO_UTF_CHECK` are always set; *multiline*, *dotall*,
  # *ignorecase* and *anchored* add the PCRE2 option of the same name.
  #
  # Raises `Exception` carrying the PCRE2 error message and the offset
  # reported by the compiler when the pattern cannot be compiled.
  def initialize(pattern : String, multiline = false, dotall = false, ignorecase = false, anchored = false)
    flags = LibPCRE2::UTF | LibPCRE2::UCP | LibPCRE2::NO_UTF_CHECK
    flags |= LibPCRE2::MULTILINE if multiline
    flags |= LibPCRE2::DOTALL if dotall
    flags |= LibPCRE2::CASELESS if ignorecase
    flags |= LibPCRE2::ANCHORED if anchored

    # The compile result is assigned straight to @re (not via a local) so the
    # instance variable's type can be inferred from the lib call. A null
    # pointer is falsey, which is how a failed compilation is detected.
    unless @re = LibPCRE2.compile(pattern, pattern.bytesize, flags, out errorcode, out erroroffset, nil)
      msg = String.new(256) do |buffer|
        written = LibPCRE2.get_error_message(errorcode, buffer, 256)
        {written, 0}
      end
      raise Exception.new("Error #{msg} compiling regex at offset #{erroroffset}")
    end

    @match_data = LibPCRE2.match_data_create_from_pattern(@re, nil)
  end

  # Frees the match-data block and the compiled pattern.
  def finalize
    LibPCRE2.match_data_free(@match_data)
    LibPCRE2.code_free(@re)
  end

  # Runs the pattern against *str*, starting at byte offset *pos*.
  #
  # Returns one `Match` per offset pair reported by PCRE2, in ovector order
  # (per the PCRE2 API, entry 0 is the overall match and the following
  # entries are the capture groups). Returns an empty array whenever
  # `LibPCRE2.match` yields a result that is not positive.
  def match(str : Bytes, pos = 0) : Array(Match)
    count = LibPCRE2.match(@re, str, str.size, pos, LibPCRE2::NO_UTF_CHECK, @match_data, nil)
    return [] of Match unless count > 0

    offsets = LibPCRE2.get_ovector_pointer(@match_data)
    Array(Match).new(count) do |group|
      first = offsets[2 * group]
      last = offsets[2 * group + 1]
      # Equal offsets mean an empty capture; skip slicing *str* in that case.
      # NOTE(review): presumably this also covers unset groups, whose two
      # offsets PCRE2 reports as the same sentinel value - confirm.
      bytes = first == last ? Bytes.new(0) : str[first...last]
      Match.new(bytes, first, last - first)
    end
  end
end
2024-08-16 02:11:49 +00:00
# One entry of a `Regex#match` result: the matched bytes together with
# the offset and length they were built from.
struct Match
  # The matched bytes.
  property value : Bytes
  # Start offset passed in by the creator of this match.
  property start : UInt64
  # Length passed in by the creator of this match.
  property size : UInt64

  # Stores the three fields as given; no validation is performed.
  def initialize(value : Bytes, start : UInt64, size : UInt64)
    @value = value
    @start = start
    @size = size
  end
end
end
# pattern = "foo"
# str = "foo bar"
# re = BytesRegex::Regex.new(pattern)
# p! String.new(re.match(str.to_slice)[0].value)