2024-08-14 14:06:53 +00:00
|
|
|
# Thin PCRE2 wrapper for running regular expressions directly over `Bytes`
# (instead of `String`) and getting back byte offsets for every capture group.
module BytesRegex
  extend self

  # A compiled PCRE2 pattern together with a reusable match-data block.
  #
  # The pattern is compiled with `UTF | UCP | NO_UTF_CHECK`.
  #
  # NOTE(review): `NO_UTF_CHECK` is used both at compile time and at match
  # time while the subject is an arbitrary byte slice. The PCRE2 documentation
  # describes matching invalid UTF-8 with this flag as undefined behaviour, so
  # callers presumably must only pass valid UTF-8 — confirm against callers.
  #
  # The single `@match_data` block is reused by every `#match` call on the
  # instance; `#match` reads everything it needs out of it before returning.
  class Regex
    # Compiles *pattern*.
    #
    # * *multiline*  - adds `LibPCRE2::MULTILINE`
    # * *dotall*     - adds `LibPCRE2::DOTALL`
    # * *ignorecase* - adds `LibPCRE2::CASELESS`
    # * *anchored*   - adds `LibPCRE2::ANCHORED`
    #
    # Raises `ArgumentError` (message includes the PCRE2 error text and the
    # offset reported by the compiler) when the pattern does not compile, and
    # `Exception` when the match-data block cannot be allocated.
    def initialize(pattern : String, multiline = false, dotall = false, ignorecase = false, anchored = false)
      flags = LibPCRE2::UTF | LibPCRE2::UCP | LibPCRE2::NO_UTF_CHECK
      flags |= LibPCRE2::MULTILINE if multiline
      flags |= LibPCRE2::DOTALL if dotall
      flags |= LibPCRE2::CASELESS if ignorecase
      flags |= LibPCRE2::ANCHORED if anchored

      @re = LibPCRE2.compile(pattern, pattern.bytesize, flags, out errorcode, out erroroffset, nil)
      unless @re
        # A null code pointer signals failure; turn the numeric error code
        # into its human-readable message.
        msg = String.new(256) do |buffer|
          bytesize = LibPCRE2.get_error_message(errorcode, buffer, 256)
          {bytesize, 0}
        end
        raise ArgumentError.new "Error #{msg} compiling regex at offset #{erroroffset}"
      end

      @match_data = LibPCRE2.match_data_create_from_pattern(@re, nil)
      unless @match_data
        # Release the compiled pattern explicitly before raising so it is
        # freed on this failure path instead of relying on `#finalize`.
        LibPCRE2.code_free(@re)
        raise Exception.new "Error allocating match data for regex"
      end
    end

    # Releases the native match-data block and the compiled pattern.
    def finalize
      LibPCRE2.match_data_free(@match_data)
      LibPCRE2.code_free(@re)
    end

    # Matches the pattern against *str*, starting the search at byte offset
    # *pos*.
    #
    # Returns one `Match` per ovector pair reported by PCRE2 (index 0 is the
    # whole match, followed by the capture groups), or an empty array when
    # `LibPCRE2.match` returns zero or a negative code (no match or an error).
    #
    # Each `Match#value` is a sub-slice of *str*, except for zero-length
    # results, which get a fresh empty `Bytes`.
    #
    # NOTE(review): per the PCRE2 API, a group that did not participate in the
    # match has both offsets set to `PCRE2_UNSET`; such a group is returned
    # here as an empty value whose `start` is that sentinel — confirm callers
    # expect this.
    def match(str : Bytes, pos = 0) : Array(Match)
      rc = LibPCRE2.match(@re, str, str.size, pos, LibPCRE2::NO_UTF_CHECK, @match_data, nil)
      return [] of Match unless rc > 0

      ovector = LibPCRE2.get_ovector_pointer(@match_data)
      (0...rc).map do |i|
        m_start = ovector[2 * i]
        m_end = ovector[2 * i + 1]
        # Equal offsets are handled without slicing: this covers empty matches
        # and avoids indexing *str* with out-of-range sentinel offsets.
        m_value = m_start == m_end ? Bytes.new(0) : str[m_start...m_end]
        Match.new(m_value, m_start, m_end - m_start)
      end
    end
  end

  # One matched span: the matched bytes plus their position in the subject.
  struct Match
    # The matched bytes.
    property value : Bytes
    # Byte offset of the span within the subject.
    property start : UInt64
    # Length of the span in bytes.
    property size : UInt64

    def initialize(@value : Bytes, @start : UInt64, @size : UInt64)
    end
  end
end
|
|
|
|
|
|
|
|
# Example usage:
#
#   pattern = "foo"
#   str = "foo bar"
#   re = BytesRegex::Regex.new(pattern)
#   p! String.new(re.match(str.to_slice)[0].value)
|