From 2a9e7fde0d34bfb04cb985fd2cce1fcda238ed7e Mon Sep 17 00:00:00 2001
From: Roberto Alsina
Date: Tue, 13 Aug 2024 14:02:13 -0300
Subject: [PATCH] Working onigmo wrapper, but onigmo doesn't support anchored regexes

---
Review notes (text in this area is ignored when the patch is applied):
 * onigwrap.c wraps Onigmo behind opaque pointers: onigwrap_create compiles a
   pattern, onigwrap_search returns a freshly allocated OnigRegion, and
   onigwrap_num_regs / onigwrap_pos / onigwrap_len read that region.
   The caller releases them with onigwrap_region_free and onigwrap_free.
 * onigwrap_create accepts `anchored` but never reads it; the status codes
   stored in `res` (create) and `result` (search) are not inspected.
 * All offsets and lengths are plain pointer arithmetic on `char *`, i.e.
   bytes. The demo in onigmo.cr therefore passes `bytesize` and slices with
   `byte_slice`, and onigwrap_index_in returns the search offset unchanged.
 * The flag parameters are declared Int32 in the binding and compared with
   `== 1` in C, so the demo passes 0/1.
 * The onigwrap_create prototype in onigwrap.h lists the same six parameters
   as the definition in onigwrap.c.
 * Line structure was restored from a whitespace-collapsed copy: indentation
   and blank context lines are reconstructed, and the `index` blob ids were
   not recomputed.

 src/onigmo.cr         | 106 +++++++++++-------------------------------
 src/onigmo/onigwrap.c |  94 +++++++++++++++++++++++++++++++++++++
 src/onigmo/onigwrap.h |  32 +++++++++++++
 3 files changed, 152 insertions(+), 80 deletions(-)
 create mode 100644 src/onigmo/onigwrap.c
 create mode 100644 src/onigmo/onigwrap.h

diff --git a/src/onigmo.cr b/src/onigmo.cr
index f17726f..255cde2 100644
--- a/src/onigmo.cr
+++ b/src/onigmo.cr
@@ -1,88 +1,34 @@
 @[Link("onigmo")]
+@[Link(ldflags: "#{__DIR__}/onigmo/onigwrap.o")]
 
 lib Onigmo
-  type OnigOptionType = UInt32
-  type OnigCaseFoldType = UInt32
-  type OnigDistance = LibC::SizeT
+  type Regex = Pointer(Void)
+  type Region = Pointer(Void)
 
-  struct OnigRegex
-    p : LibC::UChar*
-    used : UInt32
-    alloc : UInt32
+  fun create = onigwrap_create(pattern : LibC::Char*, len : UInt32,
+                               ignoreCase : Int32,
+                               multiline : Int32,
+                               dotall : Int32,
+                               anchored : Int32) : Regex
+  fun free = onigwrap_free(re : Regex)
+  fun region_free = onigwrap_region_free(region : Region)
 
-    num_mem : Int32
-    num_repeat : Int32
-    num_null_check : Int32
-    num_comb_exp_check : Int32
-    num_call : Int32
-    capture_history : UInt32
-    bt_mem_start : UInt32
-    bt_mem_end : UInt32
-    stack_pop_level : Int32
-    repeat_range_alloc : Int32
-    options : OnigOptionType
-    syntax : OnigSyntaxType*
-    name_table : Void*
-    case_fold_flag : OnigCaseFoldType
-    optimize : Int32
-    threshold_len : Int32
-    anchor : Int32
-    anchor_dmin : OnigDistance
-    anchor_dmax : OnigDistance
-    sub_anchor : Int32
-    exact : LibC::UChar*
-    exact_end : LibC::UChar*
-    map : LibC::UChar*
-    int_map : Int32*
-    int_map_backward : Int32*
-    dmin : OnigDistance
-    dmax : OnigDistance
-    chain : OnigRegex*
-  end
-
-  type OnigRegexType = OnigRegex*
-  type OnigCodePoint = UInt32
-  type OnigUChar = LibC::UChar
-  type OnigEncoding = Void*
-
-  struct OnigMetaCharTableType
-    esc : OnigCodePoint
-    anychar : OnigCodePoint
-    anytime : OnigCodePoint
-    zero_or_one_time : OnigCodePoint
-    one_or_one_time : OnigCodePoint
-    anychar_anytime : OnigCodePoint
-  end
-
-  struct OnigSyntaxType
-    op : UInt32
-    op2 : UInt32
-    behavior : UInt32
-    options : OnigOptionType
-    meta_char_table : OnigMetaCharTableType
-  end
-
-  struct OnigErrorInfo
-    enc : OnigEncoding
-    par : OnigUChar*
-    par_end : OnigUChar*
-  end
-
-  ONIG_OPTION_NONE = 0u32
-  ONIG_OPTION_DEFAULT = ONIG_OPTION_NONE
-
-
-  fun new = onig_new(OnigRegex*, OnigUChar*, OnigUChar*, OnigOptionType, OnigEncoding, OnigSyntaxType*, OnigErrorInfo*)
+  fun search = onigwrap_search(re : Regex, str : LibC::Char*, offset : UInt32, length : UInt32) : Region
+  fun num_regs = onigwrap_num_regs(region : Region) : Int32
+  fun pos = onigwrap_pos(region : Region, index : Int32) : Int32
+  fun len = onigwrap_len(region : Region, index : Int32) : Int32
 end
 
-pattern = "a(.*)b|[e-f]+"
-str = "zzzzaffffffffb"
+pattern = "a"
+str = "# foobar"
 
-einfo = Onigmo::OnigErrorInfo.new
-
-Onigmo.new(out reg,
-pattern.to_unsafe,
-pattern.to_unsafe + pattern.size,
-Onigmo::ONIG_OPTION_DEFAULT,
-0,
-Onigmo::ONIG_SYNTAX_DEFAULT, pointerof(einfo))
\ No newline at end of file
+re = Onigmo.create(pattern, pattern.bytesize, 0, 1, 0, 0)
+region = Onigmo.search(re, str.to_unsafe, 0, str.bytesize)
+num_regs = Onigmo.num_regs(region)
+(0...num_regs).each do |i|
+  pos = Onigmo.pos(region, i)
+  len = Onigmo.len(region, i)
+  puts "match #{i}: #{str.byte_slice(pos, len)}"
+end
+Onigmo.region_free(region)
+Onigmo.free(re)
diff --git a/src/onigmo/onigwrap.c b/src/onigmo/onigwrap.c
new file mode 100644
index 0000000..75bc4ae
--- /dev/null
+++ b/src/onigmo/onigwrap.c
@@ -0,0 +1,94 @@
+#include "onigmo.h"
+
+regex_t *onigwrap_create(char *pattern, int len, int ignoreCase, int multiline, int dotall, int anchored )
+{
+    regex_t *reg;
+
+    OnigErrorInfo einfo;
+
+    OnigOptionType onigOptions = ONIG_OPTION_DEFAULT;
+
+    if (ignoreCase == 1)
+        onigOptions |= ONIG_OPTION_IGNORECASE;
+
+    if (multiline == 1)
+        onigOptions |= ONIG_OPTION_NEGATE_SINGLELINE;
+
+    if (dotall == 1)
+        onigOptions |= ONIG_OPTION_DOTALL;
+
+    OnigUChar *stringStart = (OnigUChar*) pattern;
+    OnigUChar *stringEnd = (OnigUChar*) pattern + len;
+    int res = onig_new(&reg, stringStart, stringEnd, onigOptions, ONIG_ENCODING_UTF8, ONIG_SYNTAX_PYTHON, &einfo);
+
+    return reg;
+}
+
+void onigwrap_region_free(OnigRegion *region)
+{
+    onig_region_free(region, 1);
+}
+
+void onigwrap_free(regex_t *reg)
+{
+    onig_free(reg);
+}
+
+int onigwrap_index_in(regex_t *reg, char *charPtr, int offset, int length)
+{
+    OnigUChar *stringStart = (OnigUChar*) charPtr;
+    OnigUChar *stringEnd = (OnigUChar*) (charPtr + length);
+    OnigUChar *stringOffset = (OnigUChar*) (charPtr + offset);
+    OnigUChar *stringRange = (OnigUChar*) stringEnd;
+
+    OnigRegion *region = onig_region_new();
+    int result = onig_search(reg, stringStart, stringEnd, stringOffset, stringRange, region, ONIG_OPTION_NONE);
+    onig_region_free(region, 1);
+
+    if (result >= 0)
+        return result;
+    if (result == ONIG_MISMATCH)
+        return -1;
+    return -2;
+}
+
+OnigRegion *onigwrap_search(regex_t *reg, char *charPtr, int offset, int length)
+{
+    OnigUChar *stringStart = (OnigUChar*) charPtr;
+    OnigUChar *stringEnd = (OnigUChar*) (charPtr + length);
+    OnigUChar *stringOffset = (OnigUChar*) (charPtr + offset);
+    OnigUChar *stringRange = (OnigUChar*) stringEnd;
+
+    OnigRegion *region = onig_region_new();
+
+    int result = onig_search(reg, stringStart, stringEnd, stringOffset, stringRange, region, ONIG_OPTION_NONE);
+    return region;
+}
+
+int onigwrap_num_regs(OnigRegion *region)
+{
+    return region->num_regs;
+}
+
+int onigwrap_pos(OnigRegion *region, int nth)
+{
+    if (nth < region->num_regs)
+    {
+        int result = region->beg[nth];
+        if (result < 0)
+            return -1;
+        return result;
+    }
+    return -1;
+}
+
+int onigwrap_len(OnigRegion *region, int nth)
+{
+    if (nth < region->num_regs)
+    {
+        int result = region->end[nth] - region->beg[nth];
+        return result;
+    }
+    return -1;
+}
+
diff --git a/src/onigmo/onigwrap.h b/src/onigmo/onigwrap.h
new file mode 100644
index 0000000..ee22485
--- /dev/null
+++ b/src/onigmo/onigwrap.h
@@ -0,0 +1,32 @@
+#include "oniguruma.h"
+
+#if defined(_WIN32)
+#define ONIGWRAP_EXTERN extern __declspec(dllexport)
+#else
+#define ONIGWRAP_EXTERN extern
+#endif
+
+ONIGWRAP_EXTERN
+regex_t *onigwrap_create(char *pattern, int len, int ignoreCase, int multiline, int dotall, int anchored);
+
+ONIGWRAP_EXTERN
+void onigwrap_region_free(OnigRegion *region);
+
+ONIGWRAP_EXTERN
+void onigwrap_free(regex_t *reg);
+
+ONIGWRAP_EXTERN
+int onigwrap_index_in(regex_t *reg, char *charPtr, int offset, int length);
+
+ONIGWRAP_EXTERN
+OnigRegion *onigwrap_search(regex_t *reg, char *charPtr, int offset, int length);
+
+ONIGWRAP_EXTERN
+int onigwrap_num_regs(OnigRegion *region);
+
+ONIGWRAP_EXTERN
+int onigwrap_pos(OnigRegion *region, int nth);
+
+ONIGWRAP_EXTERN
+int onigwrap_len(OnigRegion *region, int nth);
+