From a704c59fa9fb220d52756c8118a8da26dba439bd Mon Sep 17 00:00:00 2001
From: Roberto Alsina <roberto.alsina@gmail.com>
Date: Tue, 13 Aug 2024 19:19:12 -0300
Subject: [PATCH] Some tests pass!

---
 spec/tartrazine_spec.cr |  1 -
 src/actions.cr          |  8 +++---
 src/formatters/html.cr  |  2 +-
 src/onigmo.cr           | 62 +++++++++++++++++++++++++++++++----------
 src/onigmo/onigwrap.c   |  2 +-
 src/rules.cr            | 20 ++++++++-----
 6 files changed, 67 insertions(+), 28 deletions(-)

diff --git a/spec/tartrazine_spec.cr b/spec/tartrazine_spec.cr
index 4615a25..4a06dc2 100644
--- a/spec/tartrazine_spec.cr
+++ b/spec/tartrazine_spec.cr
@@ -42,7 +42,6 @@ known_bad = {
   "#{__DIR__}/tests/mcfunction/selectors.txt",
   "#{__DIR__}/tests/php/anonymous_class.txt",
   "#{__DIR__}/tests/html/javascript_unclosed.txt",
-
 }
 
 # Tests that fail because of a limitation in PCRE2
diff --git a/src/actions.cr b/src/actions.cr
index 4ed4008..633583d 100644
--- a/src/actions.cr
+++ b/src/actions.cr
@@ -30,11 +30,11 @@ module Tartrazine
     end
 
     # ameba:disable Metrics/CyclomaticComplexity
-    def emit(match : Regex::MatchData?, lexer : Lexer, match_group = 0) : Array(Token)
+    def emit(match, lexer : Lexer, match_group = 0) : Array(Token)
       case type
       when "token"
         raise Exception.new "Can't have a token without a match" if match.nil?
-        [Token.new(type: xml["type"], value: match[match_group])]
+        [Token.new(type: xml["type"], value: match[match_group].as(Onigmo::Match).value)]
       when "push"
         states_to_push = xml.attributes.select { |attrib|
           attrib.name == "state"
@@ -88,14 +88,14 @@ module Tartrazine
         return [] of Token if match.nil?
         lexer_name = xml["lexer"].downcase
         Log.trace { "to tokenize: #{match[match_group]}" }
-        Tartrazine.lexer(lexer_name).tokenize(match[match_group], usingself: true)
+        Tartrazine.lexer(lexer_name).tokenize(match[match_group].as(Onigmo::Match).value, usingself: true)
       when "usingself"
         # Shunt to another copy of this lexer
         return [] of Token if match.nil?
 
         new_lexer = Lexer.from_xml(lexer.xml)
         Log.trace { "to tokenize: #{match[match_group]}" }
-        new_lexer.tokenize(match[match_group], usingself: true)
+        new_lexer.tokenize(match[match_group].as(Onigmo::Match).value, usingself: true)
       when "combined"
         # Combine two states into one anonymous state
         states = xml.attributes.select { |attrib|
diff --git a/src/formatters/html.cr b/src/formatters/html.cr
index 20f41a2..c8e7611 100644
--- a/src/formatters/html.cr
+++ b/src/formatters/html.cr
@@ -73,7 +73,7 @@ module Tartrazine
           # These are true/false/nil
           outp << "border: none;" if style.border == false
           outp << "font-weight: bold;" if style.bold
-          outp << "font-weight: #{weight_of_bold};" if style.bold == false
+          outp << "font-weight: #{@weight_of_bold};" if style.bold == false
           outp << "font-style: italic;" if style.italic
           outp << "font-style: normal;" if style.italic == false
           outp << "text-decoration: underline;" if style.underline
diff --git a/src/onigmo.cr b/src/onigmo.cr
index 255cde2..4277fce 100644
--- a/src/onigmo.cr
+++ b/src/onigmo.cr
@@ -1,15 +1,14 @@
 @[Link("onigmo")]
 @[Link(ldflags: "#{__DIR__}/onigmo/onigwrap.o")]
 
-lib Onigmo
+lib LibOnigmo
   type Regex = Pointer(Void)
   type Region = Pointer(Void)
 
   fun create = onigwrap_create(pattern : LibC::Char*, len : UInt32,
                                ignoreCase : Int32,
                                multiline : Int32,
-                               dotall : Int32,
-                               anchored : Int32) : Regex
+                               dotall : Int32) : Regex
   fun free = onigwrap_free(re : Regex)
   fun region_free = onigwrap_region_free(region : Region)
 
@@ -19,16 +18,51 @@ lib Onigmo
   fun len = onigwrap_len(region : Region, index : Int32) : Int32
 end
 
-pattern = "a"
+# Object-oriented wrapper around the raw LibOnigmo bindings.
+module Onigmo
+  # One capture group: `begin`/`end` offsets as reported by the wrapper, plus its text.
+  class Match
+    property begin : Int32
+    property end : Int32
+    property value : String
+
+    def initialize(@begin, @end, @value)
+    end
+  end
+
+  # A compiled pattern; the native handle is released again in `finalize`.
+  class Regex
+    def initialize(@pattern : String, @ignorecase = false, @multiline = false, @dotall = false)
+      @re = LibOnigmo.create(@pattern.to_unsafe, @pattern.bytesize, @ignorecase ? 1 : 0, @multiline ? 1 : 0, @dotall ? 1 : 0)
+    end
+
+    def finalize
+      LibOnigmo.free(@re)
+    end
+
+    # Searches `str` starting at `offset`. Returns one entry per group (nil for a
+    # group that did not take part), or an empty array when nothing matched.
+    def match(str : String, offset = 0) : Array(Match?)
+      region = LibOnigmo.search(@re, str.to_unsafe, offset, str.bytesize)
+      result = [] of Match?
+      begin
+        (0...LibOnigmo.num_regs(region)).each do |i|
+          b = LibOnigmo.pos(region, i)
+          len = LibOnigmo.len(region, i)
+          # The search runs over the raw bytes of `str`, so slice by bytes, not chars.
+          result << (b < 0 || len < 0 ? nil : Match.new(b, b + len, str.byte_slice(b, len)))
+        end
+      ensure
+        # Release the region on every path, including the no-match one.
+        LibOnigmo.region_free(region)
+      end
+      result
+    end
+  end
+end
+
+pattern = "#.*x"
 str = "# foobar"
 
-re = Onigmo.create(pattern, pattern.size, false, true, false, false)
-region = Onigmo.search(re, str.to_unsafe, 0, str.bytesize)
-num_regs = Onigmo.num_regs(region)
-(0...num_regs).each do |i|
-  pos = Onigmo.pos(region, i)
-  len = Onigmo.len(region, i)
-  puts "match #{i}: #{str[pos, len]}"
-end
-Onigmo.region_free(region)
-Onigmo.free(re)
+re = Onigmo::Regex.new(pattern, false, false, false)
+p! re.match(str)
diff --git a/src/onigmo/onigwrap.c b/src/onigmo/onigwrap.c
index 75bc4ae..68da0bb 100644
--- a/src/onigmo/onigwrap.c
+++ b/src/onigmo/onigwrap.c
@@ -1,6 +1,6 @@
 #include "onigmo.h"
 
-regex_t *onigwrap_create(char *pattern, int len, int ignoreCase, int multiline, int dotall, int anchored )
+regex_t *onigwrap_create(char *pattern, int len, int ignoreCase, int multiline, int dotall)
 {
 	regex_t *reg;
 
diff --git a/src/rules.cr b/src/rules.cr
index 1f4a7eb..57b1dd0 100644
--- a/src/rules.cr
+++ b/src/rules.cr
@@ -3,6 +3,7 @@ require "./formatter"
 require "./rules"
 require "./styles"
 require "./lexer"
+require "./onigmo"
 
 # These are lexer rules. They match with the text being parsed
 # and perform actions, either emitting tokens or changing the
@@ -10,6 +11,8 @@ require "./lexer"
 module Tartrazine
   # This rule matches via a regex pattern
 
+  alias Regex = Onigmo::Regex
+
   class Rule
     property pattern : Regex = Regex.new ""
     property actions : Array(Action) = [] of Action
@@ -19,7 +22,9 @@ module Tartrazine
       match = pattern.match(text, pos)
       # We don't match if the match doesn't move the cursor
       # because that causes infinite loops
-      return false, pos, [] of Token if match.nil? || match.end == 0
+      # The `match.begin > pos` is the same as the ANCHORED option
+      return false, pos, [] of Token if match.empty? || match[0].nil? || match[0].try { |m| m.begin > pos }
+      # p! match.map(&.value), text[pos..pos + 20]
       # Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
       tokens = [] of Token
       # Emit the tokens
@@ -27,21 +32,22 @@ module Tartrazine
         # Emit the token
         tokens += action.emit(match, lexer)
       end
-      Log.trace { "#{xml}, #{match.end}, #{tokens}" }
-      return true, match.end, tokens
+      # Log.trace { "#{xml}, #{match[0].end}, #{tokens}" }
+      return true, match[0].as(Onigmo::Match).end, tokens
     end
 
     def initialize(node : XML::Node, multiline, dotall, ignorecase)
       @xml = node.to_s
       pattern = node["pattern"]
-      flags = Regex::Options::ANCHORED
+      # flags = Regex::Options::ANCHORED
+      # flags = Regex::Options::NO_UTF_CHECK
       # MULTILINE implies DOTALL which we don't want, so we
       # use in-pattern flag (?m) instead
       # flags |= Regex::Options::MULTILINE if multiline
       pattern = "(?m)" + pattern if multiline
-      flags |= Regex::Options::DOTALL if dotall
-      flags |= Regex::Options::IGNORE_CASE if ignorecase
-      @pattern = Regex.new(pattern, flags)
+      # flags |= Regex::Options::DOTALL if dotall
+      # flags |= Regex::Options::IGNORE_CASE if ignorecase
+      @pattern = Regex.new(pattern, ignorecase, multiline, dotall)
       add_actions(node)
     end