Some tests pass!

Roberto Alsina 2024-08-13 19:19:12 -03:00
parent 2a9e7fde0d
commit a704c59fa9
6 changed files with 67 additions and 28 deletions


@@ -42,7 +42,6 @@ known_bad = {
   "#{__DIR__}/tests/mcfunction/selectors.txt",
   "#{__DIR__}/tests/php/anonymous_class.txt",
   "#{__DIR__}/tests/html/javascript_unclosed.txt",
 }

 # Tests that fail because of a limitation in PCRE2


@@ -30,11 +30,11 @@ module Tartrazine
     end

     # ameba:disable Metrics/CyclomaticComplexity
-    def emit(match : Regex::MatchData?, lexer : Lexer, match_group = 0) : Array(Token)
+    def emit(match, lexer : Lexer, match_group = 0) : Array(Token)
       case type
       when "token"
         raise Exception.new "Can't have a token without a match" if match.nil?
-        [Token.new(type: xml["type"], value: match[match_group])]
+        [Token.new(type: xml["type"], value: match[match_group].as(Onigmo::Match).value)]
       when "push"
         states_to_push = xml.attributes.select { |attrib|
           attrib.name == "state"
@@ -88,14 +88,14 @@ module Tartrazine
         return [] of Token if match.nil?
         lexer_name = xml["lexer"].downcase
         Log.trace { "to tokenize: #{match[match_group]}" }
-        Tartrazine.lexer(lexer_name).tokenize(match[match_group], usingself: true)
+        Tartrazine.lexer(lexer_name).tokenize(match[match_group].as(Onigmo::Match).value, usingself: true)
       when "usingself"
         # Shunt to another copy of this lexer
         return [] of Token if match.nil?
         new_lexer = Lexer.from_xml(lexer.xml)
         Log.trace { "to tokenize: #{match[match_group]}" }
-        new_lexer.tokenize(match[match_group], usingself: true)
+        new_lexer.tokenize(match[match_group].as(Onigmo::Match).value, usingself: true)
       when "combined"
         # Combine two states into one anonymous state
         states = xml.attributes.select { |attrib|
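
The dropped Regex::MatchData type and the new .as(Onigmo::Match).value casts follow from the wrapper introduced further down: a match is now an Array of nilable Onigmo::Match entries instead of a stdlib MatchData. A minimal sketch of that unwrapping, assuming the wrapper below; group_value is a hypothetical helper, not part of this commit:

    # Sketch only: assumes Onigmo::Regex#match returns Array(Onigmo::Match?)
    # as defined in onigmo.cr below. `group_value` is a hypothetical helper.
    def group_value(match : Array(Onigmo::Match?), group : Int32) : String?
      m = match[group]?      # out-of-range group index -> nil
      return nil if m.nil?   # group did not participate in the match
      m.value                # the captured substring
    end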


@@ -73,7 +73,7 @@ module Tartrazine
       # These are true/false/nil
       outp << "border: none;" if style.border == false
       outp << "font-weight: bold;" if style.bold
-      outp << "font-weight: #{weight_of_bold};" if style.bold == false
+      outp << "font-weight: #{@weight_of_bold};" if style.bold == false
       outp << "font-style: italic;" if style.italic
       outp << "font-style: normal;" if style.italic == false
       outp << "text-decoration: underline;" if style.underline


@@ -1,15 +1,14 @@
 @[Link("onigmo")]
 @[Link(ldflags: "#{__DIR__}/onigmo/onigwrap.o")]
-lib Onigmo
+lib LibOnigmo
   type Regex = Pointer(Void)
   type Region = Pointer(Void)

   fun create = onigwrap_create(pattern : LibC::Char*, len : UInt32,
                                ignoreCase : Int32,
                                multiline : Int32,
-                               dotall : Int32,
-                               anchored : Int32) : Regex
+                               dotall : Int32) : Regex

   fun free = onigwrap_free(re : Regex)
   fun region_free = onigwrap_region_free(region : Region)
@@ -19,16 +18,51 @@ lib Onigmo
   fun len = onigwrap_len(region : Region, index : Int32) : Int32
 end

-pattern = "a"
+module Onigmo
+  class Match
+    property begin : Int32
+    property end : Int32
+    property value : String
+
+    def initialize(@begin, @end, @value)
+    end
+  end
+
+  class Regex
+    def initialize(@pattern : String, @ignorecase = false, @multiline = false, @dotall = false)
+      @re = LibOnigmo.create(@pattern.to_unsafe, @pattern.bytesize, @ignorecase ? 1 : 0, @multiline ? 1 : 0, @dotall ? 1 : 0)
+    end
+
+    def finalize
+      LibOnigmo.free(@re)
+    end
+
+    def match(str : String, offset = 0)
+      region = LibOnigmo.search(@re, str.to_unsafe, offset, str.bytesize)
+      result = [] of Match?
+      num_regs = LibOnigmo.num_regs(region)
+      if num_regs > 0
+        (0...num_regs).each do |i|
+          b = LibOnigmo.pos(region, i)
+          e = b + LibOnigmo.len(region, i)
+          if b == -1 || e == -1
+            result << nil
+          else
+            v = str[b...e]
+            result << Match.new(b, e, v)
+          end
+        end
+      else
+        return [] of Match
+      end
+      LibOnigmo.region_free(region)
+      result
+    end
+  end
+end
+
+pattern = "#.*x"
 str = "# foobar"
-re = Onigmo.create(pattern, pattern.size, false, true, false, false)
-region = Onigmo.search(re, str.to_unsafe, 0, str.bytesize)
-num_regs = Onigmo.num_regs(region)
-(0...num_regs).each do |i|
-  pos = Onigmo.pos(region, i)
-  len = Onigmo.len(region, i)
-  puts "match #{i}: #{str[pos, len]}"
-end
-Onigmo.region_free(region)
-Onigmo.free(re)
+re = Onigmo::Regex.new(pattern, false, false, false)
+p! re.match(str)
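
A short usage sketch for the wrapper above, assuming onigwrap.o has been built and linked as declared in the Link annotations; the alternation pattern is illustrative and shows how non-participating groups come back as nil:

    # Sketch: exercise Onigmo::Regex against an alternation with two groups.
    re = Onigmo::Regex.new("(foo)|(bar)")
    groups = re.match("bar baz")
    # groups[0] is the whole match, groups[1] is nil (the "foo" branch did not
    # participate), groups[2] holds "bar"
    groups.each_with_index do |g, i|
      puts "group #{i}: #{g ? g.value : "nil"}"
    end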


@@ -1,6 +1,6 @@
 #include "onigmo.h"

-regex_t *onigwrap_create(char *pattern, int len, int ignoreCase, int multiline, int dotall, int anchored )
+regex_t *onigwrap_create(char *pattern, int len, int ignoreCase, int multiline, int dotall)
 {
     regex_t *reg;


@@ -3,6 +3,7 @@ require "./formatter"
 require "./rules"
 require "./styles"
 require "./lexer"
+require "./onigmo"

 # These are lexer rules. They match with the text being parsed
 # and perform actions, either emitting tokens or changing the
@@ -10,6 +11,8 @@ require "./lexer"
 module Tartrazine
   # This rule matches via a regex pattern
+  alias Regex = Onigmo::Regex
+
   class Rule
     property pattern : Regex = Regex.new ""
     property actions : Array(Action) = [] of Action
@@ -19,7 +22,9 @@ module Tartrazine
       match = pattern.match(text, pos)
       # We don't match if the match doesn't move the cursor
       # because that causes infinite loops
-      return false, pos, [] of Token if match.nil? || match.end == 0
+      # The `match.begin > pos` is the same as the ANCHORED option
+      return false, pos, [] of Token if match.empty? || match[0].nil? || match[0].try { |m| m.begin > pos }
+      # p! match.map(&.value), text[pos..pos + 20]
       # Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
       tokens = [] of Token
       # Emit the tokens
@@ -27,21 +32,22 @@ module Tartrazine
         # Emit the token
         tokens += action.emit(match, lexer)
       end
-      Log.trace { "#{xml}, #{match.end}, #{tokens}" }
-      return true, match.end, tokens
+      # Log.trace { "#{xml}, #{match[0].end}, #{tokens}" }
+      return true, match[0].as(Onigmo::Match).end, tokens
     end

     def initialize(node : XML::Node, multiline, dotall, ignorecase)
       @xml = node.to_s
       pattern = node["pattern"]
-      flags = Regex::Options::ANCHORED
+      # flags = Regex::Options::ANCHORED
+      # flags = Regex::Options::NO_UTF_CHECK
       # MULTILINE implies DOTALL which we don't want, so we
       # use in-pattern flag (?m) instead
       # flags |= Regex::Options::MULTILINE if multiline
       pattern = "(?m)" + pattern if multiline
-      flags |= Regex::Options::DOTALL if dotall
-      flags |= Regex::Options::IGNORE_CASE if ignorecase
-      @pattern = Regex.new(pattern, flags)
+      # flags |= Regex::Options::DOTALL if dotall
+      # flags |= Regex::Options::IGNORE_CASE if ignorecase
+      @pattern = Regex.new(pattern, ignorecase, multiline, dotall)
       add_actions(node)
     end
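
The commented-out flags show what no longer goes through PCRE2 options: multiline is folded into the pattern via (?m), and anchoring is emulated by the match.begin > pos check above, since Onigmo's search scans forward from the offset. A hedged sketch of that anchoring check in isolation; the helper name is illustrative, not from the commit:

    # Sketch: emulate PCRE2's ANCHORED option on top of Onigmo's scanning search
    # by accepting a hit only if it starts exactly at `pos`.
    def anchored_match?(re : Onigmo::Regex, text : String, pos : Int32) : Bool
      groups = re.match(text, pos)
      return false if groups.empty? || groups[0].nil?
      groups[0].not_nil!.begin == pos
    end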