refactor

2025-08-05 15:55:35 +00:00 · 2024-08-04 20:00:48 -03:00
parent 696227a935
commit 935e72c18e
3 changed files with 11 additions and 47 deletions
--- a/spec/tartrazine_spec.cr
+++ b/spec/tartrazine_spec.cr
@@ -74,7 +74,7 @@ end
 # Helper that creates lexer and tokenizes
 def tokenize(lexer_name, text)
-  lexer = Tartrazine::Lexer.from_xml(File.read("lexers/#{lexer_name}.xml"))
+  lexer = Tartrazine.get_lexer(lexer_name)
  collapse_tokens(lexer.tokenize(text))
 end
--- a/src/actions.cr
+++ b/src/actions.cr
@@ -66,8 +66,8 @@ module Tartrazine
        #
        raise Exception.new "Can't have a token without a match" if match.nil?
-        # Each group matches an action
+        # Each group matches an action. If a group did not participate in the match (nil),
-
+        # the action is skipped.
        result = [] of Token
        @actions.each_with_index do |e, i|
          next if match[i + 1]?.nil?
@@ -79,7 +79,7 @@ module Tartrazine
        return [] of Token if match.nil?
        lexer_name = xml["lexer"].downcase
        # pp! "to tokenize:", match[match_group]
-        LEXERS[lexer_name].tokenize(match[match_group], usingself: true)
+        Tartrazine.get_lexer(lexer_name).tokenize(match[match_group], usingself: true)
      when "usingself"
        # Shunt to another copy of this lexer
        return [] of Token if match.nil?
--- a/src/tartrazine.cr
+++ b/src/tartrazine.cr
@@ -13,6 +13,10 @@ module Tartrazine
  # For explanations on what actions, transformers, etc do
  # the Pygments documentation is a good place to start.
  # https://pygments.org/docs/lexerdevelopment/
  # A Lexer state. A state has a name and a list of rules.
  # The state machine has a state stack containing references
  # to states to decide which rules to apply.
  class State
    property name : String = ""
    property rules = [] of Rule
@@ -25,10 +29,9 @@ module Tartrazine
    end
  end
  # A token, the output of the tokenizer
  alias Token = NamedTuple(type: String, value: String)
  LEXERS = {} of String => Tartrazine::Lexer
  class Lexer
    property config = {
      name:             "",
@@ -135,22 +138,9 @@ module Tartrazine
      l
    end
  end
 end
-# Try loading all lexers
+  def self.get_lexer(name : String) : Lexer
-
+    Lexer.from_xml(File.read("lexers/#{name}.xml"))
 lexers = Tartrazine::LEXERS
 Dir.glob("lexers/*.xml").each do |fname|
  begin
    l = Tartrazine::Lexer.from_xml(File.read(fname))
  rescue ex : Exception
    # p! ex
    next
  end
  lexers[l.config[:name].downcase] = l
  l.config[:aliases].each do |key|
    lexers[key.downcase] = l
  end
 end
@@ -166,29 +156,3 @@ end
 macro xml_to_a(node, name)
 {{node}}.children.select{|n| n.name == "{{name}}".lstrip("_")}.map {|n| n.content.to_s}
 end
 # # #<Regex::Error:Regex match error: match limit exceeded>
 # next if testname == "tests/fortran/test_string_cataback.txt"
 # # Difference is different unicode representation of a string literal
 # next if testname == "tests/java/test_string_literals.txt"
 # next if testname == "tests/systemd/example1.txt"
 # next if testname == "tests/json/test_strings.txt"
 # # Tartrazine agrees with pygments, disagrees with chroma
 # next if testname == "tests/java/test_default.txt"
 # next if testname == "tests/java/test_numeric_literals.txt"
 # next if testname == "tests/java/test_multiline_string.txt"
 # # Tartrazine disagrees with pygments and chroma, but it's fine
 # next if testname == "tests/php/test_string_escaping_run.txt"
 # # Chroma's output is bad, but so is Tartrazine's
 # next if "tests/html/javascript_unclosed.txt" == testname
 # # KNOWN BAD -- TO FIX
 # next if "tests/html/css_backtracking.txt" == testname
 # next if "tests/php/anonymous_class.txt" == testname
 # next if "tests/c/test_string_resembling_decl_end.txt" == testname
 # next if "tests/mcfunction/data.txt" == testname
 # next if "tests/mcfunction/selectors.txt" == testname