From 58e8dac03848fe5d2406b4f265d5969b0d7b1654 Mon Sep 17 00:00:00 2001
From: Roberto Alsina
Date: Thu, 15 Aug 2024 19:20:12 -0300
Subject: [PATCH] Make usingself MUCH cheaper, since it was called many times when parsing C

---
 src/actions.cr |  2 +-
 src/lexer.cr   | 13 +++++++++----
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/actions.cr b/src/actions.cr
index 126fd47..6baf72b 100644
--- a/src/actions.cr
+++ b/src/actions.cr
@@ -109,7 +109,7 @@ module Tartrazine
       when "usingself"
         # Shunt to another copy of this lexer
         return [] of Token if match.empty?
-        new_lexer = Lexer.from_xml(lexer.xml)
+        new_lexer = lexer.copy
         new_lexer.tokenize(String.new(match[match_group].value), usingself: true)
       when "combined"
         # Combine two states into one anonymous state
diff --git a/src/lexer.cr b/src/lexer.cr
index b99c821..20f074a 100644
--- a/src/lexer.cr
+++ b/src/lexer.cr
@@ -56,10 +56,18 @@ module Tartrazine
       not_multiline: false,
       ensure_nl:     false,
     }
-    property xml : String = ""
+    # property xml : String = ""
     property states = {} of String => State
     property state_stack = ["root"]
 
+    def copy : Lexer
+      new_lexer = Lexer.new
+      new_lexer.config = config
+      new_lexer.states = states
+      new_lexer.state_stack = state_stack[0..-1]
+      new_lexer
+    end
+
     # Turn the text into a list of tokens. The `usingself` parameter
     # is true when the lexer is being used to tokenize a string
     # from a larger text that is already being tokenized.
@@ -85,12 +93,10 @@ module Tartrazine
           if matched
             # Move position forward, save the tokens,
            # tokenize from the new position
-            # Log.trace { "MATCHED: #{rule.xml}" }
             pos = new_pos
             tokens += new_tokens
             break
           end
-          # Log.trace { "NOT MATCHED: #{rule.xml}" }
         end
         # If no rule matches, emit an error token
         unless matched
@@ -156,7 +162,6 @@ module Tartrazine
     # ameba:disable Metrics/CyclomaticComplexity
     def self.from_xml(xml : String) : Lexer
       l = Lexer.new
-      l.xml = xml
       lexer = XML.parse(xml).first_element_child
       if lexer
         config = lexer.children.find { |node|
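
Editor's sketch (not part of the patch): a minimal, self-contained Crystal example of the pattern the patch applies, using a hypothetical TinyLexer class rather than Tartrazine's real Lexer. The already-built rule tables are shared between the original and the copy, and only the mutable state stack is duplicated, so spawning a lexer for the "usingself" action no longer re-parses the XML definition.

# Hypothetical standalone example (not Tartrazine code) of a cheap copy:
# share the parts that are built once and duplicate only the per-run
# mutable state (the state stack).
class TinyLexer
  property states = {} of String => Array(String) # built once, shared
  property state_stack = ["root"]                 # per-run, must be fresh

  def copy : TinyLexer
    other = TinyLexer.new
    other.states = states                  # no re-parse, same Hash object
    other.state_stack = state_stack[0..-1] # new Array, same contents
    other
  end
end

base = TinyLexer.new
base.states["root"] = ["rule-a", "rule-b"]

clone = base.copy
clone.state_stack << "string" # pushing on the copy...

p base.state_stack                # => ["root"]  ...leaves the original intact
p clone.state_stack               # => ["root", "string"]
p clone.states.same?(base.states) # => true (shared, nothing re-parsed)

The state_stack[0..-1] slice is what makes the copy safe: the spawned lexer can push and pop states while tokenizing the sub-match without disturbing the parent lexer's stack, while the heavyweight states table is reused as-is.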