Make `usingself` MUCH cheaper by copying the lexer instead of re-parsing its XML, since it is called many times when parsing C

This commit is contained in:
Roberto Alsina 2024-08-15 19:20:12 -03:00
parent f72a40f095
commit 58e8dac038
2 changed files with 10 additions and 5 deletions

View File

@ -109,7 +109,7 @@ module Tartrazine
when "usingself"
# Shunt to another copy of this lexer
return [] of Token if match.empty?
new_lexer = Lexer.from_xml(lexer.xml)
new_lexer = lexer.copy
new_lexer.tokenize(String.new(match[match_group].value), usingself: true)
when "combined"
# Combine two states into one anonymous state

View File

@ -56,10 +56,18 @@ module Tartrazine
not_multiline: false,
ensure_nl: false,
}
property xml : String = ""
# property xml : String = ""
property states = {} of String => State
property state_stack = ["root"]
# Build a new Lexer carrying this lexer's config and states (assigned
# as-is, without going through `from_xml`). The state stack is
# duplicated into a fresh array rather than assigned directly, so the
# copy does not hold the same Array instance as this lexer.
def copy : Lexer
  Lexer.new.tap do |duplicate|
    duplicate.config = config
    duplicate.states = states
    duplicate.state_stack = state_stack.dup
  end
end
# Turn the text into a list of tokens. The `usingself` parameter
# is true when the lexer is being used to tokenize a string
# from a larger text that is already being tokenized.
@ -85,12 +93,10 @@ module Tartrazine
if matched
# Move position forward, save the tokens,
# tokenize from the new position
# Log.trace { "MATCHED: #{rule.xml}" }
pos = new_pos
tokens += new_tokens
break
end
# Log.trace { "NOT MATCHED: #{rule.xml}" }
end
# If no rule matches, emit an error token
unless matched
@ -156,7 +162,6 @@ module Tartrazine
# ameba:disable Metrics/CyclomaticComplexity
def self.from_xml(xml : String) : Lexer
l = Lexer.new
l.xml = xml
lexer = XML.parse(xml).first_element_child
if lexer
config = lexer.children.find { |node|