Tokenize via an iterator; this makes everything much faster

2024-08-16 13:27:02 -03:00
parent 788577b226
commit 7538fc76aa
7 changed files with 142 additions and 122 deletions


@@ -73,7 +73,8 @@ end
 # Helper that creates lexer and tokenizes
 def tokenize(lexer_name, text)
   lexer = Tartrazine.lexer(lexer_name)
-  lexer.tokenize(text)
+  tokenizer = Tartrazine::Tokenizer.new(lexer, text)
+  Tartrazine::Lexer.collapse_tokens(tokenizer.to_a)
 end
 # Helper that tokenizes using chroma to validate the lexer
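
For context, here is a minimal, self-contained Crystal sketch of the pattern the new helper relies on: a tokenizer that implements `Iterator(Token)` so tokens are produced lazily, drained with `to_a`, and then collapsed. The `Token` shape, the per-character scanning rule, and the `collapse_tokens` body below are illustrative assumptions, not Tartrazine's actual implementation; only the overall shape (an iterator consumed via `to_a`, then collapsed) follows the diff above.

```crystal
# Illustrative sketch only, not Tartrazine's real code.
record Token, type : String, value : String

class Tokenizer
  include Iterator(Token)

  def initialize(@text : String)
    @pos = 0
  end

  # Emit one token per call; returning `stop` ends iteration, so callers
  # can stream tokens instead of receiving one big array up front.
  def next
    return stop if @pos >= @text.size
    ch = @text[@pos]
    @pos += 1
    Token.new(ch.whitespace? ? "ws" : "text", ch.to_s)
  end
end

# Hypothetical stand-in for Tartrazine::Lexer.collapse_tokens: merge runs
# of adjacent tokens that share a type, shrinking the token stream.
def collapse_tokens(tokens : Array(Token)) : Array(Token)
  tokens.reduce([] of Token) do |acc, tok|
    if (last = acc.last?) && last.type == tok.type
      acc[-1] = Token.new(last.type, last.value + tok.value)
    else
      acc << tok
    end
    acc
  end
end

# Drain the iterator, then collapse: prints "hello", " ", "world".
collapse_tokens(Tokenizer.new("hello world").to_a).each do |tok|
  p tok.value
end
```

Producing tokens one at a time means the lexer itself never has to build intermediate arrays, which is presumably the allocation cost the iterator rewrite targets; collapsing adjacent same-type tokens afterwards keeps the helper's output equivalent to the old eager `lexer.tokenize(text)` call.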