Let's go nuts and try to tokenize ALL THE PYGMENTS TESTS

This commit is contained in:
Roberto Alsina 2024-08-03 09:42:32 -03:00
parent b3dfa5e28f
commit 7524f4330a

View File

@@ -227,14 +227,12 @@ end
# Build a lookup table mapping lexer names (and aliases) to loaded lexers,
# from every XML definition shipped under lexers/.
# NOTE(review): this is a commit diff — the next two assignments are the
# removed line (original-case name key) and its added replacement
# (downcased name key); only one exists on each side of the commit.
lexers = {} of String => Tartrazine::Lexer
Dir.glob("lexers/*.xml").each do |fname|
l = Tartrazine::Lexer.from_xml(File.read(fname))
lexers[l.config[:name]] = l
lexers[l.config[:name].downcase] = l
# Also register the lexer under each of its aliases, downcased.
l.config[:aliases].each do |key|
lexers[key.downcase] = l
end
end
# Quick smoke tests: tokenize trivial inputs with two known lexers.
# Parse some plaintext
puts lexers["plaintext"].tokenize("Hello, world!\n")
# Now some bash
# NOTE(review): after this commit name keys are stored downcased ("bash");
# the capitalized "Bash" lookup below presumably only resolves via an alias
# or the pre-change name-cased entry — verify it doesn't raise KeyError.
puts lexers["Bash"].tokenize("echo 'Hello, world!'\n")
# Convenience macros to parse XML
macro xml_to_s(node, name)
@@ -248,3 +246,20 @@ end
# Macro: expands to an Array(String) of the text content of every child of
# {{node}} whose element name matches "{{name}}".
# The lstrip("_") strips a leading underscore from the name literal —
# presumably so callers can pass names that collide with Crystal keywords;
# verify against call sites.
macro xml_to_a(node, name)
{{node}}.children.select{|n| n.name == "{{name}}".lstrip("_")}.map {|n| n.content.to_s}
end
# Run the downloaded Pygments test suites against our lexers.
# Each tests/<lexername>/*.txt file has the form:
#   ---input---
#   <source text to tokenize>
#   ---tokens---
#   <expected token stream>
Dir.glob("tests/*/") do |lexername|
  key = File.basename(lexername).downcase
  # Skip suites for languages we have no lexer for.
  next unless lexers.has_key? key
  lexer = lexers[key]
  Dir.glob("#{lexername}*.txt") do |testname|
    # Extract just the input section of the test file.
    # FIX: the Pygments separator is "---tokens---" (three leading dashes);
    # splitting on "--tokens---" matched inside it and left a stray "-"
    # at the end of every extracted input.
    test = File.read(testname).split("---input---\n").last.split("---tokens---").first
    tokens = lexer.tokenize(test)
    puts "Testing #{key} with #{testname}"
    # puts tokens
  end
end