Mirror of https://github.com/ralsina/tartrazine.git, synced 2024-11-10 05:22:23 +00:00
Reorganize tests into a real spec suite
This commit is contained in:
parent 57c160173c
commit e7c2053222
@@ -1,9 +1,112 @@
 require "./spec_helper"
 
-describe Tartrazine do
-  # TODO: Write tests
-  it "works" do
-    false.should eq(true)
-  end
-end
+# These are the testcases from Pygments
+testcases = Dir.glob("#{__DIR__}/tests/**/*txt").sort
+
+# These lexers don't load because of parsing issues
+failing_lexers = {
+  "webgpu_shading_language",
+}
+
+# These testcases fail because of differences in the way chroma and tartrazine
+# represent unicode, but they are actually correct
+unicode_problems = {
+  "#{__DIR__}/tests/java/test_string_literals.txt",
+  "#{__DIR__}/tests/json/test_strings.txt",
+  "#{__DIR__}/tests/systemd/example1.txt",
+}
+
+# These testcases fail because of differences in the way chroma and tartrazine tokenize,
+# but tartrazine is correct
+bad_in_chroma = {
+  "#{__DIR__}/tests/bash_session/test_comment_after_prompt.txt",
+  "#{__DIR__}/tests/java/test_default.txt",
+  "#{__DIR__}/tests/java/test_multiline_string.txt",
+  "#{__DIR__}/tests/java/test_numeric_literals.txt",
+  "#{__DIR__}/tests/php/test_string_escaping_run.txt",
+  "#{__DIR__}/tests/python_2/test_cls_builtin.txt",
+}
+
+known_bad = {
+  "#{__DIR__}/tests/bash_session/fake_ps2_prompt.txt",
+  "#{__DIR__}/tests/bash_session/prompt_in_output.txt",
+  "#{__DIR__}/tests/bash_session/test_newline_in_echo_no_ps2.txt",
+  "#{__DIR__}/tests/bash_session/test_newline_in_ls_ps2.txt",
+  "#{__DIR__}/tests/bash_session/ps2_prompt.txt",
+  "#{__DIR__}/tests/bash_session/test_newline_in_ls_no_ps2.txt",
+  "#{__DIR__}/tests/bash_session/test_virtualenv.txt",
+  "#{__DIR__}/tests/bash_session/test_newline_in_echo_ps2.txt",
+  "#{__DIR__}/tests/c/test_string_resembling_decl_end.txt",
+  "#{__DIR__}/tests/html/css_backtracking.txt",
+  "#{__DIR__}/tests/mcfunction/data.txt",
+  "#{__DIR__}/tests/mcfunction/selectors.txt",
+  "#{__DIR__}/tests/php/anonymous_class.txt",
+  "#{__DIR__}/tests/html/javascript_unclosed.txt",
+}
+
+# Tests that fail because of a limitation in PCRE2
+not_my_fault = {
+  "#{__DIR__}/tests/fortran/test_string_cataback.txt",
+}
+
+describe Tartrazine do
+  describe "Lexer" do
+    testcases.each do |testcase|
+      if known_bad.includes?(testcase)
+        pending "parses #{testcase}".split("/")[-2...].join("/") do
+        end
+      else
+        it "parses #{testcase}".split("/")[-2...].join("/") do
+          text = File.read(testcase).split("---input---\n").last.split("---tokens---").first
+          lexer_name = File.basename(File.dirname(testcase)).downcase
+          unless failing_lexers.includes?(lexer_name) ||
+                 unicode_problems.includes?(testcase) ||
+                 bad_in_chroma.includes?(testcase) ||
+                 not_my_fault.includes?(testcase)
+            tokenize(lexer_name, text).should eq(chroma_tokenize(lexer_name, text))
+          end
+        end
+      end
+    end
+  end
+end
+
+# Helper that creates a lexer and tokenizes
+def tokenize(lexer_name, text)
+  lexer = Tartrazine::Lexer.from_xml(File.read("lexers/#{lexer_name}.xml"))
+  collapse_tokens(lexer.tokenize(text))
+end
+
+# Helper that tokenizes using chroma to validate the lexer
+def chroma_tokenize(lexer_name, text)
+  output = IO::Memory.new
+  input = IO::Memory.new(text)
+  Process.run(
+    "chroma",
+    ["-f", "json", "-l", lexer_name],
+    input: input, output: output
+  )
+  collapse_tokens(Array(Tartrazine::Token).from_json(output.to_s))
+end
+
+# Collapse consecutive tokens of the same type for easier comparison
+def collapse_tokens(tokens : Array(Tartrazine::Token))
+  result = [] of Tartrazine::Token
+
+  tokens.each do |token|
+    if result.empty?
+      result << token
+      next
+    end
+    last = result.last
+    if last[:type] == token[:type]
+      new_token = {type: last[:type], value: last[:value] + token[:value]}
+      result.pop
+      result << new_token
+    else
+      result << token
+    end
+  end
+  result
+end
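For orientation: each testcase file follows the Pygments snapshot format that the `it` block above slices on, an `---input---` section followed by a `---tokens---` section. Only the input text is used by this spec; the expected tokens come from chroma at run time rather than from the file. A hypothetical minimal testcase might look like this (the token dump is illustrative, not copied from the suite):

---input---
puts "hello"

---tokens---
'puts'        Name.Builtin
' '           Text.Whitespace
'"hello"'     Literal.String.Double
'\n'          Text.Whitespace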
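collapse_tokens exists because two lexers can emit identical output split into different-sized pieces; merging consecutive tokens of the same type makes the two streams comparable. A minimal sketch of the effect, assuming Tartrazine::Token is an alias for the {type, value} named tuple the helpers above manipulate:

# Illustrative only: the two adjacent "Text" tokens merge, the "Name" token is untouched.
tokens = [
  {type: "Text", value: "foo"},
  {type: "Text", value: "bar"},
  {type: "Name", value: "x"},
]
collapse_tokens(tokens)
# => [{type: "Text", value: "foobar"}, {type: "Name", value: "x"}]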
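Because chroma_tokenize shells out to the chroma binary (chroma -f json -l <lexer> reads source on stdin and prints the token stream as JSON), any single testcase can also be checked by hand against the reference implementation. A sketch, assuming the helpers above are loaded; the testcase path here is hypothetical:

testcase = "#{__DIR__}/tests/ruby/test_strings.txt" # hypothetical path
text = File.read(testcase).split("---input---\n").last.split("---tokens---").first
lexer_name = File.basename(File.dirname(testcase)).downcase # => "ruby"
puts tokenize(lexer_name, text) == chroma_tokenize(lexer_name, text)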
Some files were not shown because too many files have changed in this diff.