Mirror of https://github.com/ralsina/tartrazine.git (synced 2025-06-07 12:10:25 -03:00)

Commit f07db4ae04 (parent e625d6604a): "51 good tests now"
@@ -1,4 +1,5 @@
 require "xml"
+require "json"

 module Tartrazine
   VERSION = "0.1.0"
@@ -18,6 +19,7 @@ module Tartrazine
     property pattern : Regex = Regex.new ""
     property emitters : Array(Emitter) = [] of Emitter
     property transformers : Array(Transformer) = [] of Transformer
+    property xml : String = "foo"

     def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
       tokens = [] of Token
@@ -30,6 +32,7 @@ module Tartrazine
         # Emit the token
         tokens += emitter.emit(match, lexer)
       end
+      p! xml, match.end, tokens
       return true, match.end, tokens
     end
   end
@@ -51,6 +54,7 @@ module Tartrazine
       puts "Including state #{state} from #{lexer.state_stack.last}"
       lexer.states[state].rules.each do |rule|
         matched, new_pos, new_tokens = rule.match(text, pos, lexer)
+        p! xml, new_pos, new_tokens if matched
         return true, new_pos, new_tokens if matched
       end
       return false, pos, [] of Token
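Note: `p!` is Crystal's debug macro; it prints each expression's source text together with its value, which is what the traces added above and below rely on. A minimal illustration (not part of this commit, variables are made up):

    state_stack = ["root", "string"]
    pos = 42
    p! state_stack.last, pos
    # Output (roughly):
    # state_stack.last # => "string"
    # pos # => 42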
@@ -85,12 +89,23 @@ module Tartrazine
       when "token"
         raise Exception.new "Can't have a token without a match" if match.nil?
         [Token.new(type: xml["type"], value: match[0])]
-      # TODO handle #push #push:n #pop and multiple states
       when "push"
+        states_to_push = xml.attributes.select { |a| a.name == "state" }.map &.content
+        if states_to_push.empty?
           # Push without a state means push the current state
-        state = xml["state"]? || lexer.state_stack.last
+          states_to_push = [lexer.state_stack.last]
+        end
+        states_to_push.each do |state|
+          if state == "#pop"
+            # Pop the state
+            puts "Popping state"
+            lexer.state_stack.pop
+          else
+            # Really push
             puts "Pushing state #{state}"
             lexer.state_stack << state
+          end
+        end
         [] of Token
       when "pop"
         depth = xml["depth"].to_i
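In the new `push` branch, a push with no `state` attribute re-pushes the current state, and a pushed state literally named "#pop" pops the stack instead of pushing. A standalone sketch of that stack behaviour (illustrative only; the plain array stands in for the lexer's `state_stack`):

    # Simulate the "push" emitter's handling of a list of state names.
    state_stack = ["root"]
    ["string", "#pop", "interp"].each do |state|
      if state == "#pop"
        state_stack.pop      # "#pop" pops instead of pushing
      else
        state_stack << state # anything else is pushed on top
      end
    end
    puts state_stack.inspect # => ["root", "interp"]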
@@ -105,11 +120,10 @@ module Tartrazine
         # This takes the groups in the regex and emits them as tokens
         # Get all the token nodes
         raise Exception.new "Can't have a token without a match" if match.nil?
-        tokens = xml.children.select { |n| n.name == "token" }.map { |t| t["type"].to_s }
-        p! match, tokens
+        tokens = xml.children.select { |n| n.name == "token" }.map(&.["type"].to_s)
         result = [] of Token
         tokens.each_with_index do |t, i|
-          result << {type: t, value: match[i]}
+          result << {type: t, value: match[i + 1]}
         end
         result
       else
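The `match[i + 1]` change is the actual fix here: in Crystal's `Regex::MatchData`, index 0 is the entire match and capture groups start at 1, so the i-th `<token>` node corresponds to group i + 1, not group i. For example:

    md = /(\w+)=(\d+)/.match("width=640").not_nil!
    md[0] # => "width=640"  (the whole match, which is what the old code handed to the first group)
    md[1] # => "width"      (first capture group)
    md[2] # => "640"        (second capture group)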
@@ -149,20 +163,23 @@ module Tartrazine
       matched = false
       while pos < text.size
         state = states[state_stack.last]
+        p! state_stack.last, pos
         state.rules.each do |rule|
           matched, new_pos, new_tokens = rule.match(text, pos, self)
           next unless matched
+          p! rule.xml
+
           pos = new_pos
           tokens += new_tokens
           break # We go back to processing with current state
         end
         # If no rule matches, emit an error token
         unless matched
-          tokens << {type: "Error", value: ""}
+          tokens << {type: "Error", value: "?"}
           pos += 1
         end
       end
-      tokens
+      tokens.reject { |t| t[:type] == "Text" && t[:value] == "" }
     end

     def self.from_xml(xml : String) : Lexer
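Two behavioural changes land in the tokenizer loop: unmatched input now produces an Error token with a visible "?" value, and empty Text tokens are filtered out of the result, presumably to line up with chroma's output in the comparison further down. A small sketch of the final filter (assuming `Token` is a `NamedTuple(type: String, value: String)` alias, as the `{type: ..., value: ...}` literals above suggest):

    alias Token = NamedTuple(type: String, value: String)

    tokens = [
      {type: "Text", value: ""},       # dropped by the new reject
      {type: "Keyword", value: "def"},
      {type: "Error", value: "?"},     # unmatched input now carries "?"
    ] of Token

    puts tokens.reject { |t| t[:type] == "Text" && t[:value] == "" }.inspect
    # => [{type: "Keyword", value: "def"}, {type: "Error", value: "?"}]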
@@ -197,17 +214,23 @@ module Tartrazine
           when nil
             if rule_node.first_element_child.try &.name == "include"
               rule = IncludeStateRule.new
+              rule.xml = rule_node.to_s
               include_node = rule_node.children.find { |n| n.name == "include" }
               rule.state = include_node["state"] if include_node
               state.rules << rule
             else
               rule = Always.new
+              rule.xml = rule_node.to_s
               state.rules << rule
             end
           else
             rule = Rule.new
+            rule.xml = rule_node.to_s
             begin
-              rule.pattern = /#{rule_node["pattern"]}/m
+              rule.pattern = Regex.new(
+                rule_node["pattern"],
+                Regex::Options::ANCHORED | Regex::Options::MULTILINE
+              )
               state.rules << rule
             rescue ex : Exception
               puts "Bad regex in #{l.config[:name]}: #{ex}"
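The regex construction is the other substantive change: `Regex::Options::ANCHORED` makes a rule's pattern match only at the exact position being scanned, so a rule either matches right where the lexer stands or fails, rather than finding a match further down the text. A small illustration of the option (hypothetical strings, standard Crystal `Regex` API):

    text = "  foo"

    plain    = Regex.new("foo", Regex::Options::MULTILINE)
    anchored = Regex.new("foo", Regex::Options::ANCHORED | Regex::Options::MULTILINE)

    p! plain.match(text, 0)    # finds "foo" at offset 2, skipping the spaces
    p! anchored.match(text, 0) # => nil, must match exactly at position 0
    p! anchored.match(text, 2) # matches, since "foo" starts right at position 2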
@@ -266,8 +289,44 @@ end

 # Let's run some tests

-good = 0
-bad = 0
+def chroma_tokenize(lexer, text)
+  output = IO::Memory.new
+  input = IO::Memory.new(text)
+  Process.run(
+    "chroma",
+    ["-f", "json", "-l", lexer],
+    input: input, output: output
+  )
+  Array(Tartrazine::Token).from_json(output.to_s)
+end
+
+def test_file(testname, lexer)
+  test = File.read(testname).split("---input---\n").last.split("---tokens---").first
+  pp! test
+  begin
+    tokens = lexer.tokenize(test)
+  rescue ex : Exception
+    puts ">>>ERROR"
+    return
+  end
+  outp = IO::Memory.new
+  i = IO::Memory.new(test)
+  lname = lexer.config[:name]
+  Process.run(
+    "chroma",
+    ["-f", "json", "-l", lname], input: i, output: outp
+  )
+  chroma_tokens = Array(Tartrazine::Token).from_json(outp.to_s)
+  if chroma_tokens != tokens
+    pp! tokens
+    pp! chroma_tokens
+    puts ">>>BAD"
+  else
+    puts ">>>GOOD"
+  end
+end
+
+
 Dir.glob("tests/*/") do |lexername|
   key = File.basename(lexername).downcase
   next unless lexers.has_key? key
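`Array(Tartrazine::Token).from_json` is what the new `require "json"` at the top of the file is for: chroma is run with `-f json` and its output is decoded into the same token type the lexer produces, so the two streams can be compared directly. A minimal sketch of that decoding step (the JSON string is hand-written here, and `Token` is assumed to be the `NamedTuple(type: String, value: String)` alias):

    require "json"

    # Assumed shape of Token; NamedTuple decodes from JSON out of the box.
    alias Token = NamedTuple(type: String, value: String)

    json = %([{"type":"Keyword","value":"def"},{"type":"Text","value":" "}])
    tokens = Array(Token).from_json(json)
    p! tokens.size  # => 2
    p! tokens.first # => {type: "Keyword", value: "def"}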
@@ -275,15 +334,6 @@ Dir.glob("tests/*/") do |lexername|

   Dir.glob("#{lexername}*.txt") do |testname|
     puts "Testing #{key} with #{testname}"
-    test = File.read(testname).split("---input---\n").last.split("--tokens---").first
-    begin
-      tokens = lexer.tokenize(test)
-      good += 1
-    rescue ex : Exception
-      puts "Error in #{key} with #{testname}: #{ex}"
-      bad += 1
-      raise ex
-    end
+    test_file(testname, lexer)
   end
 end
-puts "Good: #{good} Bad: #{bad}"