mirror of
https://github.com/ralsina/tartrazine.git
synced 2024-11-10 05:22:23 +00:00
refactor
This commit is contained in:
parent
d47c946e8b
commit
b82a535928
@ -18,6 +18,24 @@ module Tartrazine
|
|||||||
property pattern : Regex = Regex.new ""
|
property pattern : Regex = Regex.new ""
|
||||||
property emitters : Array(Emitter) = [] of Emitter
|
property emitters : Array(Emitter) = [] of Emitter
|
||||||
property transformers : Array(Transformer) = [] of Transformer
|
property transformers : Array(Transformer) = [] of Transformer
|
||||||
|
|
||||||
|
# Tries this rule's `pattern` against `text` using `pattern.match(text, pos)`.
#
# Returns a tuple of {position, tokens}:
# * on a miss, the incoming `pos` unchanged and an empty token array;
# * on a hit, `match.end` and the tokens gathered from every emitter.
#
# The return value has no separate "matched" flag: a miss is signalled only
# by the unchanged position together with the empty token array.
#
# NOTE(review): whether `pattern.match(text, pos)` is anchored at `pos` or may
# find a match further along in `text` is not visible here - confirm.
def match(text, pos) : Tuple(Int32, Array(Token))
|
||||||
|
tokens = [] of Token
|
||||||
|
match = pattern.match(text, pos)
|
||||||
|
# No match: hand back the position unchanged together with an empty token list
|
||||||
|
return pos, [] of Token if match.nil?
|
||||||
|
|
||||||
|
# Gather the tokens produced by every emitter, in the order of `emitters`
|
||||||
|
emitters.each do |emitter|
|
||||||
|
# Append this emitter's tokens for the current match
|
||||||
|
tokens += emitter.emit(match)
|
||||||
|
end
|
||||||
|
# Run every transformer (presumably state changes; `transform` is not visible here)
|
||||||
|
transformers.each do |transformer|
|
||||||
|
transformer.transform
|
||||||
|
end
|
||||||
|
# Matched: return the end offset reported by the match and the collected tokens
return match.end, tokens
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# This rule includes another state like this:
|
# This rule includes another state like this:
|
||||||
@ -32,6 +50,11 @@ module Tartrazine
|
|||||||
|
|
||||||
class IncludeStateRule < Rule
|
class IncludeStateRule < Rule
|
||||||
property state : String = ""
|
property state : String = ""
|
||||||
|
|
||||||
|
# Override of `match` for the include-state rule.
#
# Prints "Including state <state>" with `puts` and returns the incoming `pos`
# unchanged together with an empty token array; `text` is not examined.
#
# NOTE(review): this looks like a placeholder - the rules of the state named
# by `state` are not consulted anywhere in this method.
def match(text, pos) : Tuple(Int32, Array(Token))
|
||||||
|
puts "Including state #{state}"
|
||||||
|
return pos, [] of Token
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class Emitter
|
class Emitter
|
||||||
@ -44,9 +67,10 @@ module Tartrazine
|
|||||||
def emit(match : Regex::MatchData) : Array(Token)
|
def emit(match : Regex::MatchData) : Array(Token)
|
||||||
case type
|
case type
|
||||||
when "token"
|
when "token"
|
||||||
return [Token.new(type: xml["type"], value: match[0])]
|
[Token.new(type: xml["type"], value: match[0])]
|
||||||
|
else
|
||||||
|
raise Exception.new("Unknown emitter type: #{type}")
|
||||||
end
|
end
|
||||||
[] of Token
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -82,28 +106,16 @@ module Tartrazine
|
|||||||
state = states[state_stack.last]
|
state = states[state_stack.last]
|
||||||
matched = false
|
matched = false
|
||||||
state.rules.each do |rule|
|
state.rules.each do |rule|
|
||||||
case rule
|
new_pos, new_tokens = rule.match(text, pos)
|
||||||
when Rule # A normal regex rule
|
pos = new_pos
|
||||||
match = rule.pattern.match(text, pos)
|
tokens += new_tokens
|
||||||
|
matched = true
|
||||||
# We are matched, move post to after the match
|
break # We go back to processing with current state
|
||||||
next if match.nil?
|
end
|
||||||
matched = true
|
# If no rule matches, emit an error token
|
||||||
pos = match.end
|
if !matched
|
||||||
|
tokens << {type: "Error", value: ""}
|
||||||
# Emit the tokens
|
pos += 1
|
||||||
rule.emitters.each do |emitter|
|
|
||||||
# Emit the token
|
|
||||||
tokens += emitter.emit(match)
|
|
||||||
end
|
|
||||||
# Transform the state
|
|
||||||
rule.transformers.each do |transformer|
|
|
||||||
transformer.transform
|
|
||||||
end
|
|
||||||
when IncludeStateRule
|
|
||||||
# TODO: something
|
|
||||||
end
|
|
||||||
# TODO: Emit error if no rule matched
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
tokens
|
tokens
|
||||||
@ -159,7 +171,7 @@ module Tartrazine
|
|||||||
rule_node.children.each do |node|
|
rule_node.children.each do |node|
|
||||||
next unless node.element?
|
next unless node.element?
|
||||||
case node.name
|
case node.name
|
||||||
when "pop", "push", "include", "multi", "combine"
|
when "pop", "push", "multi", "combine" # "include",
|
||||||
transformer = Transformer.new
|
transformer = Transformer.new
|
||||||
transformer.type = node.name
|
transformer.type = node.name
|
||||||
transformer.xml = node.to_s
|
transformer.xml = node.to_s
|
||||||
@ -188,6 +200,9 @@ end
|
|||||||
# Parse some plaintext
|
# Parse some plaintext
|
||||||
puts lexers["plaintext"].tokenize("Hello, world!\n")
|
puts lexers["plaintext"].tokenize("Hello, world!\n")
|
||||||
|
|
||||||
|
# Now some bash
|
||||||
|
puts lexers["Bash"].tokenize("echo 'Hello, world!'\n")
|
||||||
|
|
||||||
# Convenience macros to parse XML
|
# Convenience macros to parse XML
|
||||||
macro xml_to_s(node, name)
|
macro xml_to_s(node, name)
|
||||||
{{node}}.children.find{|n| n.name == "{{name}}".lstrip("_")}.try &.content.to_s
|
{{node}}.children.find{|n| n.name == "{{name}}".lstrip("_")}.try &.content.to_s
|
||||||
|
Loading…
Reference in New Issue
Block a user