mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-07-04 15:29:25 +00:00
159 lines
4.9 KiB
Crystal
require "./actions"
require "./formatter"
require "./rules"
require "./styles"
require "./tartrazine"

# These are Lexer actions. When a rule matches, it will
# perform a list of actions. These actions can emit tokens
# or change the state machine.
module Tartrazine
  # The kinds of action a lexer rule can perform, mirroring the
  # element names found in the pygments-style XML lexer definitions
  # (parsed case-insensitively via `ActionType.parse(t.capitalize)`).
  enum ActionType
    Bygroups
    Combined
    Include
    Pop
    Push
    Token
    Using
    Usingbygroup
    Usingself
  end

  # One action attached to a lexer rule. An Action is built from an
  # XML node and prefetches whatever attributes its type needs, so no
  # XML is touched on the hot `emit` path.
  struct Action
    # Child actions, only populated for container types like <bygroups>.
    property actions : Array(Action) = [] of Action

    # Prefetched per-type configuration (only the fields relevant to
    # @type are ever filled in by #initialize):
    @content_index : Array(Int32) = [] of Int32   # Usingbygroup: groups whose text is lexed
    @depth : Int32 = 0                            # Pop: how many states to pop
    @lexer_index : Int32 = 0                      # Usingbygroup: group naming the lexer
    @lexer_name : String = ""                     # Using: fixed lexer name (lowercased)
    @states : Array(String) = [] of String        # Combined: states to merge
    @states_to_push : Array(String) = [] of String # Push: states to push
    @token_type : String = ""                     # Token: emitted token type
    @type : ActionType = ActionType::Token

    # Builds an Action from the XML element named `t` (e.g. "token",
    # "push") and its node `xml`, recursing into child elements and
    # prefetching the attributes this action type will need.
    def initialize(t : String, xml : XML::Node?)
      @type = ActionType.parse(t.capitalize)

      # Some actions may have actions in them, like this:
      # <bygroups>
      # <token type="GenericPrompt"/>
      # <token type="Text"/>
      # <using lexer="bash"/>
      # </bygroups>
      #
      # The token actions match with the first 2 groups in the regex
      # the using action matches the 3rd and shunts it to another lexer
      xml.children.each do |node|
        next unless node.element?
        @actions << Action.new(node.name, node)
      end

      # Prefetch the attributes we need from the XML and keep them
      case @type
      when ActionType::Token
        @token_type = xml["type"]
      when ActionType::Push
        @states_to_push = xml.attributes.select { |attrib|
          attrib.name == "state"
        }.map &.content
      when ActionType::Pop
        @depth = xml["depth"].to_i
      when ActionType::Using
        @lexer_name = xml["lexer"].downcase
      when ActionType::Combined
        @states = xml.attributes.select { |attrib|
          attrib.name == "state"
        }.map &.content
      when ActionType::Usingbygroup
        @lexer_index = xml["lexer"].to_i
        @content_index = xml["content"].split(",").map(&.to_i)
      end
    end

    # Executes this action for a rule `match`, mutating the tokenizer's
    # state stack as needed and returning any tokens produced.
    # `match_group` selects which capture group's text is used (the
    # container action passes the child's group index when recursing).
    # Raises for Token/Bygroups without a usable match, and for an
    # ActionType this method does not handle (e.g. Include, which is
    # resolved elsewhere).
    # ameba:disable Metrics/CyclomaticComplexity
    def emit(match : MatchData, tokenizer : Tokenizer, match_group = 0) : Array(Token)
      case @type
      when ActionType::Token
        raise Exception.new "Can't have a token without a match" if match.empty?
        [Token.new(type: @token_type, value: String.new(match[match_group].value))]
      when ActionType::Push
        # With no explicit states, re-push the current top of stack.
        to_push = @states_to_push.empty? ? [tokenizer.state_stack.last] : @states_to_push
        to_push.each do |state|
          if state == "#pop" && tokenizer.state_stack.size > 1
            # Pop the state
            tokenizer.state_stack.pop
          else
            # Really push
            tokenizer.state_stack << state
          end
        end
        [] of Token
      when ActionType::Pop
        # Never pop the root state: clamp to stack size - 1.
        to_pop = [@depth, tokenizer.state_stack.size - 1].min
        tokenizer.state_stack.pop(to_pop)
        [] of Token
      when ActionType::Bygroups
        # FIXME: handle
        # ><bygroups>
        # <token type="Punctuation"/>
        # None
        # <token type="LiteralStringRegex"/>
        #
        # where that None means skipping a group
        #
        # NOTE(review): `match` is declared as non-nilable MatchData, so this
        # nil? guard appears unreachable — confirm whether it should be
        # `match.empty?` like the other branches.
        raise Exception.new "Can't have a token without a match" if match.nil?

        # Each group matches an action. If the group match is empty,
        # the action is skipped.
        result = [] of Token
        @actions.each_with_index do |e, i|
          begin
            next if match[i + 1].size == 0
          rescue IndexError
            # FIXME: This should not actually happen
            # No match for this group
            next
          end
          result += e.emit(match, tokenizer, i + 1)
        end
        result
      when ActionType::Using
        # Shunt to another lexer entirely
        return [] of Token if match.empty?
        Tartrazine.lexer(@lexer_name).tokenizer(
          String.new(match[match_group].value),
          secondary: true).to_a
      when ActionType::Usingself
        # Shunt to another copy of this lexer
        return [] of Token if match.empty?
        tokenizer.lexer.tokenizer(
          String.new(match[match_group].value),
          secondary: true).to_a
      when ActionType::Combined
        # Combine two or more states into one anonymous state
        new_state = @states.map { |name|
          tokenizer.lexer.states[name]
        }.reduce { |state1, state2|
          state1 + state2
        }
        # Register the merged state under its combined name and enter it.
        tokenizer.lexer.states[new_state.name] = new_state
        tokenizer.state_stack << new_state.name
        [] of Token
      when ActionType::Usingbygroup
        # Shunt to content-specified lexer
        return [] of Token if match.empty?
        content = ""
        @content_index.each do |i|
          content += String.new(match[i].value)
        end
        Tartrazine.lexer(String.new(match[@lexer_index].value)).tokenizer(
          content,
          secondary: true).to_a
      else
        raise Exception.new("Unknown action type: #{@type}")
      end
    end
  end
end
|