mirror of
https://github.com/ralsina/tartrazine.git
synced 2024-11-10 05:22:23 +00:00
Idiomatic changes
This commit is contained in:
parent
20d6b65346
commit
e817aedd60
@ -8,19 +8,29 @@ require "./tartrazine"
|
|||||||
# perform a list of actions. These actions can emit tokens
|
# perform a list of actions. These actions can emit tokens
|
||||||
# or change the state machine.
|
# or change the state machine.
|
||||||
module Tartrazine
|
module Tartrazine
|
||||||
|
enum ActionType
|
||||||
|
Token
|
||||||
|
Push
|
||||||
|
Pop
|
||||||
|
Combined
|
||||||
|
Bygroups
|
||||||
|
Include
|
||||||
|
Using
|
||||||
|
Usingself
|
||||||
|
end
|
||||||
|
|
||||||
struct Action
|
struct Action
|
||||||
property actions : Array(Action) = [] of Action
|
property actions : Array(Action) = [] of Action
|
||||||
property type : String
|
|
||||||
|
|
||||||
@depth : Int32 = 0
|
@depth : Int32 = 0
|
||||||
@lexer_name : String = ""
|
@lexer_name : String = ""
|
||||||
@states : Array(String) = [] of String
|
@states : Array(String) = [] of String
|
||||||
@states_to_push : Array(String) = [] of String
|
@states_to_push : Array(String) = [] of String
|
||||||
@token_type : String = ""
|
@token_type : String = ""
|
||||||
|
@type : ActionType = ActionType::Token
|
||||||
|
|
||||||
def initialize(@type : String, xml : XML::Node?)
|
def initialize(t : String, xml : XML::Node?)
|
||||||
known_types = %w(token push pop combined bygroups include using usingself)
|
@type = ActionType.parse(t.capitalize)
|
||||||
raise Exception.new("Unknown action type: #{type}") unless known_types.includes? type
|
|
||||||
|
|
||||||
# Some actions may have actions in them, like this:
|
# Some actions may have actions in them, like this:
|
||||||
# <bygroups>
|
# <bygroups>
|
||||||
@ -37,18 +47,18 @@ module Tartrazine
|
|||||||
end
|
end
|
||||||
|
|
||||||
# Prefetch the attributes we need from the XML and keep them
|
# Prefetch the attributes we need from the XML and keep them
|
||||||
case type
|
case @type
|
||||||
when "token"
|
when ActionType::Token
|
||||||
@token_type = xml["type"]
|
@token_type = xml["type"]
|
||||||
when "push"
|
when ActionType::Push
|
||||||
@states_to_push = xml.attributes.select { |attrib|
|
@states_to_push = xml.attributes.select { |attrib|
|
||||||
attrib.name == "state"
|
attrib.name == "state"
|
||||||
}.map &.content
|
}.map &.content
|
||||||
when "pop"
|
when ActionType::Pop
|
||||||
@depth = xml["depth"].to_i
|
@depth = xml["depth"].to_i
|
||||||
when "using"
|
when ActionType::Using
|
||||||
@lexer_name = xml["lexer"].downcase
|
@lexer_name = xml["lexer"].downcase
|
||||||
when "combined"
|
when ActionType::Combined
|
||||||
@states = xml.attributes.select { |attrib|
|
@states = xml.attributes.select { |attrib|
|
||||||
attrib.name == "state"
|
attrib.name == "state"
|
||||||
}.map &.content
|
}.map &.content
|
||||||
@ -57,11 +67,11 @@ module Tartrazine
|
|||||||
|
|
||||||
# ameba:disable Metrics/CyclomaticComplexity
|
# ameba:disable Metrics/CyclomaticComplexity
|
||||||
def emit(match : MatchData, lexer : Lexer, match_group = 0) : Array(Token)
|
def emit(match : MatchData, lexer : Lexer, match_group = 0) : Array(Token)
|
||||||
case type
|
case @type
|
||||||
when "token"
|
when ActionType::Token
|
||||||
raise Exception.new "Can't have a token without a match" if match.empty?
|
raise Exception.new "Can't have a token without a match" if match.empty?
|
||||||
[Token.new(type: @token_type, value: String.new(match[match_group].value))]
|
[Token.new(type: @token_type, value: String.new(match[match_group].value))]
|
||||||
when "push"
|
when ActionType::Push
|
||||||
to_push = @states_to_push.empty? ? [lexer.state_stack.last] : @states_to_push
|
to_push = @states_to_push.empty? ? [lexer.state_stack.last] : @states_to_push
|
||||||
to_push.each do |state|
|
to_push.each do |state|
|
||||||
if state == "#pop" && lexer.state_stack.size > 1
|
if state == "#pop" && lexer.state_stack.size > 1
|
||||||
@ -73,11 +83,11 @@ module Tartrazine
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
[] of Token
|
[] of Token
|
||||||
when "pop"
|
when ActionType::Pop
|
||||||
to_pop = [@depth, lexer.state_stack.size - 1].min
|
to_pop = [@depth, lexer.state_stack.size - 1].min
|
||||||
lexer.state_stack.pop(to_pop)
|
lexer.state_stack.pop(to_pop)
|
||||||
[] of Token
|
[] of Token
|
||||||
when "bygroups"
|
when ActionType::Bygroups
|
||||||
# FIXME: handle
|
# FIXME: handle
|
||||||
# ><bygroups>
|
# ><bygroups>
|
||||||
# <token type="Punctuation"/>
|
# <token type="Punctuation"/>
|
||||||
@ -102,16 +112,16 @@ module Tartrazine
|
|||||||
result += e.emit(match, lexer, i + 1)
|
result += e.emit(match, lexer, i + 1)
|
||||||
end
|
end
|
||||||
result
|
result
|
||||||
when "using"
|
when ActionType::Using
|
||||||
# Shunt to another lexer entirely
|
# Shunt to another lexer entirely
|
||||||
return [] of Token if match.empty?
|
return [] of Token if match.empty?
|
||||||
Tartrazine.lexer(@lexer_name).tokenize(String.new(match[match_group].value), usingself: true)
|
Tartrazine.lexer(@lexer_name).tokenize(String.new(match[match_group].value), secondary: true)
|
||||||
when "usingself"
|
when ActionType::Usingself
|
||||||
# Shunt to another copy of this lexer
|
# Shunt to another copy of this lexer
|
||||||
return [] of Token if match.empty?
|
return [] of Token if match.empty?
|
||||||
new_lexer = lexer.copy
|
new_lexer = lexer.copy
|
||||||
new_lexer.tokenize(String.new(match[match_group].value), usingself: true)
|
new_lexer.tokenize(String.new(match[match_group].value), secondary: true)
|
||||||
when "combined"
|
when ActionType::Combined
|
||||||
# Combine two states into one anonymous state
|
# Combine two states into one anonymous state
|
||||||
new_state = @states.map { |name|
|
new_state = @states.map { |name|
|
||||||
lexer.states[name]
|
lexer.states[name]
|
||||||
@ -122,7 +132,7 @@ module Tartrazine
|
|||||||
lexer.state_stack << new_state.name
|
lexer.state_stack << new_state.name
|
||||||
[] of Token
|
[] of Token
|
||||||
else
|
else
|
||||||
raise Exception.new("Unknown action type: #{type}")
|
raise Exception.new("Unknown action type: #{@type}")
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -68,18 +68,18 @@ module Tartrazine
|
|||||||
new_lexer
|
new_lexer
|
||||||
end
|
end
|
||||||
|
|
||||||
# Turn the text into a list of tokens. The `usingself` parameter
|
# Turn the text into a list of tokens. The `secondary` parameter
|
||||||
# is true when the lexer is being used to tokenize a string
|
# is true when the lexer is being used to tokenize a string
|
||||||
# from a larger text that is already being tokenized.
|
# from a larger text that is already being tokenized.
|
||||||
# So, when it's true, we don't modify the text.
|
# So, when it's true, we don't modify the text.
|
||||||
def tokenize(text : String, usingself = false) : Array(Token)
|
def tokenize(text : String, secondary = false) : Array(Token)
|
||||||
@state_stack = ["root"]
|
@state_stack = ["root"]
|
||||||
tokens = [] of Token
|
tokens = [] of Token
|
||||||
pos = 0
|
pos = 0
|
||||||
matched = false
|
matched = false
|
||||||
|
|
||||||
# Respect the `ensure_nl` config option
|
# Respect the `ensure_nl` config option
|
||||||
if text.size > 0 && text[-1] != '\n' && config[:ensure_nl] && !usingself
|
if text.size > 0 && text[-1] != '\n' && config[:ensure_nl] && !secondary
|
||||||
text += "\n"
|
text += "\n"
|
||||||
end
|
end
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user