Idiomatic changes

This commit is contained in:
Roberto Alsina 2024-08-15 22:41:24 -03:00
parent 20d6b65346
commit e817aedd60
2 changed files with 34 additions and 24 deletions

View File

@@ -8,19 +8,29 @@ require "./tartrazine"
# perform a list of actions. These actions can emit tokens # perform a list of actions. These actions can emit tokens
# or change the state machine. # or change the state machine.
module Tartrazine module Tartrazine
enum ActionType
Token
Push
Pop
Combined
Bygroups
Include
Using
Usingself
end
struct Action struct Action
property actions : Array(Action) = [] of Action property actions : Array(Action) = [] of Action
property type : String
@depth : Int32 = 0 @depth : Int32 = 0
@lexer_name : String = "" @lexer_name : String = ""
@states : Array(String) = [] of String @states : Array(String) = [] of String
@states_to_push : Array(String) = [] of String @states_to_push : Array(String) = [] of String
@token_type : String = "" @token_type : String = ""
@type : ActionType = ActionType::Token
def initialize(@type : String, xml : XML::Node?) def initialize(t : String, xml : XML::Node?)
known_types = %w(token push pop combined bygroups include using usingself) @type = ActionType.parse(t.capitalize)
raise Exception.new("Unknown action type: #{type}") unless known_types.includes? type
# Some actions may have actions in them, like this: # Some actions may have actions in them, like this:
# <bygroups> # <bygroups>
@@ -37,18 +47,18 @@ module Tartrazine
end end
# Prefetch the attributes we need from the XML and keep them # Prefetch the attributes we need from the XML and keep them
case type case @type
when "token" when ActionType::Token
@token_type = xml["type"] @token_type = xml["type"]
when "push" when ActionType::Push
@states_to_push = xml.attributes.select { |attrib| @states_to_push = xml.attributes.select { |attrib|
attrib.name == "state" attrib.name == "state"
}.map &.content }.map &.content
when "pop" when ActionType::Pop
@depth = xml["depth"].to_i @depth = xml["depth"].to_i
when "using" when ActionType::Using
@lexer_name = xml["lexer"].downcase @lexer_name = xml["lexer"].downcase
when "combined" when ActionType::Combined
@states = xml.attributes.select { |attrib| @states = xml.attributes.select { |attrib|
attrib.name == "state" attrib.name == "state"
}.map &.content }.map &.content
@@ -57,11 +67,11 @@ module Tartrazine
# ameba:disable Metrics/CyclomaticComplexity # ameba:disable Metrics/CyclomaticComplexity
def emit(match : MatchData, lexer : Lexer, match_group = 0) : Array(Token) def emit(match : MatchData, lexer : Lexer, match_group = 0) : Array(Token)
case type case @type
when "token" when ActionType::Token
raise Exception.new "Can't have a token without a match" if match.empty? raise Exception.new "Can't have a token without a match" if match.empty?
[Token.new(type: @token_type, value: String.new(match[match_group].value))] [Token.new(type: @token_type, value: String.new(match[match_group].value))]
when "push" when ActionType::Push
to_push = @states_to_push.empty? ? [lexer.state_stack.last] : @states_to_push to_push = @states_to_push.empty? ? [lexer.state_stack.last] : @states_to_push
to_push.each do |state| to_push.each do |state|
if state == "#pop" && lexer.state_stack.size > 1 if state == "#pop" && lexer.state_stack.size > 1
@@ -73,11 +83,11 @@ module Tartrazine
end end
end end
[] of Token [] of Token
when "pop" when ActionType::Pop
to_pop = [@depth, lexer.state_stack.size - 1].min to_pop = [@depth, lexer.state_stack.size - 1].min
lexer.state_stack.pop(to_pop) lexer.state_stack.pop(to_pop)
[] of Token [] of Token
when "bygroups" when ActionType::Bygroups
# FIXME: handle # FIXME: handle
# ><bygroups> # ><bygroups>
# <token type="Punctuation"/> # <token type="Punctuation"/>
@@ -102,16 +112,16 @@ module Tartrazine
result += e.emit(match, lexer, i + 1) result += e.emit(match, lexer, i + 1)
end end
result result
when "using" when ActionType::Using
# Shunt to another lexer entirely # Shunt to another lexer entirely
return [] of Token if match.empty? return [] of Token if match.empty?
Tartrazine.lexer(@lexer_name).tokenize(String.new(match[match_group].value), usingself: true) Tartrazine.lexer(@lexer_name).tokenize(String.new(match[match_group].value), secondary: true)
when "usingself" when ActionType::Usingself
# Shunt to another copy of this lexer # Shunt to another copy of this lexer
return [] of Token if match.empty? return [] of Token if match.empty?
new_lexer = lexer.copy new_lexer = lexer.copy
new_lexer.tokenize(String.new(match[match_group].value), usingself: true) new_lexer.tokenize(String.new(match[match_group].value), secondary: true)
when "combined" when ActionType::Combined
# Combine two states into one anonymous state # Combine two states into one anonymous state
new_state = @states.map { |name| new_state = @states.map { |name|
lexer.states[name] lexer.states[name]
@@ -122,7 +132,7 @@ module Tartrazine
lexer.state_stack << new_state.name lexer.state_stack << new_state.name
[] of Token [] of Token
else else
raise Exception.new("Unknown action type: #{type}") raise Exception.new("Unknown action type: #{@type}")
end end
end end
end end

View File

@@ -68,18 +68,18 @@ module Tartrazine
new_lexer new_lexer
end end
# Turn the text into a list of tokens. The `usingself` parameter # Turn the text into a list of tokens. The `secondary` parameter
# is true when the lexer is being used to tokenize a string # is true when the lexer is being used to tokenize a string
# from a larger text that is already being tokenized. # from a larger text that is already being tokenized.
# So, when it's true, we don't modify the text. # So, when it's true, we don't modify the text.
def tokenize(text : String, usingself = false) : Array(Token) def tokenize(text : String, secondary = false) : Array(Token)
@state_stack = ["root"] @state_stack = ["root"]
tokens = [] of Token tokens = [] of Token
pos = 0 pos = 0
matched = false matched = false
# Respect the `ensure_nl` config option # Respect the `ensure_nl` config option
if text.size > 0 && text[-1] != '\n' && config[:ensure_nl] && !usingself if text.size > 0 && text[-1] != '\n' && config[:ensure_nl] && !secondary
text += "\n" text += "\n"
end end