refactor
@@ -2,13 +2,14 @@ require "base58"
 require "json"
 require "xml"
 require "./rules"
+require "./actions"
 module Tartrazine
   VERSION = "0.1.0"

   # This implements a lexer for Pygments RegexLexers as expressed
   # in Chroma's XML serialization.
   #
-  # For explanations on what emitters, transformers, etc do
+  # For explanations on what actions, transformers, etc do
   # the Pygments documentation is a good place to start.
   # https://pygments.org/docs/lexerdevelopment/
   class State
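
For context on the serialization the comment mentions: a Chroma XML lexer definition looks roughly like the sketch below. The element and attribute names (token, bygroups, using, push, pop, state, pattern, depth) are the ones this file reads; the exact surrounding structure is an assumption, not taken from the repository.

    <lexer>
      <rules>
        <state name="root">
          <rule pattern="(\$ )(ls)(\n)">
            <bygroups>
              <token type="GenericPrompt"/>
              <using lexer="bash"/>
              <token type="Text"/>
            </bygroups>
          </rule>
          <rule pattern="\{">
            <token type="Punctuation"/>
            <push state="braces"/>
          </rule>
        </state>
        <state name="braces">
          <rule pattern="\}">
            <token type="Punctuation"/>
            <pop depth="1"/>
          </rule>
        </state>
      </rules>
    </lexer>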
@@ -24,103 +25,6 @@ module Tartrazine
   end


-  class Emitter
-    property type : String
-    property xml : XML::Node
-    property emitters : Array(Emitter) = [] of Emitter
-
-    def initialize(@type : String, @xml : XML::Node?)
-      # Some emitters may have emitters in them, like this:
-      # <bygroups>
-      # <token type="GenericPrompt"/>
-      # <token type="Text"/>
-      # <using lexer="bash"/>
-      # </bygroups>
-      #
-      # The token emitters match with the first 2 groups in the regex
-      # the using emitter matches the 3rd and shunts it to another lexer
-      @xml.children.each do |node|
-        next unless node.element?
-        @emitters << Emitter.new(node.name, node)
-      end
-    end
-
-    def emit(match : Regex::MatchData?, lexer : Lexer, match_group = 0) : Array(Token)
-      case type
-      when "token"
-        raise Exception.new "Can't have a token without a match" if match.nil?
-        [Token.new(type: xml["type"], value: match[match_group])]
-      when "push"
-        states_to_push = xml.attributes.select { |a| a.name == "state" }.map &.content
-        if states_to_push.empty?
-          # Push without a state means push the current state
-          states_to_push = [lexer.state_stack.last]
-        end
-        states_to_push.each do |state|
-          if state == "#pop"
-            # Pop the state
-            # puts "Popping state"
-            lexer.state_stack.pop
-          else
-            # Really push
-            lexer.state_stack << state
-            # puts "Pushed #{lexer.state_stack}"
-          end
-        end
-        [] of Token
-      when "pop"
-        depth = xml["depth"].to_i
-        # puts "Popping #{depth} states"
-        if lexer.state_stack.size <= depth
-          # puts "Can't pop #{depth} states, only have #{lexer.state_stack.size}"
-        else
-          lexer.state_stack.pop(depth)
-        end
-        [] of Token
-      when "bygroups"
-        # FIXME: handle
-        # ><bygroups>
-        # <token type="Punctuation"/>
-        # None
-        # <token type="LiteralStringRegex"/>
-        #
-        # where that None means skipping a group
-        #
-        raise Exception.new "Can't have a token without a match" if match.nil?
-
-        # Each group matches an emitter
-
-        result = [] of Token
-        @emitters.each_with_index do |e, i|
-          next if match[i + 1]?.nil?
-          result += e.emit(match, lexer, i + 1)
-        end
-        result
-      when "using"
-        # Shunt to another lexer entirely
-        return [] of Token if match.nil?
-        lexer_name = xml["lexer"].downcase
-        # pp! "to tokenize:", match[match_group]
-        LEXERS[lexer_name].tokenize(match[match_group], usingself: true)
-      when "usingself"
-        # Shunt to another copy of this lexer
-        return [] of Token if match.nil?
-
-        new_lexer = Lexer.from_xml(lexer.xml)
-        # pp! "to tokenize:", match[match_group]
-        new_lexer.tokenize(match[match_group], usingself: true)
-      when "combined"
-        # Combine two states into one anonymous state
-        states = xml.attributes.select { |a| a.name == "state" }.map &.content
-        new_state = states.map { |name| lexer.states[name] }.reduce { |s1, s2| s1 + s2 }
-        lexer.states[new_state.name] = new_state
-        lexer.state_stack << new_state.name
-        [] of Token
-      else
-        raise Exception.new("Unknown emitter type: #{type}: #{xml}")
-      end
-    end
-  end

   alias Token = NamedTuple(type: String, value: String)

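Two standalone Crystal sketches of the behavior removed in this hunk (illustrative only, written against plain arrays and regexes rather than the repository's Lexer and Emitter types):

    # Push/pop stack discipline from the "push" and "pop" branches.
    stack = ["root"]
    stack << "string"    # <push state="string"/> pushes the named state
    stack << stack.last  # <push/> with no state re-pushes the current state
    depth = 2
    stack.pop(depth) if stack.size > depth # <pop depth="2"/>, guarded as above
    p stack # => ["root"]

    # Group-to-emitter pairing from the "bygroups" branch: group i + 1 of the
    # regex match feeds emitter i, and unmatched groups are skipped.
    emitter_types = ["GenericPrompt", "Text"]
    match = /(\$ )(.*)/.match("$ ls -la").not_nil!
    tokens = [] of {type: String, value: String}
    emitter_types.each_with_index do |type, i|
      next if match[i + 1]?.nil?
      tokens << {type: type, value: match[i + 1]}
    end
    p tokens # => [{type: "GenericPrompt", value: "$ "}, {type: "Text", value: "ls -la"}]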
@@ -215,7 +119,7 @@ module Tartrazine
         if rule_node.first_element_child.try &.name == "include"
           rule = IncludeStateRule.new(rule_node)
         else
-          rule = Always.new(rule_node)
+          rule = UnconditionalRule.new(rule_node)
         end
       else
         flags = Regex::Options::ANCHORED
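
End to end, the API this commit touches is used roughly as below. This is a sketch: the lexer file path is invented, and Lexer.from_xml (which does appear in the diff) is assumed to accept the XML source as a string.

    require "tartrazine"

    # Hypothetical path to a Chroma-style XML lexer definition.
    xml = File.read("lexers/crystal.xml")
    lexer = Tartrazine::Lexer.from_xml(xml)

    # Token is NamedTuple(type: String, value: String), per the alias above.
    tokens = lexer.tokenize(%(puts "hello"))
    tokens.each { |t| puts "#{t[:type]} #{t[:value].inspect}" }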