mirror of
https://github.com/ralsina/tartrazine.git
synced 2024-11-10 05:22:23 +00:00
refactor
This commit is contained in:
parent
696227a935
commit
935e72c18e
@ -74,7 +74,7 @@ end
|
|||||||
|
|
||||||
# Helper that creates lexer and tokenizes
|
# Helper that creates lexer and tokenizes
|
||||||
def tokenize(lexer_name, text)
|
def tokenize(lexer_name, text)
|
||||||
lexer = Tartrazine::Lexer.from_xml(File.read("lexers/#{lexer_name}.xml"))
|
lexer = Tartrazine.get_lexer(lexer_name)
|
||||||
collapse_tokens(lexer.tokenize(text))
|
collapse_tokens(lexer.tokenize(text))
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -66,8 +66,8 @@ module Tartrazine
|
|||||||
#
|
#
|
||||||
raise Exception.new "Can't have a token without a match" if match.nil?
|
raise Exception.new "Can't have a token without a match" if match.nil?
|
||||||
|
|
||||||
# Each group matches an action
|
# Each group matches an action. If the group match is empty,
|
||||||
|
# the action is skipped.
|
||||||
result = [] of Token
|
result = [] of Token
|
||||||
@actions.each_with_index do |e, i|
|
@actions.each_with_index do |e, i|
|
||||||
next if match[i + 1]?.nil?
|
next if match[i + 1]?.nil?
|
||||||
@ -79,7 +79,7 @@ module Tartrazine
|
|||||||
return [] of Token if match.nil?
|
return [] of Token if match.nil?
|
||||||
lexer_name = xml["lexer"].downcase
|
lexer_name = xml["lexer"].downcase
|
||||||
# pp! "to tokenize:", match[match_group]
|
# pp! "to tokenize:", match[match_group]
|
||||||
LEXERS[lexer_name].tokenize(match[match_group], usingself: true)
|
Tartrazine.get_lexer(lexer_name).tokenize(match[match_group], usingself: true)
|
||||||
when "usingself"
|
when "usingself"
|
||||||
# Shunt to another copy of this lexer
|
# Shunt to another copy of this lexer
|
||||||
return [] of Token if match.nil?
|
return [] of Token if match.nil?
|
||||||
|
@ -13,6 +13,10 @@ module Tartrazine
|
|||||||
# For explanations on what actions, transformers, etc do
|
# For explanations on what actions, transformers, etc do
|
||||||
# the Pygments documentation is a good place to start.
|
# the Pygments documentation is a good place to start.
|
||||||
# https://pygments.org/docs/lexerdevelopment/
|
# https://pygments.org/docs/lexerdevelopment/
|
||||||
|
|
||||||
|
# A Lexer state. A state has a name and a list of rules.
|
||||||
|
# The state machine has a state stack containing references
|
||||||
|
# to states to decide which rules to apply.
|
||||||
class State
|
class State
|
||||||
property name : String = ""
|
property name : String = ""
|
||||||
property rules = [] of Rule
|
property rules = [] of Rule
|
||||||
@ -25,10 +29,9 @@ module Tartrazine
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# A token, the output of the tokenizer
|
||||||
alias Token = NamedTuple(type: String, value: String)
|
alias Token = NamedTuple(type: String, value: String)
|
||||||
|
|
||||||
LEXERS = {} of String => Tartrazine::Lexer
|
|
||||||
|
|
||||||
class Lexer
|
class Lexer
|
||||||
property config = {
|
property config = {
|
||||||
name: "",
|
name: "",
|
||||||
@ -135,22 +138,9 @@ module Tartrazine
|
|||||||
l
|
l
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
|
||||||
|
|
||||||
# Try loading all lexers
|
def self.get_lexer(name : String) : Lexer
|
||||||
|
Lexer.from_xml(File.read("lexers/#{name}.xml"))
|
||||||
lexers = Tartrazine::LEXERS
|
|
||||||
|
|
||||||
Dir.glob("lexers/*.xml").each do |fname|
|
|
||||||
begin
|
|
||||||
l = Tartrazine::Lexer.from_xml(File.read(fname))
|
|
||||||
rescue ex : Exception
|
|
||||||
# p! ex
|
|
||||||
next
|
|
||||||
end
|
|
||||||
lexers[l.config[:name].downcase] = l
|
|
||||||
l.config[:aliases].each do |key|
|
|
||||||
lexers[key.downcase] = l
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -166,29 +156,3 @@ end
|
|||||||
macro xml_to_a(node, name)
|
macro xml_to_a(node, name)
|
||||||
{{node}}.children.select{|n| n.name == "{{name}}".lstrip("_")}.map {|n| n.content.to_s}
|
{{node}}.children.select{|n| n.name == "{{name}}".lstrip("_")}.map {|n| n.content.to_s}
|
||||||
end
|
end
|
||||||
|
|
||||||
# # #<Regex::Error:Regex match error: match limit exceeded>
|
|
||||||
# next if testname == "tests/fortran/test_string_cataback.txt"
|
|
||||||
|
|
||||||
# # Difference is different unicode representation of a string literal
|
|
||||||
# next if testname == "tests/java/test_string_literals.txt"
|
|
||||||
# next if testname == "tests/systemd/example1.txt"
|
|
||||||
# next if testname == "tests/json/test_strings.txt"
|
|
||||||
|
|
||||||
# # Tartrazine agrees with pygments, disagrees with chroma
|
|
||||||
# next if testname == "tests/java/test_default.txt"
|
|
||||||
# next if testname == "tests/java/test_numeric_literals.txt"
|
|
||||||
# next if testname == "tests/java/test_multiline_string.txt"
|
|
||||||
|
|
||||||
# # Tartrazine disagrees with pygments and chroma, but it's fine
|
|
||||||
# next if testname == "tests/php/test_string_escaping_run.txt"
|
|
||||||
|
|
||||||
# # Chroma's output is bad, but so is Tartrazine's
|
|
||||||
# next if "tests/html/javascript_unclosed.txt" == testname
|
|
||||||
|
|
||||||
# # KNOWN BAD -- TO FIX
|
|
||||||
# next if "tests/html/css_backtracking.txt" == testname
|
|
||||||
# next if "tests/php/anonymous_class.txt" == testname
|
|
||||||
# next if "tests/c/test_string_resembling_decl_end.txt" == testname
|
|
||||||
# next if "tests/mcfunction/data.txt" == testname
|
|
||||||
# next if "tests/mcfunction/selectors.txt" == testname
|
|
||||||
|
Loading…
Reference in New Issue
Block a user