mirror of
https://github.com/ralsina/tartrazine.git
synced 2024-11-12 22:42:23 +00:00
refactor
This commit is contained in:
parent
696227a935
commit
935e72c18e
@ -74,7 +74,7 @@ end
|
||||
|
||||
# Helper that creates lexer and tokenizes
|
||||
def tokenize(lexer_name, text)
|
||||
lexer = Tartrazine::Lexer.from_xml(File.read("lexers/#{lexer_name}.xml"))
|
||||
lexer = Tartrazine.get_lexer(lexer_name)
|
||||
collapse_tokens(lexer.tokenize(text))
|
||||
end
|
||||
|
||||
|
@ -66,8 +66,8 @@ module Tartrazine
|
||||
#
|
||||
raise Exception.new "Can't have a token without a match" if match.nil?
|
||||
|
||||
# Each group matches an action
|
||||
|
||||
# Each group matches an action. If the group match is empty,
|
||||
# the action is skipped.
|
||||
result = [] of Token
|
||||
@actions.each_with_index do |e, i|
|
||||
next if match[i + 1]?.nil?
|
||||
@ -79,7 +79,7 @@ module Tartrazine
|
||||
return [] of Token if match.nil?
|
||||
lexer_name = xml["lexer"].downcase
|
||||
# pp! "to tokenize:", match[match_group]
|
||||
LEXERS[lexer_name].tokenize(match[match_group], usingself: true)
|
||||
Tartrazine.get_lexer(lexer_name).tokenize(match[match_group], usingself: true)
|
||||
when "usingself"
|
||||
# Shunt to another copy of this lexer
|
||||
return [] of Token if match.nil?
|
||||
|
@ -13,6 +13,10 @@ module Tartrazine
|
||||
# For explanations on what actions, transformers, etc do
|
||||
# the Pygments documentation is a good place to start.
|
||||
# https://pygments.org/docs/lexerdevelopment/
|
||||
|
||||
# A Lexer state. A state has a name and a list of rules.
|
||||
# The state machine has a state stack containing references
|
||||
# to states to decide which rules to apply.
|
||||
class State
|
||||
property name : String = ""
|
||||
property rules = [] of Rule
|
||||
@ -25,10 +29,9 @@ module Tartrazine
|
||||
end
|
||||
end
|
||||
|
||||
# A token, the output of the tokenizer
|
||||
alias Token = NamedTuple(type: String, value: String)
|
||||
|
||||
LEXERS = {} of String => Tartrazine::Lexer
|
||||
|
||||
class Lexer
|
||||
property config = {
|
||||
name: "",
|
||||
@ -135,22 +138,9 @@ module Tartrazine
|
||||
l
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# Try loading all lexers
|
||||
|
||||
lexers = Tartrazine::LEXERS
|
||||
|
||||
Dir.glob("lexers/*.xml").each do |fname|
|
||||
begin
|
||||
l = Tartrazine::Lexer.from_xml(File.read(fname))
|
||||
rescue ex : Exception
|
||||
# p! ex
|
||||
next
|
||||
end
|
||||
lexers[l.config[:name].downcase] = l
|
||||
l.config[:aliases].each do |key|
|
||||
lexers[key.downcase] = l
|
||||
def self.get_lexer(name : String) : Lexer
|
||||
Lexer.from_xml(File.read("lexers/#{name}.xml"))
|
||||
end
|
||||
end
|
||||
|
||||
@ -166,29 +156,3 @@ end
|
||||
macro xml_to_a(node, name)
|
||||
{{node}}.children.select{|n| n.name == "{{name}}".lstrip("_")}.map {|n| n.content.to_s}
|
||||
end
|
||||
|
||||
# # #<Regex::Error:Regex match error: match limit exceeded>
|
||||
# next if testname == "tests/fortran/test_string_cataback.txt"
|
||||
|
||||
# # Difference is different unicode representation of a string literal
|
||||
# next if testname == "tests/java/test_string_literals.txt"
|
||||
# next if testname == "tests/systemd/example1.txt"
|
||||
# next if testname == "tests/json/test_strings.txt"
|
||||
|
||||
# # Tartrazine agrees with pygments, disagrees with chroma
|
||||
# next if testname == "tests/java/test_default.txt"
|
||||
# next if testname == "tests/java/test_numeric_literals.txt"
|
||||
# next if testname == "tests/java/test_multiline_string.txt"
|
||||
|
||||
# # Tartrazine disagrees with pygments and chroma, but it's fine
|
||||
# next if testname == "tests/php/test_string_escaping_run.txt"
|
||||
|
||||
# # Chroma's output is bad, but so is Tartrazine's
|
||||
# next if "tests/html/javascript_unclosed.txt" == testname
|
||||
|
||||
# # KNOWN BAD -- TO FIX
|
||||
# next if "tests/html/css_backtracking.txt" == testname
|
||||
# next if "tests/php/anonymous_class.txt" == testname
|
||||
# next if "tests/c/test_string_resembling_decl_end.txt" == testname
|
||||
# next if "tests/mcfunction/data.txt" == testname
|
||||
# next if "tests/mcfunction/selectors.txt" == testname
|
||||
|
Loading…
Reference in New Issue
Block a user