diff --git a/spec/tartrazine_spec.cr b/spec/tartrazine_spec.cr index 42309a7..7b564c2 100644 --- a/spec/tartrazine_spec.cr +++ b/spec/tartrazine_spec.cr @@ -74,7 +74,7 @@ end # Helper that creates lexer and tokenizes def tokenize(lexer_name, text) - lexer = Tartrazine::Lexer.from_xml(File.read("lexers/#{lexer_name}.xml")) + lexer = Tartrazine.get_lexer(lexer_name) collapse_tokens(lexer.tokenize(text)) end diff --git a/src/actions.cr b/src/actions.cr index a77f4f9..72eb9f7 100644 --- a/src/actions.cr +++ b/src/actions.cr @@ -66,8 +66,8 @@ module Tartrazine # raise Exception.new "Can't have a token without a match" if match.nil? - # Each group matches an action - + # Each group matches an action. If the group match is empty, + # the action is skipped. result = [] of Token @actions.each_with_index do |e, i| next if match[i + 1]?.nil? @@ -79,7 +79,7 @@ module Tartrazine return [] of Token if match.nil? lexer_name = xml["lexer"].downcase # pp! "to tokenize:", match[match_group] - LEXERS[lexer_name].tokenize(match[match_group], usingself: true) + Tartrazine.get_lexer(lexer_name).tokenize(match[match_group], usingself: true) when "usingself" # Shunt to another copy of this lexer return [] of Token if match.nil? diff --git a/src/tartrazine.cr b/src/tartrazine.cr index 49a6b41..6434b86 100644 --- a/src/tartrazine.cr +++ b/src/tartrazine.cr @@ -13,6 +13,10 @@ module Tartrazine # For explanations on what actions, transformers, etc do # the Pygments documentation is a good place to start. # https://pygments.org/docs/lexerdevelopment/ + + # A Lexer state. A state has a name and a list of rules. + # The state machine has a state stack containing references + # to states to decide which rules to apply. class State property name : String = "" property rules = [] of Rule @@ -25,10 +29,9 @@ module Tartrazine end end + # A token, the output of the tokenizer alias Token = NamedTuple(type: String, value: String) - LEXERS = {} of String => Tartrazine::Lexer - class Lexer property config = { name: "", @@ -135,22 +138,9 @@ module Tartrazine l end end -end -# Try loading all lexers - -lexers = Tartrazine::LEXERS - -Dir.glob("lexers/*.xml").each do |fname| - begin - l = Tartrazine::Lexer.from_xml(File.read(fname)) - rescue ex : Exception - # p! ex - next - end - lexers[l.config[:name].downcase] = l - l.config[:aliases].each do |key| - lexers[key.downcase] = l + def self.get_lexer(name : String) : Lexer + Lexer.from_xml(File.read("lexers/#{name}.xml")) end end @@ -166,29 +156,3 @@ end macro xml_to_a(node, name) {{node}}.children.select{|n| n.name == "{{name}}".lstrip("_")}.map {|n| n.content.to_s} end - -# # # -# next if testname == "tests/fortran/test_string_cataback.txt" - -# # Difference is different unicode representation of a string literal -# next if testname == "tests/java/test_string_literals.txt" -# next if testname == "tests/systemd/example1.txt" -# next if testname == "tests/json/test_strings.txt" - -# # Tartrazine agrees with pygments, disagrees with chroma -# next if testname == "tests/java/test_default.txt" -# next if testname == "tests/java/test_numeric_literals.txt" -# next if testname == "tests/java/test_multiline_string.txt" - -# # Tartrazine disagrees with pygments and chroma, but it's fine -# next if testname == "tests/php/test_string_escaping_run.txt" - -# # Chroma's output is bad, but so is Tartrazine's -# next if "tests/html/javascript_unclosed.txt" == testname - -# # KNOWN BAD -- TO FIX -# next if "tests/html/css_backtracking.txt" == testname -# next if "tests/php/anonymous_class.txt" == testname -# next if "tests/c/test_string_resembling_decl_end.txt" == testname -# next if "tests/mcfunction/data.txt" == testname -# next if "tests/mcfunction/selectors.txt" == testname