# tartrazine/spec/examples/crystal/lexer.cr
require "./constants/lexers"
require "./heuristics"
require "baked_file_system"
require "crystal/syntax_highlighter"

module Tartrazine
  class LexerFiles
    extend BakedFileSystem

    bake_folder "../lexers", __DIR__
  end

  # Get the lexer object for a language name
  # FIXME: support mimetypes
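  #
  # Example (a minimal sketch; "crystal" is handled natively below,
  # other names are looked up in the baked-in lexer files):
  #
  # ```
  # lexer = Tartrazine.lexer(name: "crystal")
  # tokens = lexer.tokenizer("puts 1").to_a
  # ```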
  def self.lexer(name : String? = nil, filename : String? = nil, mimetype : String? = nil) : BaseLexer
    return lexer_by_name(name) if name && name != "autodetect"
    return lexer_by_filename(filename) if filename
    return lexer_by_mimetype(mimetype) if mimetype

    RegexLexer.from_xml(LexerFiles.get("/#{LEXERS_BY_NAME["plaintext"]}.xml").gets_to_end)
  end

  private def self.lexer_by_mimetype(mimetype : String) : BaseLexer
    lexer_file_name = LEXERS_BY_MIMETYPE.fetch(mimetype, nil)
    raise Exception.new("Unknown mimetype: #{mimetype}") if lexer_file_name.nil?

    RegexLexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
  end

  private def self.lexer_by_name(name : String) : BaseLexer
    return CrystalLexer.new if name == "crystal"
    lexer_file_name = LEXERS_BY_NAME.fetch(name.downcase, nil)
    return create_delegating_lexer(name) if lexer_file_name.nil? && name.includes? "+"
    raise Exception.new("Unknown lexer: #{name}") if lexer_file_name.nil?

    RegexLexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
  end

  private def self.lexer_by_filename(filename : String) : BaseLexer
    if filename.ends_with?(".cr")
      return CrystalLexer.new
    end

    candidates = Set(String).new
    LEXERS_BY_FILENAME.each do |k, v|
      candidates += v.to_set if File.match?(k, File.basename(filename))
    end

    case candidates.size
    when 0
      lexer_file_name = LEXERS_BY_NAME["plaintext"]
    when 1
      lexer_file_name = candidates.first
    else
      lexer_file_name = self.lexer_by_content(filename)
      begin
        return self.lexer(lexer_file_name)
      rescue ex : Exception
        raise Exception.new("Multiple lexers match the filename: #{candidates.to_a.join(", ")}, heuristics suggest #{lexer_file_name} but there is no matching lexer.")
      end
    end

    RegexLexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
  end
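
  # Pick a lexer file name by running Linguist's content heuristics
  # (shipped as the baked-in heuristics.yml) against the file's contents.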
  private def self.lexer_by_content(fname : String) : String?
    h = Linguist::Heuristic.from_yaml(LexerFiles.get("/heuristics.yml").gets_to_end)
    result = h.run(fname, File.read(fname))
    case result
    when Nil
      raise Exception.new "No lexer found for #{fname}"
    when String
      result.as(String)
    when Array(String)
      result.first
    end
  end

  private def self.create_delegating_lexer(name : String) : BaseLexer
    language, root = name.split("+", 2)
    language_lexer = lexer(language)
    root_lexer = lexer(root)
    DelegatingLexer.new(language_lexer, root_lexer)
  end

  # Return a sorted list of all lexer names
  def self.lexers : Array(String)
    LEXERS_BY_NAME.keys.sort!
  end

  # A token, the output of the tokenizer
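  # For example, lexing the keyword `def` produces
  # {type: "NameKeyword", value: "def"}.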
  alias Token = NamedTuple(type: String, value: String)

  abstract class BaseTokenizer
  end

  class Tokenizer < BaseTokenizer
    include Iterator(Token)

    property lexer : BaseLexer
    property text : Bytes
    property pos : Int32 = 0
    @dq = Deque(Token).new
    property state_stack = ["root"]

    def initialize(@lexer : BaseLexer, text : String, secondary = false)
      # Respect the `ensure_nl` config option
      if text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary
        text += "\n"
      end
      @text = text.to_slice
    end

    def next : Iterator::Stop | Token
      if @dq.size > 0
        return @dq.shift
      end
      if pos == @text.size
        return stop
      end

      matched = false
      while @pos < @text.size
        @lexer.states[@state_stack.last].rules.each do |rule|
          matched, new_pos, new_tokens = rule.match(@text, @pos, self)
          if matched
            @pos = new_pos
            split_tokens(new_tokens).each { |token| @dq << token }
            break
          end
        end
        if !matched
          if @text[@pos] == 10u8 # an unmatched newline byte resets the state stack
            @dq << {type: "Text", value: "\n"}
            @state_stack = ["root"]
          else
            @dq << {type: "Error", value: String.new(@text[@pos..@pos])}
          end
          @pos += 1
          break
        end
      end
      self.next
    end

    # If a token contains a newline, split it into two tokens
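    # (one per line, each keeping its trailing newline). For example,
    # {type: "Text", value: "a\nb"} becomes
    # {type: "Text", value: "a\n"} and {type: "Text", value: "b"}.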
    def split_tokens(tokens : Array(Token)) : Array(Token)
      split_tokens = [] of Token
      tokens.each do |token|
        if token[:value].includes?("\n")
          values = token[:value].split("\n")
          values.each_with_index do |value, index|
            value += "\n" if index < values.size - 1
            split_tokens << {type: token[:type], value: value}
          end
        else
          split_tokens << token
        end
      end
      split_tokens
    end
  end

  alias BaseLexer = Lexer

  abstract class Lexer
    property config = {
      name:             "",
      priority:         0.0,
      case_insensitive: false,
      dot_all:          false,
      not_multiline:    false,
      ensure_nl:        false,
    }
    property states = {} of String => State

    def tokenizer(text : String, secondary = false) : BaseTokenizer
      Tokenizer.new(self, text, secondary)
    end
  end

  # This implements a lexer for Pygments RegexLexers as expressed
  # in Chroma's XML serialization.
  #
  # For explanations of what actions and states do,
  # the Pygments documentation is a good place to start:
  # https://pygments.org/docs/lexerdevelopment/
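  #
  # The serialized form looks roughly like this (a minimal sketch,
  # inferred from the parsing in `from_xml` below; real lexer files
  # carry many more states and rules):
  #
  # ```xml
  # <lexer>
  #   <config>
  #     <name>mylang</name>
  #   </config>
  #   <rules>
  #     <state name="root">
  #       <rule pattern="\d+">
  #         <token type="LiteralNumber"/>
  #       </rule>
  #     </state>
  #   </rules>
  # </lexer>
  # ```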
  class RegexLexer < BaseLexer
    # Collapse consecutive tokens of the same type for easier comparison
    # and smaller output.
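    # For example, [{type: "Text", value: "a"}, {type: "Text", value: "b"}]
    # collapses to [{type: "Text", value: "ab"}]; empty-valued tokens are dropped.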
    def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token)
      result = [] of Tartrazine::Token
      tokens = tokens.reject { |token| token[:value] == "" }
      tokens.each do |token|
        if result.empty?
          result << token
          next
        end
        last = result.last
        if last[:type] == token[:type]
          new_token = {type: last[:type], value: last[:value] + token[:value]}
          result.pop
          result << new_token
        else
          result << token
        end
      end
      result
    end

    def self.from_xml(xml : String) : Lexer
      l = RegexLexer.new
      lexer = XML.parse(xml).first_element_child
      if lexer
        config = lexer.children.find { |node|
          node.name == "config"
        }
        if config
          l.config = {
            name:             xml_to_s(config, name) || "",
            priority:         xml_to_f(config, priority) || 0.0,
            not_multiline:    xml_to_s(config, not_multiline) == "true",
            dot_all:          xml_to_s(config, dot_all) == "true",
            case_insensitive: xml_to_s(config, case_insensitive) == "true",
            ensure_nl:        xml_to_s(config, ensure_nl) == "true",
          }
        end

        rules = lexer.children.find { |node|
          node.name == "rules"
        }
        if rules
          # The rules node contains states 🤷
          rules.children.select { |node|
            node.name == "state"
          }.each do |state_node|
            state = State.new
            state.name = state_node["name"]
            if l.states.has_key?(state.name)
              raise Exception.new("Duplicate state: #{state.name}")
            else
              l.states[state.name] = state
            end
            # And states contain rules 🤷
            state_node.children.select { |node|
              node.name == "rule"
            }.each do |rule_node|
              case rule_node["pattern"]?
              when nil
                if rule_node.first_element_child.try &.name == "include"
                  rule = IncludeStateRule.new(rule_node)
                else
                  rule = UnconditionalRule.new(rule_node)
                end
              else
                rule = Rule.new(rule_node,
                  multiline: !l.config[:not_multiline],
                  dotall: l.config[:dot_all],
                  ignorecase: l.config[:case_insensitive])
              end
              state.rules << rule
            end
          end
        end
      end
      l
    end
  end

  # A lexer that takes two lexers as arguments: a root lexer
  # and a language lexer. Everything is scanned using the
  # language lexer; afterwards, all `Other` tokens are lexed
  # using the root lexer.
  #
  # This is useful for things like template languages, where
  # you have Jinja + HTML or Jinja + CSS and so on.
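  #
  # A hypothetical usage sketch (assuming both lexer names are baked in;
  # a "language+root" name routes through `create_delegating_lexer`):
  #
  # ```
  # lexer = Tartrazine.lexer("jinja+html")
  # ```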
  class DelegatingLexer < Lexer
    property language_lexer : BaseLexer
    property root_lexer : BaseLexer

    def initialize(@language_lexer : BaseLexer, @root_lexer : BaseLexer)
    end

    def tokenizer(text : String, secondary = false) : DelegatingTokenizer
      DelegatingTokenizer.new(self, text, secondary)
    end
  end

  # This Tokenizer works with a DelegatingLexer. It first tokenizes
  # using the language lexer, and "Other" tokens are tokenized using
  # the root lexer.
  class DelegatingTokenizer < BaseTokenizer
    include Iterator(Token)

    @dq = Deque(Token).new
    @language_tokenizer : BaseTokenizer

    def initialize(@lexer : DelegatingLexer, text : String, secondary = false)
      # Respect the `ensure_nl` config option
      if text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary
        text += "\n"
      end
      @language_tokenizer = @lexer.language_lexer.tokenizer(text, true)
    end

    def next : Iterator::Stop | Token
      if @dq.size > 0
        return @dq.shift
      end
      token = @language_tokenizer.next
      if token.is_a? Iterator::Stop
        return stop
      elsif token.as(Token)[:type] == "Other"
        root_tokenizer = @lexer.root_lexer.tokenizer(token.as(Token)[:value], true)
        root_tokenizer.each do |root_token|
          @dq << root_token
        end
      else
        @dq << token.as(Token)
      end
      self.next
    end
  end

  # A Lexer state. A state has a name and a list of rules.
  # The state machine has a state stack containing references
  # to states to decide which rules to apply.
  struct State
    property name : String = ""
    property rules = [] of BaseRule
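
    # Merge two states: the result gets a fresh random name and the
    # concatenated rule lists of both operands.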
    def +(other : State)
      new_state = State.new
      new_state.name = Random.base58(8)
      new_state.rules = rules + other.rules
      new_state
    end
  end

  class CustomCrystalHighlighter < Crystal::SyntaxHighlighter
    @tokens = [] of Token

    def render_delimiter(&block)
      @tokens << {type: "LiteralString", value: block.call.to_s}
    end

    def render_interpolation(&block)
      @tokens << {type: "LiteralStringInterpol", value: "\#{"}
      @tokens << {type: "Text", value: block.call.to_s}
      @tokens << {type: "LiteralStringInterpol", value: "}"}
    end

    def render_string_array(&block)
      @tokens << {type: "LiteralString", value: block.call.to_s}
    end

    # ameba:disable Metrics/CyclomaticComplexity
    def render(type : TokenType, value : String)
      case type
      when .comment?
        @tokens << {type: "Comment", value: value}
      when .number?
        @tokens << {type: "LiteralNumber", value: value}
      when .char?
        @tokens << {type: "LiteralStringChar", value: value}
      when .symbol?
        @tokens << {type: "LiteralStringSymbol", value: value}
      when .const?
        @tokens << {type: "NameConstant", value: value}
      when .string?
        @tokens << {type: "LiteralString", value: value}
      when .ident?
        @tokens << {type: "NameVariable", value: value}
      when .keyword?, .self?
        @tokens << {type: "NameKeyword", value: value}
      when .primitive_literal?
        @tokens << {type: "Literal", value: value}
      when .operator?
        @tokens << {type: "Operator", value: value}
      when .delimited_token?, .delimiter_start?, .delimiter_end?
        @tokens << {type: "LiteralString", value: value}
      else
        @tokens << {type: "Text", value: value}
      end
    end
  end

  class CrystalTokenizer < Tartrazine::BaseTokenizer
    include Iterator(Token)

    @hl = CustomCrystalHighlighter.new
    @lexer : BaseLexer
    @iter : Iterator(Token)

    # delegate next, to: @iter
    def initialize(@lexer : BaseLexer, text : String, secondary = false)
      # Respect the `ensure_nl` config option
      if text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary
        text += "\n"
      end
      # Just do the tokenizing up front; `@hl.@tokens` reads the
      # highlighter's collected token list directly.
      @hl.highlight(text)
      @iter = @hl.@tokens.each
    end

    def next : Iterator::Stop | Token
      @iter.next
    end
  end

  class CrystalLexer < BaseLexer
    def tokenizer(text : String, secondary = false) : BaseTokenizer
      CrystalTokenizer.new(self, text, secondary)
    end
  end
end