feat: SVG formatter

2025-10-01 01:22:33 +00:00 · 2024-09-21 00:17:41 -03:00
parent d26393d8c9
commit 9c70fbf389
10 changed files with 164 additions and 430 deletions
--- a/spec/examples/crystal/hello.cr
+++ b/spec/examples/crystal/hello.cr
@@ -0,0 +1 @@
+puts "Hello Crystal!"
--- a/spec/examples/crystal/hello.cr.json
+++ b/spec/examples/crystal/hello.cr.json
@@ -0,0 +1 @@
+[{"type":"Text","value":"puts "},{"type":"LiteralString","value":"\"Hello Crystal!\""},{"type":"Text","value":"\n"}]
--- a/spec/examples/crystal/lexer.cr
+++ b/spec/examples/crystal/lexer.cr
@@ -1,413 +0,0 @@
-require "./constants/lexers"
-require "./heuristics"
-require "baked_file_system"
-require "crystal/syntax_highlighter"
-
-module Tartrazine
-  class LexerFiles
-    extend BakedFileSystem
-    bake_folder "../lexers", __DIR__
-  end
-
-  # Get the lexer object for a language name
-  # FIXME: support mimetypes
-  def self.lexer(name : String? = nil, filename : String? = nil, mimetype : String? = nil) : BaseLexer
-    return lexer_by_name(name) if name && name != "autodetect"
-    return lexer_by_filename(filename) if filename
-    return lexer_by_mimetype(mimetype) if mimetype
-
-    RegexLexer.from_xml(LexerFiles.get("/#{LEXERS_BY_NAME["plaintext"]}.xml").gets_to_end)
-  end
-
-  private def self.lexer_by_mimetype(mimetype : String) : BaseLexer
-    lexer_file_name = LEXERS_BY_MIMETYPE.fetch(mimetype, nil)
-    raise Exception.new("Unknown mimetype: #{mimetype}") if lexer_file_name.nil?
-
-    RegexLexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
-  end
-
-  private def self.lexer_by_name(name : String) : BaseLexer
-    return CrystalLexer.new if name == "crystal"
-    lexer_file_name = LEXERS_BY_NAME.fetch(name.downcase, nil)
-    return create_delegating_lexer(name) if lexer_file_name.nil? && name.includes? "+"
-    raise Exception.new("Unknown lexer: #{name}") if lexer_file_name.nil?
-
-    RegexLexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
-  end
-
-  private def self.lexer_by_filename(filename : String) : BaseLexer
-    if filename.ends_with?(".cr")
-      return CrystalLexer.new
-    end
-
-    candidates = Set(String).new
-    LEXERS_BY_FILENAME.each do |k, v|
-      candidates += v.to_set if File.match?(k, File.basename(filename))
-    end
-
-    case candidates.size
-    when 0
-      lexer_file_name = LEXERS_BY_NAME["plaintext"]
-    when 1
-      lexer_file_name = candidates.first
-    else
-      lexer_file_name = self.lexer_by_content(filename)
-      begin
-        return self.lexer(lexer_file_name)
-      rescue ex : Exception
-        raise Exception.new("Multiple lexers match the filename: #{candidates.to_a.join(", ")}, heuristics suggest #{lexer_file_name} but there is no matching lexer.")
-      end
-    end
-
-    RegexLexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
-  end
-
-  private def self.lexer_by_content(fname : String) : String?
-    h = Linguist::Heuristic.from_yaml(LexerFiles.get("/heuristics.yml").gets_to_end)
-    result = h.run(fname, File.read(fname))
-    case result
-    when Nil
-      raise Exception.new "No lexer found for #{fname}"
-    when String
-      result.as(String)
-    when Array(String)
-      result.first
-    end
-  end
-
-  private def self.create_delegating_lexer(name : String) : BaseLexer
-    language, root = name.split("+", 2)
-    language_lexer = lexer(language)
-    root_lexer = lexer(root)
-    DelegatingLexer.new(language_lexer, root_lexer)
-  end
-
-  # Return a list of all lexers
-  def self.lexers : Array(String)
-    LEXERS_BY_NAME.keys.sort!
-  end
-
-  # A token, the output of the tokenizer
-  alias Token = NamedTuple(type: String, value: String)
-
-  abstract class BaseTokenizer
-  end
-
-  class Tokenizer < BaseTokenizer
-    include Iterator(Token)
-    property lexer : BaseLexer
-    property text : Bytes
-    property pos : Int32 = 0
-    @dq = Deque(Token).new
-    property state_stack = ["root"]
-
-    def initialize(@lexer : BaseLexer, text : String, secondary = false)
-      # Respect the `ensure_nl` config option
-      if text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary
-        text += "\n"
-      end
-      @text = text.to_slice
-    end
-
-    def next : Iterator::Stop | Token
-      if @dq.size > 0
-        return @dq.shift
-      end
-      if pos == @text.size
-        return stop
-      end
-
-      matched = false
-      while @pos < @text.size
-        @lexer.states[@state_stack.last].rules.each do |rule|
-          matched, new_pos, new_tokens = rule.match(@text, @pos, self)
-          if matched
-            @pos = new_pos
-            split_tokens(new_tokens).each { |token| @dq << token }
-            break
-          end
-        end
-        if !matched
-          if @text[@pos] == 10u8
-            @dq << {type: "Text", value: "\n"}
-            @state_stack = ["root"]
-          else
-            @dq << {type: "Error", value: String.new(@text[@pos..@pos])}
-          end
-          @pos += 1
-          break
-        end
-      end
-      self.next
-    end
-
-    # If a token contains a newline, split it into two tokens
-    def split_tokens(tokens : Array(Token)) : Array(Token)
-      split_tokens = [] of Token
-      tokens.each do |token|
-        if token[:value].includes?("\n")
-          values = token[:value].split("\n")
-          values.each_with_index do |value, index|
-            value += "\n" if index < values.size - 1
-            split_tokens << {type: token[:type], value: value}
-          end
-        else
-          split_tokens << token
-        end
-      end
-      split_tokens
-    end
-  end
-
-  alias BaseLexer = Lexer
-
-  abstract class Lexer
-    property config = {
-      name:             "",
-      priority:         0.0,
-      case_insensitive: false,
-      dot_all:          false,
-      not_multiline:    false,
-      ensure_nl:        false,
-    }
-    property states = {} of String => State
-
-    def tokenizer(text : String, secondary = false) : BaseTokenizer
-      Tokenizer.new(self, text, secondary)
-    end
-  end
-
-  # This implements a lexer for Pygments RegexLexers as expressed
-  # in Chroma's XML serialization.
-  #
-  # For explanations on what actions and states do
-  # the Pygments documentation is a good place to start.
-  # https://pygments.org/docs/lexerdevelopment/
-  class RegexLexer < BaseLexer
-    # Collapse consecutive tokens of the same type for easier comparison
-    # and smaller output
-    def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token)
-      result = [] of Tartrazine::Token
-      tokens = tokens.reject { |token| token[:value] == "" }
-      tokens.each do |token|
-        if result.empty?
-          result << token
-          next
-        end
-        last = result.last
-        if last[:type] == token[:type]
-          new_token = {type: last[:type], value: last[:value] + token[:value]}
-          result.pop
-          result << new_token
-        else
-          result << token
-        end
-      end
-      result
-    end
-
-    def self.from_xml(xml : String) : Lexer
-      l = RegexLexer.new
-      lexer = XML.parse(xml).first_element_child
-      if lexer
-        config = lexer.children.find { |node|
-          node.name == "config"
-        }
-        if config
-          l.config = {
-            name:             xml_to_s(config, name) || "",
-            priority:         xml_to_f(config, priority) || 0.0,
-            not_multiline:    xml_to_s(config, not_multiline) == "true",
-            dot_all:          xml_to_s(config, dot_all) == "true",
-            case_insensitive: xml_to_s(config, case_insensitive) == "true",
-            ensure_nl:        xml_to_s(config, ensure_nl) == "true",
-          }
-        end
-
-        rules = lexer.children.find { |node|
-          node.name == "rules"
-        }
-        if rules
-          # Rules contains states 🤷
-          rules.children.select { |node|
-            node.name == "state"
-          }.each do |state_node|
-            state = State.new
-            state.name = state_node["name"]
-            if l.states.has_key?(state.name)
-              raise Exception.new("Duplicate state: #{state.name}")
-            else
-              l.states[state.name] = state
-            end
-            # And states contain rules 🤷
-            state_node.children.select { |node|
-              node.name == "rule"
-            }.each do |rule_node|
-              case rule_node["pattern"]?
-              when nil
-                if rule_node.first_element_child.try &.name == "include"
-                  rule = IncludeStateRule.new(rule_node)
-                else
-                  rule = UnconditionalRule.new(rule_node)
-                end
-              else
-                rule = Rule.new(rule_node,
-                  multiline: !l.config[:not_multiline],
-                  dotall: l.config[:dot_all],
-                  ignorecase: l.config[:case_insensitive])
-              end
-              state.rules << rule
-            end
-          end
-        end
-      end
-      l
-    end
-  end
-
-  # A lexer that takes two lexers as arguments. A root lexer
-  # and a language lexer. Everything is scalled using the
-  # language lexer, afterwards all `Other` tokens are lexed
-  # using the root lexer.
-  #
-  # This is useful for things like template languages, where
-  # you have Jinja + HTML or Jinja + CSS and so on.
-  class DelegatingLexer < Lexer
-    property language_lexer : BaseLexer
-    property root_lexer : BaseLexer
-
-    def initialize(@language_lexer : BaseLexer, @root_lexer : BaseLexer)
-    end
-
-    def tokenizer(text : String, secondary = false) : DelegatingTokenizer
-      DelegatingTokenizer.new(self, text, secondary)
-    end
-  end
-
-  # This Tokenizer works with a DelegatingLexer. It first tokenizes
-  # using the language lexer, and "Other" tokens are tokenized using
-  # the root lexer.
-  class DelegatingTokenizer < BaseTokenizer
-    include Iterator(Token)
-    @dq = Deque(Token).new
-    @language_tokenizer : BaseTokenizer
-
-    def initialize(@lexer : DelegatingLexer, text : String, secondary = false)
-      # Respect the `ensure_nl` config option
-      if text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary
-        text += "\n"
-      end
-      @language_tokenizer = @lexer.language_lexer.tokenizer(text, true)
-    end
-
-    def next : Iterator::Stop | Token
-      if @dq.size > 0
-        return @dq.shift
-      end
-      token = @language_tokenizer.next
-      if token.is_a? Iterator::Stop
-        return stop
-      elsif token.as(Token).[:type] == "Other"
-        root_tokenizer = @lexer.root_lexer.tokenizer(token.as(Token).[:value], true)
-        root_tokenizer.each do |root_token|
-          @dq << root_token
-        end
-      else
-        @dq << token.as(Token)
-      end
-      self.next
-    end
-  end
-
-  # A Lexer state. A state has a name and a list of rules.
-  # The state machine has a state stack containing references
-  # to states to decide which rules to apply.
-  struct State
-    property name : String = ""
-    property rules = [] of BaseRule
-
-    def +(other : State)
-      new_state = State.new
-      new_state.name = Random.base58(8)
-      new_state.rules = rules + other.rules
-      new_state
-    end
-  end
-
-  class CustomCrystalHighlighter < Crystal::SyntaxHighlighter
-    @tokens = [] of Token
-
-    def render_delimiter(&block)
-      @tokens << {type: "LiteralString", value: block.call.to_s}
-    end
-
-    def render_interpolation(&block)
-      @tokens << {type: "LiteralStringInterpol", value: "\#{"}
-      @tokens << {type: "Text", value: block.call.to_s}
-      @tokens << {type: "LiteralStringInterpol", value: "}"}
-    end
-
-    def render_string_array(&block)
-      @tokens << {type: "LiteralString", value: block.call.to_s}
-    end
-
-    # ameba:disable Metrics/CyclomaticComplexity
-    def render(type : TokenType, value : String)
-      case type
-      when .comment?
-        @tokens << {type: "Comment", value: value}
-      when .number?
-        @tokens << {type: "LiteralNumber", value: value}
-      when .char?
-        @tokens << {type: "LiteralStringChar", value: value}
-      when .symbol?
-        @tokens << {type: "LiteralStringSymbol", value: value}
-      when .const?
-        @tokens << {type: "NameConstant", value: value}
-      when .string?
-        @tokens << {type: "LiteralString", value: value}
-      when .ident?
-        @tokens << {type: "NameVariable", value: value}
-      when .keyword?, .self?
-        @tokens << {type: "NameKeyword", value: value}
-      when .primitive_literal?
-        @tokens << {type: "Literal", value: value}
-      when .operator?
-        @tokens << {type: "Operator", value: value}
-      when Crystal::SyntaxHighlighter::TokenType::DELIMITED_TOKEN, Crystal::SyntaxHighlighter::TokenType::DELIMITER_START, Crystal::SyntaxHighlighter::TokenType::DELIMITER_END
-        @tokens << {type: "LiteralString", value: value}
-      else
-        @tokens << {type: "Text", value: value}
-      end
-    end
-  end
-
-  class CrystalTokenizer < Tartrazine::BaseTokenizer
-    include Iterator(Token)
-    @hl = CustomCrystalHighlighter.new
-    @lexer : BaseLexer
-    @iter : Iterator(Token)
-
-    # delegate next, to: @iter
-
-    def initialize(@lexer : BaseLexer, text : String, secondary = false)
-      # Respect the `ensure_nl` config option
-      if text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary
-        text += "\n"
-      end
-      # Just do the tokenizing
-      @hl.highlight(text)
-      @iter = @hl.@tokens.each
-    end
-
-    def next : Iterator::Stop | Token
-      @iter.next
-    end
-  end
-
-  class CrystalLexer < BaseLexer
-    def tokenizer(text : String, secondary = false) : BaseTokenizer
-      CrystalTokenizer.new(self, text, secondary)
-    end
-  end
-end
--- a/spec/examples/crystal/lexer.cr.json
+++ b/spec/examples/crystal/lexer.cr.json
				`@@ -0,0 +1 @@`
				`[{"type":"Text","value":"puts "},{"type":"LiteralString","value":"\"Hello Crystal!\""},{"type":"Text","value":"\n"}]`