fix: HTML formatter was setting bold wrong

feat: SVG formatter
2024-12-05 00:00:34 +00:00 · 2024-09-21 12:56:40 -03:00 · 2024-09-21 12:56:40 -03:00
10 changed files with 165 additions and 432 deletions
--- a/.ameba.yml
+++ b/.ameba.yml
@ -1,9 +1,9 @@
 # This configuration file was generated by `ameba --gen-config`
-# on 2024-09-11 00:56:14 UTC using Ameba version 1.6.1.
+# on 2024-09-21 14:59:30 UTC using Ameba version 1.6.1.
 # The point is for the user to remove these configuration records
 # one by one as the reported problems are removed from the code base.
-# Problems found: 4
+# Problems found: 3
 # Run `ameba --only Documentation/DocumentationAdmonition` for details
 Documentation/DocumentationAdmonition:
  Description: Reports documentation admonitions
@ -11,10 +11,24 @@ Documentation/DocumentationAdmonition:
  Excluded:
  - src/lexer.cr
  - src/actions.cr
  - spec/examples/crystal/lexer_spec.cr
  Admonitions:
  - TODO
  - FIXME
  - BUG
  Enabled: true
  Severity: Warning
 # Problems found: 1
 # Run `ameba --only Lint/SpecFilename` for details
 Lint/SpecFilename:
  Description: Enforces spec filenames to have `_spec` suffix
  Excluded:
  - spec/examples/crystal/hello.cr
  IgnoredDirs:
  - spec/support
  - spec/fixtures
  - spec/data
  IgnoredFilenames:
  - spec_helper
  Enabled: true
  Severity: Warning
--- a/spec/examples/crystal/hello.cr
+++ b/spec/examples/crystal/hello.cr
@ -0,0 +1 @@
 puts "Hello Crystal!"
--- a/spec/examples/crystal/hello.cr.json
+++ b/spec/examples/crystal/hello.cr.json
@ -0,0 +1 @@
 [{"type":"Text","value":"puts "},{"type":"LiteralString","value":"\"Hello Crystal!\""},{"type":"Text","value":"\n"}]
--- a/spec/examples/crystal/lexer.cr
+++ b/spec/examples/crystal/lexer.cr
@ -1,413 +0,0 @@
 require "./constants/lexers"
 require "./heuristics"
 require "baked_file_system"
 require "crystal/syntax_highlighter"
 module Tartrazine
  # Holder for the lexer definition files found in the "../lexers" folder
  # (resolved relative to this file's directory). The rest of this module
  # reads them through `LexerFiles.get`.
  # NOTE(review): presumably BakedFileSystem embeds the folder at compile
  # time — confirm against the baked_file_system documentation.
  class LexerFiles
    extend BakedFileSystem
    bake_folder "../lexers", __DIR__
  end
  # Build the lexer object for a language.
  #
  # Lookup precedence: an explicit *name* (anything other than
  # "autodetect"), then *filename*, then *mimetype*. When none of them
  # is usable the plaintext lexer is returned.
  def self.lexer(name : String? = nil, filename : String? = nil, mimetype : String? = nil) : BaseLexer
    if name && name != "autodetect"
      lexer_by_name(name)
    elsif filename
      lexer_by_filename(filename)
    elsif mimetype
      lexer_by_mimetype(mimetype)
    else
      plaintext_file = LEXERS_BY_NAME["plaintext"]
      RegexLexer.from_xml(LexerFiles.get("/#{plaintext_file}.xml").gets_to_end)
    end
  end
  # Look the lexer up in `LEXERS_BY_MIMETYPE`.
  #
  # Raises `Exception` ("Unknown mimetype: ...") when the mimetype
  # has no entry.
  private def self.lexer_by_mimetype(mimetype : String) : BaseLexer
    if lexer_file_name = LEXERS_BY_MIMETYPE.fetch(mimetype, nil)
      RegexLexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
    else
      raise Exception.new("Unknown mimetype: #{mimetype}")
    end
  end
  # Resolve a lexer from its language name.
  #
  # "crystal" maps to the dedicated `CrystalLexer`. Any other name is
  # looked up (downcased) in `LEXERS_BY_NAME`; an unknown name that
  # contains "+" is treated as a combination and handed to
  # `create_delegating_lexer`. Anything else raises
  # `Exception` ("Unknown lexer: ...").
  private def self.lexer_by_name(name : String) : BaseLexer
    return CrystalLexer.new if name == "crystal"

    if lexer_file_name = LEXERS_BY_NAME.fetch(name.downcase, nil)
      return RegexLexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
    end

    # No direct entry: either a "a+b" combination or an unknown name
    return create_delegating_lexer(name) if name.includes? "+"
    raise Exception.new("Unknown lexer: #{name}")
  end
  # Choose a lexer from a file name.
  #
  # Names ending in ".cr" always get the `CrystalLexer`. Otherwise the
  # base name is matched against every pattern in `LEXERS_BY_FILENAME`:
  # no match falls back to plaintext, exactly one match is used as is,
  # and several matches are settled by `lexer_by_content`.
  private def self.lexer_by_filename(filename : String) : BaseLexer
    return CrystalLexer.new if filename.ends_with?(".cr")

    candidates = Set(String).new
    LEXERS_BY_FILENAME.each do |pattern, lexer_names|
      next unless File.match?(pattern, File.basename(filename))
      candidates += lexer_names.to_set
    end

    lexer_file_name = case candidates.size
                      when 0
                        LEXERS_BY_NAME["plaintext"]
                      when 1
                        candidates.first
                      else
                        # Ambiguous: this branch always returns or raises
                        guess = self.lexer_by_content(filename)
                        begin
                          return self.lexer(guess)
                        rescue ex : Exception
                          raise Exception.new("Multiple lexers match the filename: #{candidates.to_a.join(", ")}, heuristics suggest #{guess} but there is no matching lexer.")
                        end
                      end
    RegexLexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
  end
  # Run the heuristics loaded from "/heuristics.yml" against the file
  # *fname* (which is read from disk) and return the suggested lexer name.
  #
  # A `String` result is returned as is, an `Array(String)` result
  # yields its first entry, and a nil result raises
  # `Exception` ("No lexer found for ...").
  private def self.lexer_by_content(fname : String) : String?
    heuristics = Linguist::Heuristic.from_yaml(LexerFiles.get("/heuristics.yml").gets_to_end)
    result = heuristics.run(fname, File.read(fname))
    if result.is_a?(Nil)
      raise Exception.new "No lexer found for #{fname}"
    elsif result.is_a?(String)
      result.as(String)
    elsif result.is_a?(Array(String))
      result.first
    end
  end
  # Build a `DelegatingLexer` from a "language+root" name. The name is
  # split at the first "+"; each half is resolved through `lexer`.
  private def self.create_delegating_lexer(name : String) : BaseLexer
    language, root = name.split("+", 2)
    DelegatingLexer.new(lexer(language), lexer(root))
  end
  # Names of all known lexers (the keys of `LEXERS_BY_NAME`), sorted.
  def self.lexers : Array(String)
    names = LEXERS_BY_NAME.keys
    names.sort!
  end
  # A token, the output of the tokenizer: a `type` name (for example
  # "Text" or "Error") plus the `value`, the piece of text it covers.
  alias Token = NamedTuple(type: String, value: String)
  # Common ancestor of every tokenizer in this file. It declares no
  # methods itself; the concrete subclasses include `Iterator(Token)`.
  abstract class BaseTokenizer
  end
  # Iterator that turns a text into `Token`s by applying the rules of
  # the lexer state currently on top of `state_stack`.
  class Tokenizer < BaseTokenizer
    include Iterator(Token)
    property lexer : BaseLexer
    # The input, as bytes; `pos` is a byte offset into it
    property text : Bytes
    property pos : Int32 = 0
    # Tokens already produced but not yet returned by `next`
    @dq = Deque(Token).new
    property state_stack = ["root"]
    # *secondary* disables the `ensure_nl` handling below.
    def initialize(@lexer : BaseLexer, text : String, secondary = false)
      # Respect the `ensure_nl` config option
      if text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary
        text += "\n"
      end
      @text = text.to_slice
    end
    # Return the next queued token, producing more tokens when the queue
    # is empty, or `stop` once the whole text has been consumed.
    def next : Iterator::Stop | Token
      if @dq.size > 0
        return @dq.shift
      end
      if pos == @text.size
        return stop
      end
      matched = false
      # Keep applying rules (queueing their tokens) until the text is
      # exhausted or a position where no rule matches has been handled.
      while @pos < @text.size
        @lexer.states[@state_stack.last].rules.each do |rule|
          matched, new_pos, new_tokens = rule.match(@text, @pos, self)
          if matched
            @pos = new_pos
            split_tokens(new_tokens).each { |token| @dq << token }
            break
          end
        end
        if !matched
          # No rule matched at this byte. A newline (byte 10) becomes a
          # "Text" token and resets the state stack to "root"; any other
          # byte becomes a one-byte "Error" token.
          if @text[@pos] == 10u8
            @dq << {type: "Text", value: "\n"}
            @state_stack = ["root"]
          else
            @dq << {type: "Error", value: String.new(@text[@pos..@pos])}
          end
          @pos += 1
          break
        end
      end
      # Re-enter: returns the first queued token, or `stop` at end of text
      self.next
    end
    # Split every token whose value contains a newline at each "\n".
    # Each piece except the last keeps its trailing "\n"; the last piece
    # is whatever follows the final newline (possibly an empty string).
    def split_tokens(tokens : Array(Token)) : Array(Token)
      split_tokens = [] of Token
      tokens.each do |token|
        if token[:value].includes?("\n")
          values = token[:value].split("\n")
          values.each_with_index do |value, index|
            value += "\n" if index < values.size - 1
            split_tokens << {type: token[:type], value: value}
          end
        else
          split_tokens << token
        end
      end
      split_tokens
    end
  end
  # `BaseLexer` is another name for `Lexer`; both appear in this file.
  alias BaseLexer = Lexer
  # Base class of all lexers: holds the configuration and the named
  # states, and hands out a tokenizer for a given text.
  abstract class Lexer
    # Lexer options. `ensure_nl` is consulted by the tokenizers in this
    # file; `not_multiline`, `dot_all` and `case_insensitive` are passed
    # to `Rule.new` by `RegexLexer.from_xml`.
    property config = {
      name:             "",
      priority:         0.0,
      case_insensitive: false,
      dot_all:          false,
      not_multiline:    false,
      ensure_nl:        false,
    }
    # States by name
    property states = {} of String => State
    # Create the tokenizer for *text*. *secondary* is forwarded to
    # `Tokenizer.new`, where it disables the `ensure_nl` handling.
    def tokenizer(text : String, secondary = false) : BaseTokenizer
      Tokenizer.new(self, text, secondary)
    end
  end
  # This implements a lexer for Pygments RegexLexers as expressed
  # in Chroma's XML serialization.
  #
  # For explanations on what actions and states do
  # the Pygments documentation is a good place to start.
  # https://pygments.org/docs/lexerdevelopment/
  class RegexLexer < BaseLexer
    # Collapse consecutive tokens of the same type for easier comparison
    # and smaller output. Tokens with an empty value are dropped first.
    def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token)
      result = [] of Tartrazine::Token
      tokens = tokens.reject { |token| token[:value] == "" }
      tokens.each do |token|
        if result.empty?
          result << token
          next
        end
        last = result.last
        if last[:type] == token[:type]
          # Same type as the previous token: replace it with the merged one
          new_token = {type: last[:type], value: last[:value] + token[:value]}
          result.pop
          result << new_token
        else
          result << token
        end
      end
      result
    end
    # Build a lexer from an XML definition: the `config` element fills
    # `config`, and each `state` element under `rules` becomes a `State`
    # holding its rules. Raises `Exception` on a duplicated state name.
    # NOTE(review): `xml_to_s` / `xml_to_f` are defined outside this
    # block — presumably macros reading a child element of `config`; confirm.
    def self.from_xml(xml : String) : Lexer
      l = RegexLexer.new
      lexer = XML.parse(xml).first_element_child
      if lexer
        config = lexer.children.find { |node|
          node.name == "config"
        }
        if config
          l.config = {
            name:             xml_to_s(config, name) || "",
            priority:         xml_to_f(config, priority) || 0.0,
            not_multiline:    xml_to_s(config, not_multiline) == "true",
            dot_all:          xml_to_s(config, dot_all) == "true",
            case_insensitive: xml_to_s(config, case_insensitive) == "true",
            ensure_nl:        xml_to_s(config, ensure_nl) == "true",
          }
        end
        rules = lexer.children.find { |node|
          node.name == "rules"
        }
        if rules
          # Rules contains states 🤷
          rules.children.select { |node|
            node.name == "state"
          }.each do |state_node|
            state = State.new
            state.name = state_node["name"]
            if l.states.has_key?(state.name)
              raise Exception.new("Duplicate state: #{state.name}")
            else
              l.states[state.name] = state
            end
            # And states contain rules 🤷
            state_node.children.select { |node|
              node.name == "rule"
            }.each do |rule_node|
              # A rule without a pattern is either an include of another
              # state or an unconditional rule; one with a pattern is a
              # regular `Rule` built with the lexer's regex options.
              case rule_node["pattern"]?
              when nil
                if rule_node.first_element_child.try &.name == "include"
                  rule = IncludeStateRule.new(rule_node)
                else
                  rule = UnconditionalRule.new(rule_node)
                end
              else
                rule = Rule.new(rule_node,
                  multiline: !l.config[:not_multiline],
                  dotall: l.config[:dot_all],
                  ignorecase: l.config[:case_insensitive])
              end
              state.rules << rule
            end
          end
        end
      end
      l
    end
  end
  # A lexer that takes two lexers as arguments: a language lexer
  # and a root lexer. Everything is scanned using the
  # language lexer; afterwards all `Other` tokens are lexed
  # using the root lexer.
  #
  # This is useful for things like template languages, where
  # you have Jinja + HTML or Jinja + CSS and so on.
  class DelegatingLexer < Lexer
    property language_lexer : BaseLexer
    property root_lexer : BaseLexer
    def initialize(@language_lexer : BaseLexer, @root_lexer : BaseLexer)
    end
    # Create the `DelegatingTokenizer` for *text*; *secondary* is
    # forwarded to it unchanged.
    def tokenizer(text : String, secondary = false) : DelegatingTokenizer
      DelegatingTokenizer.new(self, text, secondary)
    end
  end
  # Tokenizer counterpart of `DelegatingLexer`. The whole text goes
  # through the language lexer first; every "Other" token it yields is
  # re-tokenized with the root lexer, and any other token is passed
  # through untouched.
  class DelegatingTokenizer < BaseTokenizer
    include Iterator(Token)
    @dq = Deque(Token).new
    @language_tokenizer : BaseTokenizer

    def initialize(@lexer : DelegatingLexer, text : String, secondary = false)
      # Respect the `ensure_nl` config option
      lacks_final_nl = !text.empty? && !text.ends_with?('\n')
      text += "\n" if lacks_final_nl && @lexer.config[:ensure_nl] && !secondary
      @language_tokenizer = @lexer.language_lexer.tokenizer(text, true)
    end

    def next : Iterator::Stop | Token
      loop do
        # Hand out queued tokens before asking the language tokenizer again
        return @dq.shift unless @dq.empty?

        token = @language_tokenizer.next
        return stop if token.is_a? Iterator::Stop

        current = token.as(Token)
        if current[:type] == "Other"
          # Text the language lexer does not own: lex it with the root lexer
          @lexer.root_lexer.tokenizer(current[:value], true).each do |root_token|
            @dq << root_token
          end
        else
          @dq << current
        end
      end
    end
  end
  # A Lexer state. A state has a name and a list of rules.
  # The state machine has a state stack containing references
  # to states to decide which rules to apply.
  struct State
    property name : String = ""
    property rules = [] of BaseRule
    # Combine two states into a new one that holds this state's rules
    # followed by *other*'s rules. The new state gets a random
    # 8-character base58 name.
    def +(other : State)
      new_state = State.new
      new_state.name = Random.base58(8)
      new_state.rules = rules + other.rules
      new_state
    end
  end
  # `Crystal::SyntaxHighlighter` subclass that, instead of rendering
  # text, records what it is asked to render as `Token`s in `@tokens`.
  class CustomCrystalHighlighter < Crystal::SyntaxHighlighter
    @tokens = [] of Token

    # The block's result is stored as a single "LiteralString" token.
    def render_delimiter(&block)
      @tokens << {type: "LiteralString", value: block.call.to_s}
    end

    # Stores the block's result as "Text", wrapped between the opening
    # and closing "LiteralStringInterpol" tokens.
    def render_interpolation(&block)
      @tokens << {type: "LiteralStringInterpol", value: "\#{"}
      @tokens << {type: "Text", value: block.call.to_s}
      @tokens << {type: "LiteralStringInterpol", value: "}"}
    end

    # The block's result is stored as a single "LiteralString" token.
    def render_string_array(&block)
      @tokens << {type: "LiteralString", value: block.call.to_s}
    end

    # Translate the highlighter's token type into a token type name and
    # record the token. Unrecognized types are recorded as "Text".
    # ameba:disable Metrics/CyclomaticComplexity
    def render(type : TokenType, value : String)
      token_type = case type
                   when .comment?
                     "Comment"
                   when .number?
                     "LiteralNumber"
                   when .char?
                     "LiteralStringChar"
                   when .symbol?
                     "LiteralStringSymbol"
                   when .const?
                     "NameConstant"
                   when .string?
                     "LiteralString"
                   when .ident?
                     "NameVariable"
                   when .keyword?, .self?
                     "NameKeyword"
                   when .primitive_literal?
                     "Literal"
                   when .operator?
                     "Operator"
                   when Crystal::SyntaxHighlighter::TokenType::DELIMITED_TOKEN, Crystal::SyntaxHighlighter::TokenType::DELIMITER_START, Crystal::SyntaxHighlighter::TokenType::DELIMITER_END
                     "LiteralString"
                   else
                     "Text"
                   end
      @tokens << {type: token_type, value: value}
    end
  end
  # Tokenizer for Crystal code. The whole text is highlighted up front
  # by `CustomCrystalHighlighter`; `next` then walks the collected tokens.
  class CrystalTokenizer < Tartrazine::BaseTokenizer
    include Iterator(Token)
    @hl = CustomCrystalHighlighter.new
    @lexer : BaseLexer
    @iter : Iterator(Token)

    # delegate next, to: @iter
    def initialize(@lexer : BaseLexer, text : String, secondary = false)
      # Respect the `ensure_nl` config option
      lacks_final_nl = !text.empty? && !text.ends_with?('\n')
      text += "\n" if lacks_final_nl && @lexer.config[:ensure_nl] && !secondary

      # Just do the tokenizing
      @hl.highlight(text)
      @iter = @hl.@tokens.each
    end

    # Next collected token, or `stop` when they run out.
    def next : Iterator::Stop | Token
      @iter.next
    end
  end
  # Lexer for Crystal code. It does not use the regex states; its
  # tokenizer is a `CrystalTokenizer`.
  class CrystalLexer < BaseLexer
    # Create the `CrystalTokenizer` for *text*; *secondary* is forwarded.
    def tokenizer(text : String, secondary = false) : BaseTokenizer
      CrystalTokenizer.new(self, text, secondary)
    end
  end
 end
--- a/spec/examples/crystal/lexer.cr.json
+++ b/spec/examples/crystal/lexer.cr.json
--- a/src/formatter.cr
+++ b/src/formatter.cr
@ -17,12 +17,19 @@ module Tartrazine
    end
    def format(text : String, lexer : Lexer) : String
-      raise Exception.new("Not implemented")
+      outp = String::Builder.new("")
      format(text, lexer, outp)
      outp.to_s
    end
    # Style definitions for this formatter. This base implementation
    # always raises `Exception` ("Not implemented").
    def style_defs : String
      raise "Not implemented"
    end
    # True when *line* falls inside at least one of the `highlight_lines` ranges.
    def highlighted?(line : Int) : Bool
      highlight_lines.any? { |range| range.includes?(line) }
    end
  end
 end
--- a/src/formatters/ansi.cr
+++ b/src/formatters/ansi.cr
@ -20,12 +20,6 @@ module Tartrazine
      "#{i + 1}".rjust(4).ljust(5)
    end
    # Format *text* with *lexer* and return the result as a String,
    # delegating the actual work to the IO-based `format` overload.
    def format(text : String, lexer : BaseLexer) : String
      String.build do |outp|
        format(text, lexer, outp)
      end
    end
    def format(text : String, lexer : BaseLexer, outp : IO) : Nil
      tokenizer = lexer.tokenizer(text)
      i = 0
--- a/src/formatters/html.cr
+++ b/src/formatters/html.cr
@ -106,8 +106,7 @@ module Tartrazine
          # These are true/false/nil
          outp << "border: none;" if style.border == false
-          outp << "font-weight: bold;" if style.bold
+          outp << "font-weight: #{@weight_of_bold};" if style.bold
          outp << "font-weight: #{@weight_of_bold};" if style.bold == false
          outp << "font-style: italic;" if style.italic
          outp << "font-style: normal;" if style.italic == false
          outp << "text-decoration: underline;" if style.underline
@ -134,10 +133,5 @@ module Tartrazine
      end
      class_prefix + Abbreviations[token]
    end
    # True when *line* falls inside at least one of the `highlight_lines` ranges.
    def highlighted?(line : Int) : Bool
      highlight_lines.any? { |range| range.includes?(line) }
    end
  end
 end
--- a/src/formatters/svg.cr
+++ b/src/formatters/svg.cr
@ -0,0 +1,129 @@
 require "../constants/token_abbrevs.cr"
 require "../formatter"
 require "html"
 module Tartrazine
  # Convenience entry point: highlight *text* as *language* and return
  # the SVG output, using the named *theme*.
  def self.to_svg(text : String, language : String,
                  theme : String = "default-dark",
                  standalone : Bool = true,
                  line_numbers : Bool = false) : String
    formatter = Tartrazine::Svg.new(
      theme: Tartrazine.theme(theme),
      standalone: standalone,
      line_numbers: line_numbers
    )
    lexer = Tartrazine.lexer(name: language)
    formatter.format(text, lexer)
  end
  # Formatter that renders highlighted code as SVG: every source line
  # becomes a `<text>` element and every token a styled `<tspan>`.
  class Svg < Formatter
    property highlight_lines : Array(Range(Int32, Int32)) = [] of Range(Int32, Int32)
    property line_number_id_prefix : String = "line-"
    property line_number_start : Int32 = 1
    property tab_width = 8
    property? line_numbers : Bool = false
    property? linkable_line_numbers : Bool = true
    property? standalone : Bool = false
    property weight_of_bold : Int32 = 600
    # Font size as an integer, parsed from the `font_size` argument
    property fs : Int32
    # Vertical distance between consecutive lines (`fs + 5`)
    property ystep : Int32
    property theme : Theme

    # *font_size* may be given with or without a trailing "px"
    # (e.g. "14px" or "14"); the numeric part ends up in `fs`.
    def initialize(@theme : Theme = Tartrazine.theme("default-dark"), *,
                   @highlight_lines = [] of Range(Int32, Int32),
                   @class_prefix : String = "",
                   @line_number_id_prefix = "line-",
                   @line_number_start = 1,
                   @tab_width = 8,
                   @line_numbers : Bool = false,
                   @linkable_line_numbers : Bool = true,
                   @standalone : Bool = false,
                   @weight_of_bold : Int32 = 600,
                   @font_family : String = "monospace",
                   @font_size : String = "14px")
      if font_size.ends_with? "px"
        @fs = font_size[0...-2].to_i
      else
        @fs = font_size.to_i
      end
      @ystep = @fs + 5
    end

    # Write the SVG for *text* to *io*. When `standalone?` is set the
    # output is wrapped in a complete SVG document.
    def format(text : String, lexer : BaseLexer, io : IO) : Nil
      pre, post = wrap_standalone
      io << pre if standalone?
      format_text(text, lexer, io)
      io << post if standalone?
    end

    # Return the `{prefix, suffix}` pair that turns the formatted text
    # into a standalone SVG document: XML declaration, doctype, and the
    # `<svg>` / `<g>` elements carrying the font family and size.
    def wrap_standalone
      output = String.build do |outp|
        outp << %(<?xml version="1.0" encoding="utf-8"?>
        <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN" "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
        <svg xmlns="http://www.w3.org/2000/svg">
        <g font-family="#{self.@font_family}" font-size="#{self.@font_size}">)
      end
      {output.to_s, "</g></svg>"}
    end

    # Build the `<text>` element holding the label of line *i* (0-based;
    # the label shows `i + 1`). The label is end-anchored at `4 * ystep`,
    # so the *x* argument is not used for positioning.
    private def line_label(i : Int32, x : Int32, y : Int32) : String
      line_label = "#{i + 1}".rjust(4).ljust(5)
      line_style = highlighted?(i + 1) ? "font-weight=\"#{@weight_of_bold}\"" : ""
      line_id = linkable_line_numbers? ? "id=\"#{line_number_id_prefix}#{i + 1}\"" : ""
      %(<text #{line_style} #{line_id}  x="#{4*ystep}" y="#{y}" text-anchor="end">#{line_label}</text>)
    end

    # Tokenize *text* and write one `<text>` element per line to *outp*,
    # each token wrapped in a `<tspan>` carrying its style attributes.
    def format_text(text : String, lexer : BaseLexer, outp : IO)
      x = 0
      y = ystep
      i = 0
      line_x = x
      # Leave room on the left for the line-number column
      line_x += 5 * ystep if line_numbers?
      tokenizer = lexer.tokenizer(text)
      outp << line_label(i, x, y) if line_numbers?
      outp << %(<text x="#{line_x}" y="#{y}" xml:space="preserve">)
      tokenizer.each do |token|
        if token[:value].ends_with? "\n"
          # End of line: emit the token without its newline, close the
          # current <text> and open the one for the next line.
          # `get_style` already starts with a space (or is empty), so no
          # separator is added after "<tspan".
          outp << "<tspan#{get_style(token[:type])}>#{HTML.escape(token[:value][0...-1])}</tspan>"
          outp << "</text>"
          x = 0
          y += ystep
          i += 1
          outp << line_label(i, x, y) if line_numbers?
          outp << %(<text x="#{line_x}" y="#{y}" xml:space="preserve">)
        else
          outp << "<tspan#{get_style(token[:type])}>#{HTML.escape(token[:value])}</tspan>"
          x += token[:value].size * ystep
        end
      end
      outp << "</text>"
    end

    # Given a token type, return its style as a string of SVG
    # attributes. Each attribute is preceded by a space; the result is
    # empty when the style sets nothing.
    def get_style(token : String) : String
      if !theme.styles.has_key? token
        # Themes don't contain information for each specific
        # token type. However, they may contain information
        # for a parent style. Worst case, we go to the root
        # (Background) style.
        parent = theme.style_parents(token).reverse.find { |dad|
          theme.styles.has_key?(dad)
        }
        theme.styles[token] = theme.styles[parent]
      end
      output = String.build do |outp|
        style = theme.styles[token]
        outp << " fill=\"##{style.color.try &.hex}\"" if style.color
        # No support for background color or border in SVG
        # bold/italic/underline are true/false/nil: nil emits nothing
        outp << " font-weight=\"#{@weight_of_bold}\"" if style.bold
        outp << " font-weight=\"normal\"" if style.bold == false
        outp << " font-style=\"italic\"" if style.italic
        outp << " font-style=\"normal\"" if style.italic == false
        outp << " text-decoration=\"underline\"" if style.underline
        outp << " text-decoration=\"none\"" if style.underline == false
      end
      output
    end
  end
 end
--- a/src/main.cr
+++ b/src/main.cr
@ -11,6 +11,8 @@ Usage:
  tartrazine -f html -t theme --css
  tartrazine FILE -f terminal [-t theme][-l lexer][--line-numbers]
                              [-o output]
  tartrazine FILE -f svg  [-t theme][--standalone][--line-numbers]
                          [-l lexer][-o output]
  tartrazine FILE -f json [-o output]
  tartrazine --list-themes
  tartrazine --list-lexers
@ -18,7 +20,7 @@ Usage:
  tartrazine --version
 Options:
-  -f <formatter>      Format to use (html, terminal, json)
+  -f <formatter>      Format to use (html, terminal, json, svg)
  -t <theme>          Theme to use, see --list-themes [default: default-dark]
  -l <lexer>          Lexer (language) to use, see --list-lexers. Use more than
                      one lexer with "+" (e.g. jinja+yaml) [default: autodetect]
@ -71,6 +73,11 @@ if options["-f"]
    formatter.theme = theme
  when "json"
    formatter = Tartrazine::Json.new
  when "svg"
    formatter = Tartrazine::Svg.new
    formatter.standalone = options["--standalone"] != nil
    formatter.line_numbers = options["--line-numbers"] != nil
    formatter.theme = theme
  else
    puts "Invalid formatter: #{formatter}"
    exit 1
Author	SHA1	Message	Date
Roberto Alsina	56c2b4599a	fix: HTML formatter was setting bold wrong Some checks are pending Tests / build (push) Waiting to run Details	2024-09-21 12:56:40 -03:00
Roberto Alsina	9c70fbf389	feat: SVG formatter	2024-09-21 12:56:40 -03:00
		`@ -0,0 +1 @@`
							`[{"type":"Text","value":"puts "},{"type":"LiteralString","value":"\"Hello Crystal!\""},{"type":"Text","value":"\n"}]`