Tweaks

2025-08-05 07:45:33 +00:00 · 2024-08-06 17:01:14 -03:00
parent fe917da89d
commit a2394a7313
6 changed files with 133 additions and 42 deletions
--- a/README.md
+++ b/README.md
@@ -4,10 +4,11 @@ Tartrazine is a library to syntax-highlight code. It is
 a port of [Pygments](https://pygments.org/) to
 [Crystal](https://crystal-lang.org/). Kind of.
-It's not currently usable unless what you need is a way
+It's not currently usable because it's not finished, but:
-to turn your files into a pile of json describing its
+
-constituent tokens, because I have not implemented any
+* The lexers work for the implemented languages
-formatters, yet, only the part that parses the code (the lexers).
+* The provided styles work
 * There is a very very simple HTML formatter
 # A port of what? Why "kind of"?
@@ -32,15 +33,21 @@ Currently Tartrazine supports ... 241 languages.
 ## Installation
-If you need to ask how to install this, it's not ready for you yet.
+This will have a CLI tool that can be installed, but it's not
 there yet.
 ## Usage
-If you need to ask how to use this, it's not ready for you yet.
+This works:
-## Development
+```crystal
 require "tartrazine"
-TODO: Write development instructions here
+lexer = Tartrazine.lexer("crystal")
 theme = Tartrazine.theme("catppuccin-macchiato")
 puts Tartrazine::Html.new.format(File.read(ARGV[0]), lexer, theme)
 ```
 ## Contributing
--- a/spec/tartrazine_spec.cr
+++ b/spec/tartrazine_spec.cr
@@ -74,8 +74,8 @@ end
 # Helper that builds the lexer named *lexer_name* and returns the
 # tokens it produces for *text*.
 def tokenize(lexer_name, text)
-  lexer = Tartrazine.get_lexer(lexer_name)
+  lexer = Tartrazine.lexer(lexer_name)
-  collapse_tokens(lexer.tokenize(text))
+  lexer.tokenize(text)
 end
 # Helper that tokenizes using chroma to validate the lexer
@@ -87,26 +87,5 @@ def chroma_tokenize(lexer_name, text)
    ["-f", "json", "-l", lexer_name],
    input: input, output: output
  )
-  collapse_tokens(Array(Tartrazine::Token).from_json(output.to_s))
+  Tartrazine::Lexer.collapse_tokens(Array(Tartrazine::Token).from_json(output.to_s))
 end
 # Merge each run of adjacent tokens that share a type into a single
 # token whose value is the concatenation of the run. This makes two
 # token streams easier to compare.
 def collapse_tokens(tokens : Array(Tartrazine::Token))
  tokens.each_with_object([] of Tartrazine::Token) do |token, merged|
    tail = merged.last?
    if tail && tail[:type] == token[:type]
      # Same type as the previous token: extend it in place.
      merged[-1] = {type: tail[:type], value: tail[:value] + token[:value]}
    else
      merged << token
    end
  end
 end
--- a/src/actions.cr
+++ b/src/actions.cr
@@ -82,7 +82,7 @@ module Tartrazine
        return [] of Token if match.nil?
        lexer_name = xml["lexer"].downcase
        Log.trace { "to tokenize: #{match[match_group]}" }
-        Tartrazine.get_lexer(lexer_name).tokenize(match[match_group], usingself: true)
+        Tartrazine.lexer(lexer_name).tokenize(match[match_group], usingself: true)
      when "usingself"
        # Shunt to another copy of this lexer
        return [] of Token if match.nil?
--- a/src/formatter.cr
+++ b/src/formatter.cr
@@ -0,0 +1,72 @@
 require "html"
 require "./tartrazine.cr"
 require "./styles.cr"
 module Tartrazine
  # Common ancestor of every formatter. Concrete formatters override
  # `#format`; `#get_style_defs` is shared and renders a theme as CSS.
  abstract class Formatter
    property name : String = ""
    # Render *text* using *lexer* and *theme*. The base implementation
    # always raises; subclasses are expected to override it.
    def format(text : String, lexer : Lexer, theme : Theme) : String
      raise "Not implemented"
    end
    # Build one CSS rule per entry in `theme.styles`, using the token
    # name as the class selector, and return all rules as one string.
    #
    # NOTE(review): `style.border` is interpolated as a colour value in
    # one place and compared against `false` in the next. Confirm which
    # type it really has.
    # ameba:disable Metrics/CyclomaticComplexity
    def get_style_defs(theme : Theme) : String
      String.build do |css|
        theme.styles.each do |token, style|
          css << ".#{token} {"
          # Value-carrying properties: emitted only when present.
          css << "color: #{style.color};" if style.color
          css << "background-color: #{style.background};" if style.background
          # Border: a truthy value draws it, an explicit false removes it.
          if style.border
            css << "border: 1px solid #{style.border};"
          elsif style.border == false
            css << "border: none;"
          end
          # Tri-state flags: truthy turns the feature on, an explicit
          # false resets it, nil emits nothing.
          if style.bold
            css << "font-weight: bold;"
          elsif style.bold == false
            css << "font-weight: 400;"
          end
          if style.italic
            css << "font-style: italic;"
          elsif style.italic == false
            css << "font-style: normal;"
          end
          if style.underline
            css << "text-decoration: underline;"
          elsif style.underline == false
            css << "text-decoration: none;"
          end
          css << "}"
        end
      end
    end
  end
  # Formatter that renders highlighted code as a standalone HTML
  # document with the theme's CSS embedded in a `<style>` element.
  class Html < Formatter
    # Tokenize *text* with *lexer* and return a complete HTML page in
    # which every token is wrapped in a `<span>` whose class comes from
    # `#get_css_class`.
    #
    # Token text is passed through `HTML.escape`, so characters such as
    # `<`, `>` and `&` in the highlighted source are shown literally
    # instead of being interpreted as markup.
    def format(text : String, lexer : Lexer, theme : Theme) : String
      String.build do |outp|
        outp << "<html><head><style>"
        outp << get_style_defs(theme)
        outp << "</style></head><body>"
        outp << "<pre class=\"Background\"><code class=\"Background\">"
        lexer.tokenize(text).each do |token|
          outp << "<span class=\"#{get_css_class(token[:type], theme)}\">"
          outp << HTML.escape(token[:value])
          outp << "</span>"
        end
        outp << "</code></pre></body></html>"
      end
    end
    # Given a token type, return the CSS class to use.
    #
    # Returns *token* itself when the theme has a style for it.
    # Otherwise returns the most specific entry of
    # `theme.style_parents(token)` that the theme has a style for, or
    # nil when none of them has one.
    def get_css_class(token, theme)
      return token if theme.styles.has_key?(token)
      # Themes don't contain information for each specific
      # token type. However, they may contain information
      # for a parent style. Worst case, we go to the root
      # (Background) style.
      theme.style_parents(token).reverse.find { |parent|
        theme.styles.has_key?(parent)
      }
    end
  end
 end
 # NOTE(review): these statements run at load time, whenever this file
 # is required. They read the file named by ARGV[0] and print it as
 # HTML, so loading with no command-line argument fails on ARGV[0].
 # This looks like leftover demo code; confirm whether it belongs in a
 # library file.
 lexer = Tartrazine.lexer("crystal")
 theme = Tartrazine.theme("catppuccin-macchiato")
 puts Tartrazine::Html.new.format(File.read(ARGV[0]), lexer, theme)
--- a/src/styles.cr
+++ b/src/styles.cr
@@ -1,6 +1,11 @@
 require "xml"
 module Tartrazine
  # Read `styles/<name>.xml` (a relative path) and parse it into a
  # Theme via `Theme.from_xml`.
  def self.theme(name : String) : Theme
    xml = File.read(File.join("styles", "#{name}.xml"))
    Theme.from_xml(xml)
  end
  class Style
    # These properties are tri-state.
    # true means it's set
@@ -52,11 +57,7 @@ module Tartrazine
      return s if s.complete?
      # Form the hierarchy of parent styles
-      parents = ["Background"]
+      parents = style_parents(token)
      parts = token.underscore.split("_").map(&.capitalize)
      parts.each_with_index do |_, i|
        parents << parts[..i].join("")
      end
      s = parents.map do |parent|
        styles[parent]
@@ -68,6 +69,15 @@ module Tartrazine
      s
    end
    # List the style names *token* can inherit from, least specific
    # first. The list always starts with "Background" and then holds
    # every cumulative prefix of the token's capitalized words, ending
    # with the full name (e.g. "NameFunction" gives
    # ["Background", "Name", "NameFunction"]).
    def style_parents(token)
      words = token.underscore.split("_").map(&.capitalize)
      ancestry = ["Background"]
      words.size.times do |last|
        ancestry << words[0..last].join
      end
      ancestry
    end
    # Load from a Chroma XML file
    def self.from_xml(xml : String) : Theme
      document = XML.parse(xml)
--- a/src/tartrazine.cr
+++ b/src/tartrazine.cr
@@ -6,6 +6,7 @@ require "log"
 require "xml"
 module Tartrazine
  extend self
  VERSION = "0.1.0"
  Log = ::Log.for("tartrazine")
@@ -13,7 +14,7 @@ module Tartrazine
  # This implements a lexer for Pygments RegexLexers as expressed
  # in Chroma's XML serialization.
  #
-  # For explanations on what actions, transformers, etc do
+  # For explanations on what actions and states do
  # the Pygments documentation is a good place to start.
  # https://pygments.org/docs/lexerdevelopment/
@@ -82,7 +83,29 @@ module Tartrazine
          pos += 1
        end
      end
-      tokens.reject { |token| token[:value] == "" }
+      Lexer.collapse_tokens(tokens)
    end
    # Drop tokens whose value is the empty string, then merge each run
    # of adjacent tokens that share a type into one token whose value is
    # the concatenation of the run. Returns a new array.
    def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token)
      merged = [] of Tartrazine::Token
      tokens.each do |current|
        # Empty tokens never contribute and must not break up a run.
        next if current[:value] == ""
        previous = merged.last?
        if previous && previous[:type] == current[:type]
          merged[-1] = {type: previous[:type], value: previous[:value] + current[:value]}
        else
          merged << current
        end
      end
      merged
    end
    # ameba:disable Metrics/CyclomaticComplexity
@@ -149,7 +172,7 @@ module Tartrazine
    end
  end
-  def self.get_lexer(name : String) : Lexer
+  # Read `lexers/<name>.xml` (a relative path) and build a Lexer from
+  # it via `Lexer.from_xml`.
+  def self.lexer(name : String) : Lexer
    Lexer.from_xml(File.read("lexers/#{name}.xml"))
  end