diff --git a/src/formatter.cr b/src/formatter.cr
index a64863d..418c4c9 100644
--- a/src/formatter.cr
+++ b/src/formatter.cr
@@ -19,93 +19,4 @@ module Tartrazine
       raise Exception.new("Not implemented")
     end
   end
-
-  class Ansi < Formatter
-    def format(text : String, lexer : Lexer, theme : Theme) : String
-      output = String.build do |outp|
-        lexer.tokenize(text).each do |token|
-          outp << self.colorize(token[:value], token[:type], theme)
-        end
-      end
-      output
-    end
-
-    def colorize(text : String, token : String, theme : Theme) : String
-      style = theme.styles.fetch(token, nil)
-      return text if style.nil?
-      if theme.styles.has_key?(token)
-        s = theme.styles[token]
-      else
-        # Themes don't contain information for each specific
-        # token type. However, they may contain information
-        # for a parent style. Worst case, we go to the root
-        # (Background) style.
-        s = theme.styles[theme.style_parents(token).reverse.find { |parent|
-          theme.styles.has_key?(parent)
-        }]
-      end
-      colorized = text.colorize
-      s.color.try { |c| colorized = colorized.fore(c.colorize) }
-      # Intentionally not setting background color
-      colorized.mode(:bold) if s.bold
-      colorized.mode(:italic) if s.italic
-      colorized.mode(:underline) if s.underline
-      colorized.to_s
-    end
-  end
-
-  class Html < Formatter
-    def format(text : String, lexer : Lexer, theme : Theme) : String
-      output = String.build do |outp|
-        outp << "<html><body>"
-        outp << "<pre class=\"#{get_css_class("Background", theme)}\">"
-        outp << "<code class=\"#{get_css_class("Background", theme)}\">"
-        lexer.tokenize(text).each do |token|
-          fragment = "<span class=\"#{get_css_class(token[:type], theme)}\">#{token[:value]}</span>"
-          outp << fragment
-        end
-        outp << "</code></pre>"
-        outp << "</body></html>"
-      end
-      output
-    end
-
-    # ameba:disable Metrics/CyclomaticComplexity
-    def get_style_defs(theme : Theme) : String
-      output = String.build do |outp|
-        theme.styles.each do |token, style|
-          outp << ".#{get_css_class(token, theme)} {"
-          # These are set or nil
-          outp << "color: #{style.color.try &.hex};" if style.color
-          outp << "background-color: #{style.background.try &.hex};" if style.background
-          outp << "border: 1px solid #{style.border.try &.hex};" if style.border
-
-          # These are true/false/nil
-          outp << "border: none;" if style.border == false
-          outp << "font-weight: bold;" if style.bold
-          outp << "font-weight: 400;" if style.bold == false
-          outp << "font-style: italic;" if style.italic
-          outp << "font-style: normal;" if style.italic == false
-          outp << "text-decoration: underline;" if style.underline
-          outp << "text-decoration: none;" if style.underline == false
-
-          outp << "}"
-        end
-      end
-      output
-    end
-
-    # Given a token type, return the CSS class to use.
-    def get_css_class(token, theme)
-      return Abbreviations[token] if theme.styles.has_key?(token)
-
-      # Themes don't contain information for each specific
-      # token type. However, they may contain information
-      # for a parent style. Worst case, we go to the root
-      # (Background) style.
-      Abbreviations[theme.style_parents(token).reverse.find { |parent|
-        theme.styles.has_key?(parent)
-      }]
-    end
-  end
 end
diff --git a/src/formatters/ansi.cr b/src/formatters/ansi.cr
new file mode 100644
index 0000000..ba8b740
--- /dev/null
+++ b/src/formatters/ansi.cr
@@ -0,0 +1,37 @@
+require "../formatter"
+
+module Tartrazine
+ class Ansi < Formatter
+ def format(text : String, lexer : Lexer, theme : Theme) : String
+ output = String.build do |outp|
+ lexer.tokenize(text).each do |token|
+ outp << self.colorize(token[:value], token[:type], theme)
+ end
+ end
+ output
+ end
+
+ def colorize(text : String, token : String, theme : Theme) : String
+ style = theme.styles.fetch(token, nil)
+ return text if style.nil?
+ if theme.styles.has_key?(token)
+ s = theme.styles[token]
+ else
+ # Themes don't contain information for each specific
+ # token type. However, they may contain information
+ # for a parent style. Worst case, we go to the root
+ # (Background) style.
+ s = theme.styles[theme.style_parents(token).reverse.find { |parent|
+ theme.styles.has_key?(parent)
+ }]
+ end
+ colorized = text.colorize
+ s.color.try { |c| colorized = colorized.fore(c.colorize) }
+ # Intentionally not setting background color
+ colorized.mode(:bold) if s.bold
+ colorized.mode(:italic) if s.italic
+ colorized.mode(:underline) if s.underline
+ colorized.to_s
+ end
+ end
+end
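
Not part of the diff, but for review context: a minimal sketch of how the relocated Ansi formatter is driven. It assumes the `Tartrazine.theme` helper from styles.cr (untouched by this changeset); the theme name is illustrative.

```crystal
require "tartrazine"

# Load a baked-in lexer definition and a theme (theme helper assumed
# from styles.cr; adjust if the loader is spelled differently).
lexer = Tartrazine.lexer("crystal")
theme = Tartrazine.theme("default-dark")

# format tokenizes the text and wraps each token's value in ANSI
# escape codes derived from the theme's style for that token type.
puts Tartrazine::Ansi.new.format(%(puts "hello"), lexer, theme)
```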
diff --git a/src/formatters/html.cr b/src/formatters/html.cr
new file mode 100644
index 0000000..6110f36
--- /dev/null
+++ b/src/formatters/html.cr
@@ -0,0 +1,59 @@
+require "../formatter"
+
+module Tartrazine
+ class Html < Formatter
+ def format(text : String, lexer : Lexer, theme : Theme) : String
+ output = String.build do |outp|
+ outp << ""
+ outp << ""
+ lexer.tokenize(text).each do |token|
+ fragment = "#{token[:value]}"
+ outp << fragment
+ end
+ outp << "
"
+ end
+ output
+ end
+
+ # ameba:disable Metrics/CyclomaticComplexity
+ def get_style_defs(theme : Theme) : String
+ output = String.build do |outp|
+ theme.styles.each do |token, style|
+ outp << ".#{get_css_class(token, theme)} {"
+ # These are set or nil
+ outp << "color: #{style.color.try &.hex};" if style.color
+ outp << "background-color: #{style.background.try &.hex};" if style.background
+ outp << "border: 1px solid #{style.border.try &.hex};" if style.border
+
+ # These are true/false/nil
+ outp << "border: none;" if style.border == false
+ outp << "font-weight: bold;" if style.bold
+ outp << "font-weight: 400;" if style.bold == false
+ outp << "font-style: italic;" if style.italic
+ outp << "font-style: normal;" if style.italic == false
+ outp << "text-decoration: underline;" if style.underline
+ outp << "text-decoration: none;" if style.underline == false
+
+ outp << "}"
+ end
+ end
+ output
+ end
+
+ # Given a token type, return the CSS class to use.
+ def get_css_class(token, theme)
+ return Abbreviations[token] if theme.styles.has_key?(token)
+
+ # Themes don't contain information for each specific
+ # token type. However, they may contain information
+ # for a parent style. Worst case, we go to the root
+ # (Background) style.
+ Abbreviations[theme.style_parents(token).reverse.find { |parent|
+ theme.styles.has_key?(parent)
+ }]
+ end
+ end
+
+end
\ No newline at end of file
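
Likewise, a sketch of how the Html formatter's two halves fit together: `format` emits `<span>` fragments whose classes are the abbreviated token names, and `get_style_defs` emits the matching CSS rules. Helper names are assumed as in the previous sketch.

```crystal
require "tartrazine"

lexer = Tartrazine.lexer("crystal")
theme = Tartrazine.theme("default-dark")
html  = Tartrazine::Html.new

# The stylesheet and the markup are generated separately, so callers
# can inline the CSS or serve it as a standalone file.
css  = html.get_style_defs(theme) # e.g. ".k {font-weight: bold;} ..."
body = html.format(%(puts "hello"), lexer, theme)

puts "<style>#{css}</style>"
puts body
```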
diff --git a/src/lexer.cr b/src/lexer.cr
new file mode 100644
index 0000000..c18ab31
--- /dev/null
+++ b/src/lexer.cr
@@ -0,0 +1,180 @@
+module Tartrazine
+  class LexerFiles
+    extend BakedFileSystem
+
+    bake_folder "../lexers", __DIR__
+  end
+
+  # This implements a lexer for Pygments RegexLexers as expressed
+  # in Chroma's XML serialization.
+  #
+  # For explanations on what actions and states do
+  # the Pygments documentation is a good place to start.
+  # https://pygments.org/docs/lexerdevelopment/
+  class Lexer
+    property config = {
+      name:             "",
+      aliases:          [] of String,
+      filenames:        [] of String,
+      mime_types:       [] of String,
+      priority:         0.0,
+      case_insensitive: false,
+      dot_all:          false,
+      not_multiline:    false,
+      ensure_nl:        false,
+    }
+    property xml : String = ""
+
+    property states = {} of String => State
+
+    property state_stack = ["root"]
+
+    # Turn the text into a list of tokens. The `usingself` parameter
+    # is true when the lexer is being used to tokenize a string
+    # from a larger text that is already being tokenized.
+    # So, when it's true, we don't modify the text.
+    def tokenize(text, usingself = false) : Array(Token)
+      @state_stack = ["root"]
+      tokens = [] of Token
+      pos = 0
+      matched = false
+
+      # Respect the `ensure_nl` config option
+      if text.size > 0 && text[-1] != '\n' && config[:ensure_nl] && !usingself
+        text += "\n"
+      end
+
+      # Loop through the text, applying rules
+      while pos < text.size
+        state = states[@state_stack.last]
+        # Log.trace { "Stack is #{@state_stack} State is #{state.name}, pos is #{pos}, text is #{text[pos..pos + 10]}" }
+        state.rules.each do |rule|
+          matched, new_pos, new_tokens = rule.match(text, pos, self)
+          if matched
+            # Move position forward, save the tokens,
+            # tokenize from the new position
+            # Log.trace { "MATCHED: #{rule.xml}" }
+            pos = new_pos
+            tokens += new_tokens
+            break
+          end
+          # Log.trace { "NOT MATCHED: #{rule.xml}" }
+        end
+        # If no rule matches, emit an error token
+        unless matched
+          # Log.trace { "Error at #{pos}" }
+          tokens << {type: "Error", value: "#{text[pos]}"}
+          pos += 1
+        end
+      end
+      Lexer.collapse_tokens(tokens)
+    end
+
+    # Collapse consecutive tokens of the same type for easier comparison
+    # and smaller output
+    def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token)
+      result = [] of Tartrazine::Token
+      tokens = tokens.reject { |token| token[:value] == "" }
+      tokens.each do |token|
+        if result.empty?
+          result << token
+          next
+        end
+        last = result.last
+        if last[:type] == token[:type]
+          new_token = {type: last[:type], value: last[:value] + token[:value]}
+          result.pop
+          result << new_token
+        else
+          result << token
+        end
+      end
+      result
+    end
+
+    # ameba:disable Metrics/CyclomaticComplexity
+    def self.from_xml(xml : String) : Lexer
+      l = Lexer.new
+      l.xml = xml
+      lexer = XML.parse(xml).first_element_child
+      if lexer
+        config = lexer.children.find { |node|
+          node.name == "config"
+        }
+        if config
+          l.config = {
+            name:             xml_to_s(config, name) || "",
+            aliases:          xml_to_a(config, _alias) || [] of String,
+            filenames:        xml_to_a(config, filename) || [] of String,
+            mime_types:       xml_to_a(config, mime_type) || [] of String,
+            priority:         xml_to_f(config, priority) || 0.0,
+            not_multiline:    xml_to_s(config, not_multiline) == "true",
+            dot_all:          xml_to_s(config, dot_all) == "true",
+            case_insensitive: xml_to_s(config, case_insensitive) == "true",
+            ensure_nl:        xml_to_s(config, ensure_nl) == "true",
+          }
+        end
+
+        rules = lexer.children.find { |node|
+          node.name == "rules"
+        }
+        if rules
+          # Rules contains states 🤷
+          rules.children.select { |node|
+            node.name == "state"
+          }.each do |state_node|
+            state = State.new
+            state.name = state_node["name"]
+            if l.states.has_key?(state.name)
+              raise Exception.new("Duplicate state: #{state.name}")
+            else
+              l.states[state.name] = state
+            end
+            # And states contain rules 🤷
+            state_node.children.select { |node|
+              node.name == "rule"
+            }.each do |rule_node|
+              case rule_node["pattern"]?
+              when nil
+                if rule_node.first_element_child.try &.name == "include"
+                  rule = IncludeStateRule.new(rule_node)
+                else
+                  rule = UnconditionalRule.new(rule_node)
+                end
+              else
+                rule = Rule.new(rule_node,
+                  multiline: !l.config[:not_multiline],
+                  dotall: l.config[:dot_all],
+                  ignorecase: l.config[:case_insensitive])
+              end
+              state.rules << rule
+            end
+          end
+        end
+      end
+      l
+    end
+  end
+
+  # A Lexer state. A state has a name and a list of rules.
+  # The state machine has a state stack containing references
+  # to states to decide which rules to apply.
+  class State
+    property name : String = ""
+    property rules = [] of Rule
+
+    def +(other : State)
+      new_state = State.new
+      new_state.name = Random.base58(8)
+      new_state.rules = rules + other.rules
+      new_state
+    end
+  end
+
+  # A token, the output of the tokenizer
+  alias Token = NamedTuple(type: String, value: String)
+
+  def self.lexer(name : String) : Lexer
+    Lexer.from_xml(LexerFiles.get("/#{name}.xml").gets_to_end)
+  end
+end
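
The XML that `Lexer.from_xml` consumes mirrors Chroma's serialization: a `<config>` node, then a `<rules>` node holding named `<state>` nodes, which in turn hold `<rule>` nodes (a `pattern` attribute, an `<include>` child, or neither). A toy example written against that shape, plus the token-collapsing behavior; the element names follow the parsing code above, and the token types are illustrative.

```crystal
require "tartrazine"

# A hand-written lexer definition in the Chroma XML shape that
# from_xml parses: one "root" state with two pattern rules.
xml = <<-XML
<lexer>
  <config>
    <name>toy</name>
  </config>
  <rules>
    <state name="root">
      <rule pattern="\\d+"><token type="LiteralNumber"/></rule>
      <rule pattern="\\s+"><token type="Text"/></rule>
    </state>
  </rules>
</lexer>
XML

lexer = Tartrazine::Lexer.from_xml(xml)
lexer.tokenize("12 34") # => [{type: "LiteralNumber", value: "12"}, ...]

# collapse_tokens drops empty tokens and merges adjacent same-type ones:
tokens = [
  {type: "Text", value: "a"},
  {type: "Text", value: "b"},
  {type: "Error", value: ""},
]
Tartrazine::Lexer.collapse_tokens(tokens) # => [{type: "Text", value: "ab"}]
```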
diff --git a/src/rules.cr b/src/rules.cr
index 7761e4a..d84e85b 100644
--- a/src/rules.cr
+++ b/src/rules.cr
@@ -3,7 +3,7 @@ require "./constants"
require "./formatter"
require "./rules"
require "./styles"
-require "./tartrazine"
+require "./lexer"
# These are lexer rules. They match with the text being parsed
# and perform actions, either emitting tokens or changing the
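
The require swap above is the whole change to rules.cr; what makes the split work is the contract between the two files. `tokenize` only ever calls `rule.match(text, pos, lexer)` and expects back a matched flag, the new position, and any tokens produced. A sketch of that interface as the lexer loop consumes it (the shape is taken from the diff; the body is illustrative, not the real implementation in rules.cr):

```crystal
# Illustrative rule matching the interface src/lexer.cr relies on:
# match() returns {matched, new_pos, new_tokens}. On a match the lexer
# advances pos and appends the tokens; otherwise it tries the next rule.
class DigitRule
  def match(text : String, pos : Int32, lexer : Tartrazine::Lexer)
    m = /\d+/.match(text, pos)
    return {false, pos, [] of Tartrazine::Token} if m.nil? || m.begin != pos
    {true, pos + m[0].size, [{type: "LiteralNumber", value: m[0]}]}
  end
end
```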
diff --git a/src/tartrazine.cr b/src/tartrazine.cr
index e6a7a84..8665f0f 100644
--- a/src/tartrazine.cr
+++ b/src/tartrazine.cr
@@ -15,186 +15,6 @@ module Tartrazine
   VERSION = "0.1.1"
   Log     = ::Log.for("tartrazine")
-
-  # This implements a lexer for Pygments RegexLexers as expressed
-  # in Chroma's XML serialization.
-  #
-  # For explanations on what actions and states do
-  # the Pygments documentation is a good place to start.
-  # https://pygments.org/docs/lexerdevelopment/
-
-  # A Lexer state. A state has a name and a list of rules.
-  # The state machine has a state stack containing references
-  # to states to decide which rules to apply.
-  class State
-    property name : String = ""
-    property rules = [] of Rule
-
-    def +(other : State)
-      new_state = State.new
-      new_state.name = Random.base58(8)
-      new_state.rules = rules + other.rules
-      new_state
-    end
-  end
-
-  class LexerFiles
-    extend BakedFileSystem
-
-    bake_folder "../lexers", __DIR__
-  end
-
-  # A token, the output of the tokenizer
-  alias Token = NamedTuple(type: String, value: String)
-
-  class Lexer
-    property config = {
-      name:             "",
-      aliases:          [] of String,
-      filenames:        [] of String,
-      mime_types:       [] of String,
-      priority:         0.0,
-      case_insensitive: false,
-      dot_all:          false,
-      not_multiline:    false,
-      ensure_nl:        false,
-    }
-    property xml : String = ""
-
-    property states = {} of String => State
-
-    property state_stack = ["root"]
-
-    # Turn the text into a list of tokens. The `usingself` parameter
-    # is true when the lexer is being used to tokenize a string
-    # from a larger text that is already being tokenized.
-    # So, when it's true, we don't modify the text.
-    def tokenize(text, usingself = false) : Array(Token)
-      @state_stack = ["root"]
-      tokens = [] of Token
-      pos = 0
-      matched = false
-
-      # Respect the `ensure_nl` config option
-      if text.size > 0 && text[-1] != '\n' && config[:ensure_nl] && !usingself
-        text += "\n"
-      end
-
-      # Loop through the text, applying rules
-      while pos < text.size
-        state = states[@state_stack.last]
-        # Log.trace { "Stack is #{@state_stack} State is #{state.name}, pos is #{pos}, text is #{text[pos..pos + 10]}" }
-        state.rules.each do |rule|
-          matched, new_pos, new_tokens = rule.match(text, pos, self)
-          if matched
-            # Move position forward, save the tokens,
-            # tokenize from the new position
-            # Log.trace { "MATCHED: #{rule.xml}" }
-            pos = new_pos
-            tokens += new_tokens
-            break
-          end
-          # Log.trace { "NOT MATCHED: #{rule.xml}" }
-        end
-        # If no rule matches, emit an error token
-        unless matched
-          # Log.trace { "Error at #{pos}" }
-          tokens << {type: "Error", value: "#{text[pos]}"}
-          pos += 1
-        end
-      end
-      Lexer.collapse_tokens(tokens)
-    end
-
-    # Collapse consecutive tokens of the same type for easier comparison
-    # and smaller output
-    def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token)
-      result = [] of Tartrazine::Token
-      tokens = tokens.reject { |token| token[:value] == "" }
-      tokens.each do |token|
-        if result.empty?
-          result << token
-          next
-        end
-        last = result.last
-        if last[:type] == token[:type]
-          new_token = {type: last[:type], value: last[:value] + token[:value]}
-          result.pop
-          result << new_token
-        else
-          result << token
-        end
-      end
-      result
-    end
-
-    # ameba:disable Metrics/CyclomaticComplexity
-    def self.from_xml(xml : String) : Lexer
-      l = Lexer.new
-      l.xml = xml
-      lexer = XML.parse(xml).first_element_child
-      if lexer
-        config = lexer.children.find { |node|
-          node.name == "config"
-        }
-        if config
-          l.config = {
-            name:             xml_to_s(config, name) || "",
-            aliases:          xml_to_a(config, _alias) || [] of String,
-            filenames:        xml_to_a(config, filename) || [] of String,
-            mime_types:       xml_to_a(config, mime_type) || [] of String,
-            priority:         xml_to_f(config, priority) || 0.0,
-            not_multiline:    xml_to_s(config, not_multiline) == "true",
-            dot_all:          xml_to_s(config, dot_all) == "true",
-            case_insensitive: xml_to_s(config, case_insensitive) == "true",
-            ensure_nl:        xml_to_s(config, ensure_nl) == "true",
-          }
-        end
-
-        rules = lexer.children.find { |node|
-          node.name == "rules"
-        }
-        if rules
-          # Rules contains states 🤷
-          rules.children.select { |node|
-            node.name == "state"
-          }.each do |state_node|
-            state = State.new
-            state.name = state_node["name"]
-            if l.states.has_key?(state.name)
-              raise Exception.new("Duplicate state: #{state.name}")
-            else
-              l.states[state.name] = state
-            end
-            # And states contain rules 🤷
-            state_node.children.select { |node|
-              node.name == "rule"
-            }.each do |rule_node|
-              case rule_node["pattern"]?
-              when nil
-                if rule_node.first_element_child.try &.name == "include"
-                  rule = IncludeStateRule.new(rule_node)
-                else
-                  rule = UnconditionalRule.new(rule_node)
-                end
-              else
-                rule = Rule.new(rule_node,
-                  multiline: !l.config[:not_multiline],
-                  dotall: l.config[:dot_all],
-                  ignorecase: l.config[:case_insensitive])
-              end
-              state.rules << rule
-            end
-          end
-        end
-      end
-      l
-    end
-  end
-
-  def self.lexer(name : String) : Lexer
-    Lexer.from_xml(LexerFiles.get("/#{name}.xml").gets_to_end)
-  end
 end
 # Convenience macros to parse XML
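
The hunk ends at the "Convenience macros to parse XML" comment; those macros (`xml_to_s`, `xml_to_f`, `xml_to_a`) stay in tartrazine.cr and are what let `from_xml` write `xml_to_s(config, name)` with a bare identifier. Their definitions sit below the cut; a hypothetical reconstruction of the string variant, just to show the shape:

```crystal
# Hypothetical sketch: expand xml_to_s(config, dot_all) into a lookup
# of the <dot_all> child's text content. The real macros live further
# down in src/tartrazine.cr, outside this hunk.
macro xml_to_s(node, name)
  {{node}}.children.find { |n| n.name == "{{name.id}}" }.try &.content.to_s
end
```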