Mirror of https://github.com/ralsina/tartrazine.git (synced 2025-08-02 13:59:51 +00:00)

Compare commits: ac8b7e3800...v0.7.0 (1 commit)
Commit: f2d802e391
.ameba.yml (107 changed lines)

@@ -1,9 +1,9 @@
 # This configuration file was generated by `ameba --gen-config`
-# on 2024-09-11 00:56:14 UTC using Ameba version 1.6.1.
+# on 2024-08-12 22:00:49 UTC using Ameba version 1.6.1.
 # The point is for the user to remove these configuration records
 # one by one as the reported problems are removed from the code base.

-# Problems found: 4
+# Problems found: 2
 # Run `ameba --only Documentation/DocumentationAdmonition` for details
 Documentation/DocumentationAdmonition:
   Description: Reports documentation admonitions
@@ -11,10 +11,111 @@ Documentation/DocumentationAdmonition:
   Excluded:
   - src/lexer.cr
   - src/actions.cr
-  - spec/examples/crystal/lexer_spec.cr
   Admonitions:
   - TODO
   - FIXME
   - BUG
   Enabled: true
   Severity: Warning
+
+# Problems found: 22
+# Run `ameba --only Lint/MissingBlockArgument` for details
+Lint/MissingBlockArgument:
+  Description: Disallows yielding method definitions without block argument
+  Excluded:
+  - pygments/tests/examplefiles/cr/test.cr
+  Enabled: true
+  Severity: Warning
+
+# Problems found: 1
+# Run `ameba --only Lint/NotNil` for details
+Lint/NotNil:
+  Description: Identifies usage of `not_nil!` calls
+  Excluded:
+  - pygments/tests/examplefiles/cr/test.cr
+  Enabled: true
+  Severity: Warning
+
+# Problems found: 34
+# Run `ameba --only Lint/ShadowingOuterLocalVar` for details
+Lint/ShadowingOuterLocalVar:
+  Description: Disallows the usage of the same name as outer local variables for block
+    or proc arguments
+  Excluded:
+  - pygments/tests/examplefiles/cr/test.cr
+  Enabled: true
+  Severity: Warning
+
+# Problems found: 1
+# Run `ameba --only Lint/UnreachableCode` for details
+Lint/UnreachableCode:
+  Description: Reports unreachable code
+  Excluded:
+  - pygments/tests/examplefiles/cr/test.cr
+  Enabled: true
+  Severity: Warning
+
+# Problems found: 6
+# Run `ameba --only Lint/UselessAssign` for details
+Lint/UselessAssign:
+  Description: Disallows useless variable assignments
+  ExcludeTypeDeclarations: false
+  Excluded:
+  - pygments/tests/examplefiles/cr/test.cr
+  Enabled: true
+  Severity: Warning
+
+# Problems found: 3
+# Run `ameba --only Naming/BlockParameterName` for details
+Naming/BlockParameterName:
+  Description: Disallows non-descriptive block parameter names
+  MinNameLength: 3
+  AllowNamesEndingInNumbers: true
+  Excluded:
+  - pygments/tests/examplefiles/cr/test.cr
+  AllowedNames:
+  - _
+  - e
+  - i
+  - j
+  - k
+  - v
+  - x
+  - y
+  - ex
+  - io
+  - ws
+  - op
+  - tx
+  - id
+  - ip
+  - k1
+  - k2
+  - v1
+  - v2
+  ForbiddenNames: []
+  Enabled: true
+  Severity: Convention
+
+# Problems found: 1
+# Run `ameba --only Naming/RescuedExceptionsVariableName` for details
+Naming/RescuedExceptionsVariableName:
+  Description: Makes sure that rescued exceptions variables are named as expected
+  Excluded:
+  - pygments/tests/examplefiles/cr/test.cr
+  AllowedNames:
+  - e
+  - ex
+  - exception
+  - error
+  Enabled: true
+  Severity: Convention
+
+# Problems found: 6
+# Run `ameba --only Naming/TypeNames` for details
+Naming/TypeNames:
+  Description: Enforces type names in camelcase manner
+  Excluded:
+  - pygments/tests/examplefiles/cr/test.cr
+  Enabled: true
+  Severity: Convention
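For context, here is a minimal illustrative sketch (not taken from this repository) of the kind of Crystal code two of the rules recorded above report:

# Illustrative snippets only; the method and variable names are made up.

# Lint/NotNil: `not_nil!` asserts a value is non-nil instead of handling the nil case.
def first_line(path : String) : String
  File.read_lines(path).first?.not_nil! # would be reported by Lint/NotNil
end

# Lint/ShadowingOuterLocalVar: the block parameter `item` shadows the outer local variable.
item = "outer"
%w(a b).each do |item| # would be reported by Lint/ShadowingOuterLocalVar
  puts item
end
puts item
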
CHANGELOG.md (19 changed lines)

@@ -2,17 +2,29 @@

 All notable changes to this project will be documented in this file.

-## [0.6.4] - 2024-08-28
+## [0.7.0] - 2024-09-10

+### 🚀 Features
+
+- Higher level API (`to_html` and `to_ansi`)
+- Use the native crystal highlighter
+
 ### 🐛 Bug Fixes

 - Ameba
 - Variable bame in Hacefile
+- Make it easier to import the Ansi formatter
+- Renamed BaseLexer to Lexer and Lexer to RegexLexer to make API nicer
+- Make install work

 ### 📚 Documentation

 - Mention AUR package

+### 🧪 Testing
+
+- Add CI workflows
+
 ### ⚙️ Miscellaneous Tasks

 - Pre-commit hooks
@@ -21,6 +33,10 @@ All notable changes to this project will be documented in this file.
 - Force conventional commit messages
 - Force conventional commit messages
 - Updated pre-commit
+- *(ignore)* Fix tests
+- Added badges
+- Added badges
+- *(ignore)* Removed random file

 ### Build

@@ -30,6 +46,7 @@ All notable changes to this project will be documented in this file.

 ### Bump

+- Release v0.6.4
 - Release v0.6.4

 ## [0.6.1] - 2024-08-25
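The "Higher level API (`to_html` and `to_ansi`)" entry above refers to module-level helpers. A hedged usage sketch follows: the `to_html` call matches the spec change later in this diff, while the `to_ansi` arguments and the `require` path are assumptions, not taken from the diff.

require "tartrazine" # assumed default require for the shard

# Highlight a Ruby snippet with the higher-level API added in 0.7.0.
html = Tartrazine.to_html("puts 'Hello, World!'", "ruby", standalone: false)
ansi = Tartrazine.to_ansi("puts 'Hello, World!'", "ruby") # assumed to mirror to_html
puts html
puts ansi
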
shard.yml

@@ -1,5 +1,5 @@
 name: tartrazine
-version: 0.6.4
+version: 0.7.0

 authors:
   - Roberto Alsina <roberto.alsina@gmail.com>
Deleted one-line stylesheet:

@@ -1 +0,0 @@
.e {color: #aa0000;background-color: #ffaaaa;}.b {background-color: #f0f3f3;tab-size: 8;}.k {color: #006699;font-weight: bold;}.kp {font-weight: 600;}.kt {color: #007788;}.na {color: #330099;}.nb {color: #336666;}.nc {color: #00aa88;font-weight: bold;}.nc {color: #336600;}.nd {color: #9999ff;}.ne {color: #999999;font-weight: bold;}.ne {color: #cc0000;font-weight: bold;}.nf {color: #cc00ff;}.nl {color: #9999ff;}.nn {color: #00ccff;font-weight: bold;}.nt {color: #330099;font-weight: bold;}.nv {color: #003333;}.ls {color: #cc3300;}.lsd {font-style: italic;}.lse {color: #cc3300;font-weight: bold;}.lsi {color: #aa0000;}.lso {color: #cc3300;}.lsr {color: #33aaaa;}.lss {color: #ffcc33;}.ln {color: #ff6600;}.o {color: #555555;}.ow {color: #000000;font-weight: bold;}.c {color: #0099ff;font-style: italic;}.cs {font-weight: bold;}.cp {color: #009999;font-style: normal;}.gd {background-color: #ffcccc;border: 1px solid #cc0000;}.ge {font-style: italic;}.ge {color: #ff0000;}.gh {color: #003300;font-weight: bold;}.gi {background-color: #ccffcc;border: 1px solid #00cc00;}.go {color: #aaaaaa;}.gp {color: #000099;font-weight: bold;}.gs {font-weight: bold;}.gs {color: #003300;font-weight: bold;}.gt {color: #99cc66;}.gu {text-decoration: underline;}.tw {color: #bbbbbb;}.lh {}
Deleted one-line stylesheet:

@@ -1 +0,0 @@
.b {color: #b7b7b7;background-color: #101010;font-weight: bold;tab-size: 8;}.lh {color: #8eaaaa;background-color: #232323;}.t {color: #b7b7b7;}.e {color: #de6e6e;}.c {color: #333333;}.cp {color: #876c4f;}.cpf {color: #5f8787;}.k {color: #d69094;}.kt {color: #de6e6e;}.na {color: #8eaaaa;}.nb {color: #de6e6e;}.nbp {color: #de6e6e;}.nc {color: #8eaaaa;}.nc {color: #dab083;}.nd {color: #dab083;}.nf {color: #8eaaaa;}.nn {color: #8eaaaa;}.nt {color: #d69094;}.nv {color: #8eaaaa;}.nvi {color: #de6e6e;}.ln {color: #dab083;}.o {color: #60a592;}.ow {color: #d69094;}.l {color: #5f8787;}.ls {color: #5f8787;}.lsi {color: #876c4f;}.lsr {color: #60a592;}.lss {color: #dab083;}
Deleted Crystal source file (413 lines):

@@ -1,413 +0,0 @@
require "./constants/lexers"
require "./heuristics"
require "baked_file_system"
require "crystal/syntax_highlighter"

module Tartrazine
  class LexerFiles
    extend BakedFileSystem
    bake_folder "../lexers", __DIR__
  end

  # Get the lexer object for a language name
  # FIXME: support mimetypes
  def self.lexer(name : String? = nil, filename : String? = nil, mimetype : String? = nil) : BaseLexer
    return lexer_by_name(name) if name && name != "autodetect"
    return lexer_by_filename(filename) if filename
    return lexer_by_mimetype(mimetype) if mimetype

    RegexLexer.from_xml(LexerFiles.get("/#{LEXERS_BY_NAME["plaintext"]}.xml").gets_to_end)
  end

  private def self.lexer_by_mimetype(mimetype : String) : BaseLexer
    lexer_file_name = LEXERS_BY_MIMETYPE.fetch(mimetype, nil)
    raise Exception.new("Unknown mimetype: #{mimetype}") if lexer_file_name.nil?

    RegexLexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
  end

  private def self.lexer_by_name(name : String) : BaseLexer
    return CrystalLexer.new if name == "crystal"
    lexer_file_name = LEXERS_BY_NAME.fetch(name.downcase, nil)
    return create_delegating_lexer(name) if lexer_file_name.nil? && name.includes? "+"
    raise Exception.new("Unknown lexer: #{name}") if lexer_file_name.nil?

    RegexLexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
  end

  private def self.lexer_by_filename(filename : String) : BaseLexer
    if filename.ends_with?(".cr")
      return CrystalLexer.new
    end

    candidates = Set(String).new
    LEXERS_BY_FILENAME.each do |k, v|
      candidates += v.to_set if File.match?(k, File.basename(filename))
    end

    case candidates.size
    when 0
      lexer_file_name = LEXERS_BY_NAME["plaintext"]
    when 1
      lexer_file_name = candidates.first
    else
      lexer_file_name = self.lexer_by_content(filename)
      begin
        return self.lexer(lexer_file_name)
      rescue ex : Exception
        raise Exception.new("Multiple lexers match the filename: #{candidates.to_a.join(", ")}, heuristics suggest #{lexer_file_name} but there is no matching lexer.")
      end
    end

    RegexLexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
  end

  private def self.lexer_by_content(fname : String) : String?
    h = Linguist::Heuristic.from_yaml(LexerFiles.get("/heuristics.yml").gets_to_end)
    result = h.run(fname, File.read(fname))
    case result
    when Nil
      raise Exception.new "No lexer found for #{fname}"
    when String
      result.as(String)
    when Array(String)
      result.first
    end
  end

  private def self.create_delegating_lexer(name : String) : BaseLexer
    language, root = name.split("+", 2)
    language_lexer = lexer(language)
    root_lexer = lexer(root)
    DelegatingLexer.new(language_lexer, root_lexer)
  end

  # Return a list of all lexers
  def self.lexers : Array(String)
    LEXERS_BY_NAME.keys.sort!
  end

  # A token, the output of the tokenizer
  alias Token = NamedTuple(type: String, value: String)

  abstract class BaseTokenizer
  end

  class Tokenizer < BaseTokenizer
    include Iterator(Token)
    property lexer : BaseLexer
    property text : Bytes
    property pos : Int32 = 0
    @dq = Deque(Token).new
    property state_stack = ["root"]

    def initialize(@lexer : BaseLexer, text : String, secondary = false)
      # Respect the `ensure_nl` config option
      if text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary
        text += "\n"
      end
      @text = text.to_slice
    end

    def next : Iterator::Stop | Token
      if @dq.size > 0
        return @dq.shift
      end
      if pos == @text.size
        return stop
      end

      matched = false
      while @pos < @text.size
        @lexer.states[@state_stack.last].rules.each do |rule|
          matched, new_pos, new_tokens = rule.match(@text, @pos, self)
          if matched
            @pos = new_pos
            split_tokens(new_tokens).each { |token| @dq << token }
            break
          end
        end
        if !matched
          if @text[@pos] == 10u8
            @dq << {type: "Text", value: "\n"}
            @state_stack = ["root"]
          else
            @dq << {type: "Error", value: String.new(@text[@pos..@pos])}
          end
          @pos += 1
          break
        end
      end
      self.next
    end

    # If a token contains a newline, split it into two tokens
    def split_tokens(tokens : Array(Token)) : Array(Token)
      split_tokens = [] of Token
      tokens.each do |token|
        if token[:value].includes?("\n")
          values = token[:value].split("\n")
          values.each_with_index do |value, index|
            value += "\n" if index < values.size - 1
            split_tokens << {type: token[:type], value: value}
          end
        else
          split_tokens << token
        end
      end
      split_tokens
    end
  end

  alias BaseLexer = Lexer

  abstract class Lexer
    property config = {
      name:             "",
      priority:         0.0,
      case_insensitive: false,
      dot_all:          false,
      not_multiline:    false,
      ensure_nl:        false,
    }
    property states = {} of String => State

    def tokenizer(text : String, secondary = false) : BaseTokenizer
      Tokenizer.new(self, text, secondary)
    end
  end

  # This implements a lexer for Pygments RegexLexers as expressed
  # in Chroma's XML serialization.
  #
  # For explanations on what actions and states do
  # the Pygments documentation is a good place to start.
  # https://pygments.org/docs/lexerdevelopment/
  class RegexLexer < BaseLexer
    # Collapse consecutive tokens of the same type for easier comparison
    # and smaller output
    def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token)
      result = [] of Tartrazine::Token
      tokens = tokens.reject { |token| token[:value] == "" }
      tokens.each do |token|
        if result.empty?
          result << token
          next
        end
        last = result.last
        if last[:type] == token[:type]
          new_token = {type: last[:type], value: last[:value] + token[:value]}
          result.pop
          result << new_token
        else
          result << token
        end
      end
      result
    end

    def self.from_xml(xml : String) : Lexer
      l = RegexLexer.new
      lexer = XML.parse(xml).first_element_child
      if lexer
        config = lexer.children.find { |node|
          node.name == "config"
        }
        if config
          l.config = {
            name:             xml_to_s(config, name) || "",
            priority:         xml_to_f(config, priority) || 0.0,
            not_multiline:    xml_to_s(config, not_multiline) == "true",
            dot_all:          xml_to_s(config, dot_all) == "true",
            case_insensitive: xml_to_s(config, case_insensitive) == "true",
            ensure_nl:        xml_to_s(config, ensure_nl) == "true",
          }
        end

        rules = lexer.children.find { |node|
          node.name == "rules"
        }
        if rules
          # Rules contains states 🤷
          rules.children.select { |node|
            node.name == "state"
          }.each do |state_node|
            state = State.new
            state.name = state_node["name"]
            if l.states.has_key?(state.name)
              raise Exception.new("Duplicate state: #{state.name}")
            else
              l.states[state.name] = state
            end
            # And states contain rules 🤷
            state_node.children.select { |node|
              node.name == "rule"
            }.each do |rule_node|
              case rule_node["pattern"]?
              when nil
                if rule_node.first_element_child.try &.name == "include"
                  rule = IncludeStateRule.new(rule_node)
                else
                  rule = UnconditionalRule.new(rule_node)
                end
              else
                rule = Rule.new(rule_node,
                  multiline: !l.config[:not_multiline],
                  dotall: l.config[:dot_all],
                  ignorecase: l.config[:case_insensitive])
              end
              state.rules << rule
            end
          end
        end
      end
      l
    end
  end

  # A lexer that takes two lexers as arguments. A root lexer
  # and a language lexer. Everything is scalled using the
  # language lexer, afterwards all `Other` tokens are lexed
  # using the root lexer.
  #
  # This is useful for things like template languages, where
  # you have Jinja + HTML or Jinja + CSS and so on.
  class DelegatingLexer < Lexer
    property language_lexer : BaseLexer
    property root_lexer : BaseLexer

    def initialize(@language_lexer : BaseLexer, @root_lexer : BaseLexer)
    end

    def tokenizer(text : String, secondary = false) : DelegatingTokenizer
      DelegatingTokenizer.new(self, text, secondary)
    end
  end

  # This Tokenizer works with a DelegatingLexer. It first tokenizes
  # using the language lexer, and "Other" tokens are tokenized using
  # the root lexer.
  class DelegatingTokenizer < BaseTokenizer
    include Iterator(Token)
    @dq = Deque(Token).new
    @language_tokenizer : BaseTokenizer

    def initialize(@lexer : DelegatingLexer, text : String, secondary = false)
      # Respect the `ensure_nl` config option
      if text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary
        text += "\n"
      end
      @language_tokenizer = @lexer.language_lexer.tokenizer(text, true)
    end

    def next : Iterator::Stop | Token
      if @dq.size > 0
        return @dq.shift
      end
      token = @language_tokenizer.next
      if token.is_a? Iterator::Stop
        return stop
      elsif token.as(Token).[:type] == "Other"
        root_tokenizer = @lexer.root_lexer.tokenizer(token.as(Token).[:value], true)
        root_tokenizer.each do |root_token|
          @dq << root_token
        end
      else
        @dq << token.as(Token)
      end
      self.next
    end
  end

  # A Lexer state. A state has a name and a list of rules.
  # The state machine has a state stack containing references
  # to states to decide which rules to apply.
  struct State
    property name : String = ""
    property rules = [] of BaseRule

    def +(other : State)
      new_state = State.new
      new_state.name = Random.base58(8)
      new_state.rules = rules + other.rules
      new_state
    end
  end

  class CustomCrystalHighlighter < Crystal::SyntaxHighlighter
    @tokens = [] of Token

    def render_delimiter(&block)
      @tokens << {type: "LiteralString", value: block.call.to_s}
    end

    def render_interpolation(&block)
      @tokens << {type: "LiteralStringInterpol", value: "\#{"}
      @tokens << {type: "Text", value: block.call.to_s}
      @tokens << {type: "LiteralStringInterpol", value: "}"}
    end

    def render_string_array(&block)
      @tokens << {type: "LiteralString", value: block.call.to_s}
    end

    # ameba:disable Metrics/CyclomaticComplexity
    def render(type : TokenType, value : String)
      case type
      when .comment?
        @tokens << {type: "Comment", value: value}
      when .number?
        @tokens << {type: "LiteralNumber", value: value}
      when .char?
        @tokens << {type: "LiteralStringChar", value: value}
      when .symbol?
        @tokens << {type: "LiteralStringSymbol", value: value}
      when .const?
        @tokens << {type: "NameConstant", value: value}
      when .string?
        @tokens << {type: "LiteralString", value: value}
      when .ident?
        @tokens << {type: "NameVariable", value: value}
      when .keyword?, .self?
        @tokens << {type: "NameKeyword", value: value}
      when .primitive_literal?
        @tokens << {type: "Literal", value: value}
      when .operator?
        @tokens << {type: "Operator", value: value}
      when Crystal::SyntaxHighlighter::TokenType::DELIMITED_TOKEN, Crystal::SyntaxHighlighter::TokenType::DELIMITER_START, Crystal::SyntaxHighlighter::TokenType::DELIMITER_END
        @tokens << {type: "LiteralString", value: value}
      else
        @tokens << {type: "Text", value: value}
      end
    end
  end

  class CrystalTokenizer < Tartrazine::BaseTokenizer
    include Iterator(Token)
    @hl = CustomCrystalHighlighter.new
    @lexer : BaseLexer
    @iter : Iterator(Token)

    # delegate next, to: @iter

    def initialize(@lexer : BaseLexer, text : String, secondary = false)
      # Respect the `ensure_nl` config option
      if text.size > 0 && text[-1] != '\n' && @lexer.config[:ensure_nl] && !secondary
        text += "\n"
      end
      # Just do the tokenizing
      @hl.highlight(text)
      @iter = @hl.@tokens.each
    end

    def next : Iterator::Stop | Token
      @iter.next
    end
  end

  class CrystalLexer < BaseLexer
    def tokenizer(text : String, secondary = false) : BaseTokenizer
      CrystalTokenizer.new(self, text, secondary)
    end
  end
end
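To make the removed module easier to follow, here is a short usage sketch derived only from the methods in the listing above; it is not part of the diff, and the lexer names are illustrative. `Tartrazine.lexer` resolves a lexer by name, filename, or mimetype (falling back to plaintext), a name containing "+" builds a DelegatingLexer, and a lexer's `tokenizer` yields Token named tuples that `RegexLexer.collapse_tokens` can merge.

require "tartrazine" # assumed entry point that pulls in the module above

lexer = Tartrazine.lexer(name: "crystal")        # "crystal" maps to CrystalLexer
by_file = Tartrazine.lexer(filename: "setup.py") # matched by filename, heuristics on ties
template = Tartrazine.lexer(name: "jinja+html")  # "+" produces a DelegatingLexer

tokens = lexer.tokenizer("puts 1 + 2").to_a      # Array of {type: String, value: String}
collapsed = Tartrazine::RegexLexer.collapse_tokens(tokens)
collapsed.each { |t| puts "#{t[:type]} -> #{t[:value].inspect}" }
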
File diff suppressed because one or more lines are too long
Deleted Python example file:

@@ -1,11 +0,0 @@
from flask import Flask, request

app = Flask("{{name}}")

@app.route('/')
def handle():
    return "Hello World from Flask!"

@app.route('/ping')
def ping():
    return "OK"
Deleted one-line JSON token fixture:

@@ -1 +0,0 @@
[{"type":"KeywordNamespace","value":"from"},{"type":"Text","value":" "},{"type":"NameNamespace","value":"flask"},{"type":"Text","value":" "},{"type":"KeywordNamespace","value":"import"},{"type":"Text","value":" "},{"type":"Name","value":"Flask"},{"type":"Punctuation","value":","},{"type":"Text","value":" "},{"type":"Name","value":"request"},{"type":"Text","value":"\n\n"},{"type":"Name","value":"app"},{"type":"Text","value":" "},{"type":"Operator","value":"="},{"type":"Text","value":" "},{"type":"Name","value":"Flask"},{"type":"Punctuation","value":"("},{"type":"LiteralStringDouble","value":"\""},{"type":"CommentPreproc","value":"{{"},{"type":"NameVariable","value":"name"},{"type":"CommentPreproc","value":"}}"},{"type":"LiteralStringDouble","value":"\")"},{"type":"Text","value":"\n\n"},{"type":"NameDecorator","value":"@app.route"},{"type":"Punctuation","value":"("},{"type":"LiteralStringSingle","value":"'/'"},{"type":"Punctuation","value":")"},{"type":"Text","value":"\n"},{"type":"Keyword","value":"def"},{"type":"Text","value":" "},{"type":"NameFunction","value":"handle"},{"type":"Punctuation","value":"():"},{"type":"Text","value":"\n    "},{"type":"Keyword","value":"return"},{"type":"Text","value":" "},{"type":"LiteralStringDouble","value":"\"Hello World from Flask!\""},{"type":"Text","value":"\n\n"},{"type":"NameDecorator","value":"@app.route"},{"type":"Punctuation","value":"("},{"type":"LiteralStringSingle","value":"'/ping'"},{"type":"Punctuation","value":")"},{"type":"Text","value":"\n"},{"type":"Keyword","value":"def"},{"type":"Text","value":" "},{"type":"NameFunction","value":"ping"},{"type":"Punctuation","value":"():"},{"type":"Text","value":"\n    "},{"type":"Keyword","value":"return"},{"type":"Text","value":" "},{"type":"LiteralStringDouble","value":"\"OK\""},{"type":"Text","value":"\n"}]
spec/tartrazine_spec.cr

@@ -3,12 +3,6 @@ require "./spec_helper"
 # These are the testcases from Pygments
 testcases = Dir.glob("#{__DIR__}/tests/**/*txt").sort

-# These are custom testcases
-examples = Dir.glob("#{__DIR__}/examples/**/*.*").reject(&.ends_with? ".json").sort!
-
-# CSS Stylesheets
-css_files = Dir.glob("#{__DIR__}/css/*.css")
-
 # These lexers don't load because of parsing issues
 failing_lexers = {
   "webgpu_shading_language",
@@ -57,14 +51,6 @@ not_my_fault = {

 describe Tartrazine do
   describe "Lexer" do
-    examples.each do |example|
-      it "parses #{example}".split("/")[-2...].join("/") do
-        lexer = Tartrazine.lexer(name: File.basename(File.dirname(example)).downcase)
-        text = File.read(example)
-        expected = Array(Tartrazine::Token).from_json(File.read("#{example}.json"))
-        Tartrazine::RegexLexer.collapse_tokens(lexer.tokenizer(text).to_a).should eq expected
-      end
-    end
     testcases.each do |testcase|
       if known_bad.includes?(testcase)
         pending "parses #{testcase}".split("/")[-2...].join("/") do
@@ -84,17 +70,6 @@ describe Tartrazine do
     end
   end
-
-  describe "formatter" do
-    css_files.each do |css_file|
-      it "generates #{css_file}" do
-        css = File.read(css_file)
-        theme = Tartrazine.theme(File.basename(css_file, ".css"))
-        formatter = Tartrazine::Html.new(theme: theme)
-        formatter.style_defs.strip.should eq css.strip
-      end
-    end
-  end

   describe "to_html" do
     it "should do basic highlighting" do
       html = Tartrazine.to_html("puts 'Hello, World!'", "ruby", standalone: false)
@@ -1 +1 @@
-require "../spec/tartrazine_spec.cr"
+require "../spec/**"