Implemented the usingbygroup action, so code-in-markdown works

This commit is contained in:
Roberto Alsina 2024-08-23 10:20:03 -03:00
parent a2884c4c78
commit 3d5d073471
4 changed files with 36 additions and 21 deletions

View File

@@ -9,4 +9,4 @@
* ✅ Implement lexer loader by file extension * ✅ Implement lexer loader by file extension
* ✅ Add --line-numbers to terminal formatter * ✅ Add --line-numbers to terminal formatter
* Implement lexer loader by mime type * Implement lexer loader by mime type
* Implement Delegating lexers * Implement Delegating lexers

View File

@@ -18,19 +18,21 @@
<rule pattern="^(\s*)([*-])(\s)(.+\n)"><bygroups><token type="TextWhitespace"/><token type="Keyword"/><token type="TextWhitespace"/><usingself state="inline"/></bygroups></rule> <rule pattern="^(\s*)([*-])(\s)(.+\n)"><bygroups><token type="TextWhitespace"/><token type="Keyword"/><token type="TextWhitespace"/><usingself state="inline"/></bygroups></rule>
<rule pattern="^(\s*)([0-9]+\.)( .+\n)"><bygroups><token type="TextWhitespace"/><token type="Keyword"/><usingself state="inline"/></bygroups></rule> <rule pattern="^(\s*)([0-9]+\.)( .+\n)"><bygroups><token type="TextWhitespace"/><token type="Keyword"/><usingself state="inline"/></bygroups></rule>
<rule pattern="^(\s*&gt;\s)(.+\n)"><bygroups><token type="Keyword"/><token type="GenericEmph"/></bygroups></rule> <rule pattern="^(\s*&gt;\s)(.+\n)"><bygroups><token type="Keyword"/><token type="GenericEmph"/></bygroups></rule>
<rule pattern="^(\s*```\n[\w\W]*?^\s*```$\n)"><token type="LiteralStringBacktick"/></rule> <rule pattern="^(```\n)([\w\W]*?)(^```$)">
<rule pattern="(?x) <bygroups>
^(?P&lt;initial&gt;\s*```) <token type="LiteralStringBacktick"/>
(?P&lt;lang&gt;[\w\-]+) <token type="Text"/>
(?P&lt;afterlang&gt; <token type="LiteralStringBacktick"/>
(?P&lt;whitespace&gt;[^\S\n]+) </bygroups>
(?P&lt;extra&gt;.*))? </rule>
(?P&lt;newline&gt;\n) <rule pattern="^(```)(\w+)(\n)([\w\W]*?)(^```$)">
(?P&lt;code&gt;(.|\n)*?) <bygroups>
(?P&lt;terminator&gt;^\s*```$\n) <token type="LiteralStringBacktick"/>
"> <token type="NameLabel"/>
<!-- Implement actions for delegating via a capture group --> <token type="TextWhitespace"/>
<token type="Text"/> <UsingByGroup lexer="2" content="4"/>
<token type="LiteralStringBacktick"/>
</bygroups>
</rule> </rule>
<rule><include state="inline"/></rule> <rule><include state="inline"/></rule>
</state> </state>

View File

@@ -16,13 +16,16 @@ module Tartrazine
Push Push
Token Token
Using Using
Usingbygroup
Usingself Usingself
end end
struct Action struct Action
property actions : Array(Action) = [] of Action property actions : Array(Action) = [] of Action
@content_index : Int32 = 0
@depth : Int32 = 0 @depth : Int32 = 0
@lexer_index : Int32 = 0
@lexer_name : String = "" @lexer_name : String = ""
@states : Array(String) = [] of String @states : Array(String) = [] of String
@states_to_push : Array(String) = [] of String @states_to_push : Array(String) = [] of String
@@ -62,6 +65,9 @@ module Tartrazine
@states = xml.attributes.select { |attrib| @states = xml.attributes.select { |attrib|
attrib.name == "state" attrib.name == "state"
}.map &.content }.map &.content
when ActionType::Usingbygroup
@lexer_index = xml["lexer"].to_i
@content_index = xml["content"].to_i
end end
end end
@@ -134,6 +140,12 @@ module Tartrazine
tokenizer.lexer.states[new_state.name] = new_state tokenizer.lexer.states[new_state.name] = new_state
tokenizer.state_stack << new_state.name tokenizer.state_stack << new_state.name
[] of Token [] of Token
when ActionType::Usingbygroup
# Shunt to content-specified lexer
return [] of Token if match.empty?
Tartrazine.lexer(String.new(match[@lexer_index].value)).tokenizer(
String.new(match[@content_index].value),
secondary: true).to_a
else else
raise Exception.new("Unknown action type: #{@type}") raise Exception.new("Unknown action type: #{@type}")
end end

View File

@@ -12,24 +12,24 @@ module Tartrazine
def self.lexer(name : String? = nil, filename : String? = nil) : BaseLexer def self.lexer(name : String? = nil, filename : String? = nil) : BaseLexer
return lexer_by_name(name) if name && name != "autodetect" return lexer_by_name(name) if name && name != "autodetect"
return lexer_by_filename(filename) if filename return lexer_by_filename(filename) if filename
Lexer.from_xml(LexerFiles.get("/#{LEXERS_BY_NAME["plaintext"]}.xml").gets_to_end) Lexer.from_xml(LexerFiles.get("/#{LEXERS_BY_NAME["plaintext"]}.xml").gets_to_end)
end end
private def self.lexer_by_name(name : String) : BaseLexer private def self.lexer_by_name(name : String) : BaseLexer
lexer_file_name = LEXERS_BY_NAME.fetch(name.downcase, nil) lexer_file_name = LEXERS_BY_NAME.fetch(name.downcase, nil)
return create_delegating_lexer(name) if lexer_file_name.nil? && name.includes? "+" return create_delegating_lexer(name) if lexer_file_name.nil? && name.includes? "+"
raise Exception.new("Unknown lexer: #{name}") if lexer_file_name.nil? raise Exception.new("Unknown lexer: #{name}") if lexer_file_name.nil?
Lexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end) Lexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
end end
private def self.lexer_by_filename(filename : String) : BaseLexer private def self.lexer_by_filename(filename : String) : BaseLexer
candidates = Set(String).new candidates = Set(String).new
LEXERS_BY_FILENAME.each do |k, v| LEXERS_BY_FILENAME.each do |k, v|
candidates += v.to_set if File.match?(k, File.basename(filename)) candidates += v.to_set if File.match?(k, File.basename(filename))
end end
case candidates.size case candidates.size
when 0 when 0
lexer_file_name = LEXERS_BY_NAME["plaintext"] lexer_file_name = LEXERS_BY_NAME["plaintext"]
@@ -38,16 +38,17 @@ module Tartrazine
else else
raise Exception.new("Multiple lexers match the filename: #{candidates.to_a.join(", ")}") raise Exception.new("Multiple lexers match the filename: #{candidates.to_a.join(", ")}")
end end
Lexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end) Lexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
end end
private def self.create_delegating_lexer(name : String) : BaseLexer private def self.create_delegating_lexer(name : String) : BaseLexer
language, root = name.split("+", 2) language, root = name.split("+", 2)
language_lexer = lexer(language) language_lexer = lexer(language)
root_lexer = lexer(root) root_lexer = lexer(root)
DelegatingLexer.new(language_lexer, root_lexer) DelegatingLexer.new(language_lexer, root_lexer)
end end
# Return a list of all lexers # Return a list of all lexers
def self.lexers : Array(String) def self.lexers : Array(String)
LEXERS_BY_NAME.keys.sort! LEXERS_BY_NAME.keys.sort!