Implemented usingbygroup action, so code-in-markdown works

This commit is contained in:
Roberto Alsina 2024-08-23 10:20:03 -03:00
parent a2884c4c78
commit 3d5d073471
4 changed files with 36 additions and 21 deletions

View File

@ -9,4 +9,4 @@
* ✅ Implement lexer loader by file extension
* ✅ Add --line-numbers to terminal formatter
* Implement lexer loader by mime type
* Implement Delegating lexers
* Implement Delegating lexers

View File

@ -18,19 +18,21 @@
<rule pattern="^(\s*)([*-])(\s)(.+\n)"><bygroups><token type="TextWhitespace"/><token type="Keyword"/><token type="TextWhitespace"/><usingself state="inline"/></bygroups></rule>
<rule pattern="^(\s*)([0-9]+\.)( .+\n)"><bygroups><token type="TextWhitespace"/><token type="Keyword"/><usingself state="inline"/></bygroups></rule>
<rule pattern="^(\s*&gt;\s)(.+\n)"><bygroups><token type="Keyword"/><token type="GenericEmph"/></bygroups></rule>
<rule pattern="^(\s*```\n[\w\W]*?^\s*```$\n)"><token type="LiteralStringBacktick"/></rule>
<rule pattern="(?x)
^(?P&lt;initial&gt;\s*```)
(?P&lt;lang&gt;[\w\-]+)
(?P&lt;afterlang&gt;
(?P&lt;whitespace&gt;[^\S\n]+)
(?P&lt;extra&gt;.*))?
(?P&lt;newline&gt;\n)
(?P&lt;code&gt;(.|\n)*?)
(?P&lt;terminator&gt;^\s*```$\n)
">
<!-- Implement actions for delegating via a capture group -->
<token type="Text"/>
<rule pattern="^(```\n)([\w\W]*?)(^```$)">
<bygroups>
<token type="LiteralStringBacktick"/>
<token type="Text"/>
<token type="LiteralStringBacktick"/>
</bygroups>
</rule>
<rule pattern="^(```)(\w+)(\n)([\w\W]*?)(^```$)">
<bygroups>
<token type="LiteralStringBacktick"/>
<token type="NameLabel"/>
<token type="TextWhitespace"/>
<UsingByGroup lexer="2" content="4"/>
<token type="LiteralStringBacktick"/>
</bygroups>
</rule>
<rule><include state="inline"/></rule>
</state>

View File

@ -16,13 +16,16 @@ module Tartrazine
Push
Token
Using
Usingbygroup
Usingself
end
struct Action
property actions : Array(Action) = [] of Action
@content_index : Int32 = 0
@depth : Int32 = 0
@lexer_index : Int32 = 0
@lexer_name : String = ""
@states : Array(String) = [] of String
@states_to_push : Array(String) = [] of String
@ -62,6 +65,9 @@ module Tartrazine
@states = xml.attributes.select { |attrib|
attrib.name == "state"
}.map &.content
when ActionType::Usingbygroup
@lexer_index = xml["lexer"].to_i
@content_index = xml["content"].to_i
end
end
@ -134,6 +140,12 @@ module Tartrazine
tokenizer.lexer.states[new_state.name] = new_state
tokenizer.state_stack << new_state.name
[] of Token
when ActionType::Usingbygroup
# Shunt to content-specified lexer
return [] of Token if match.empty?
Tartrazine.lexer(String.new(match[@lexer_index].value)).tokenizer(
String.new(match[@content_index].value),
secondary: true).to_a
else
raise Exception.new("Unknown action type: #{@type}")
end

View File

@ -12,24 +12,24 @@ module Tartrazine
def self.lexer(name : String? = nil, filename : String? = nil) : BaseLexer
return lexer_by_name(name) if name && name != "autodetect"
return lexer_by_filename(filename) if filename
Lexer.from_xml(LexerFiles.get("/#{LEXERS_BY_NAME["plaintext"]}.xml").gets_to_end)
end
private def self.lexer_by_name(name : String) : BaseLexer
lexer_file_name = LEXERS_BY_NAME.fetch(name.downcase, nil)
return create_delegating_lexer(name) if lexer_file_name.nil? && name.includes? "+"
raise Exception.new("Unknown lexer: #{name}") if lexer_file_name.nil?
Lexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
end
private def self.lexer_by_filename(filename : String) : BaseLexer
candidates = Set(String).new
LEXERS_BY_FILENAME.each do |k, v|
candidates += v.to_set if File.match?(k, File.basename(filename))
end
case candidates.size
when 0
lexer_file_name = LEXERS_BY_NAME["plaintext"]
@ -38,16 +38,17 @@ module Tartrazine
else
raise Exception.new("Multiple lexers match the filename: #{candidates.to_a.join(", ")}")
end
Lexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
end
private def self.create_delegating_lexer(name : String) : BaseLexer
language, root = name.split("+", 2)
language_lexer = lexer(language)
root_lexer = lexer(root)
DelegatingLexer.new(language_lexer, root_lexer)
end
# Return a list of all lexers
def self.lexers : Array(String)
LEXERS_BY_NAME.keys.sort!