Support guessing lexer by filename

This commit is contained in:
Roberto Alsina 2024-08-11 13:04:35 -03:00
parent b4f38e00e1
commit e3a1ce37b4
3 changed files with 30 additions and 13 deletions

View File

@ -5,5 +5,6 @@
* ✅ Implement styles
* ✅ Implement formatters
* ✅ Implement CLI
* Implement lexer loader that respects aliases, etc
* Implement language guessing for file names
* ✅ Implement lexer loader that respects aliases
* ✅ Implement lexer loader by file extension
* Implement lexer loader by mime type

View File

@ -1,3 +1,5 @@
require "./constants/lexers"
module Tartrazine
class LexerFiles
extend BakedFileSystem
@ -6,19 +8,33 @@ module Tartrazine
end
# Get the lexer object for a language name or, failing that, a file name
# FIXME: support aliases, paths, mimetypes, etc
def self.lexer(name : String) : Lexer
Lexer.from_xml(LexerFiles.get("/#{name}.xml").gets_to_end)
# FIXME: support mimetypes
# Resolves a lexer definition and returns it as a `Lexer`.
#
# Selection order:
# 1. Neither *name* nor *filename* given: the "plaintext" entry of LEXERS_BY_NAME.
# 2. *name* given and not the literal "autodetect": LEXERS_BY_NAME looked up with
#    the downcased name.
# 3. Otherwise: guess from *filename* by matching its base name against every
#    pattern key in LEXERS_BY_FILENAME.
#
# Raises `Exception` when the filename matches more than one distinct lexer.
#
# NOTE(review): LEXERS_BY_NAME and LEXERS_BY_FILENAME are not defined in this
# view; presumably they come from "./constants/lexers" — confirm.
def self.lexer(name : String? = nil, filename : String? = nil) : Lexer
if name.nil? && filename.nil?
lexer_file_name = LEXERS_BY_NAME["plaintext"]
elsif name && name != "autodetect"
# Only the lookup is case-insensitive; the "autodetect" comparison above is exact.
# NOTE(review): if LEXERS_BY_NAME is a Hash, an unknown name raises KeyError
# here rather than falling back to plaintext — confirm that is intended.
lexer_file_name = LEXERS_BY_NAME[name.downcase]
else
# Guess by filename
candidates = Set(String).new
LEXERS_BY_FILENAME.each do |k, v|
# k is used as a glob pattern for File.match?; v is converted to a set and merged
# in, so a lexer reached through several patterns is counted once.
# filename.to_s turns a nil filename (name == "autodetect", no file) into "",
# which presumably matches no pattern and ends in the plaintext branch below.
candidates += v.to_set if File.match?(k, File.basename(filename.to_s))
end
case candidates.size
when 0
# Nothing matched: fall back to plain text.
lexer_file_name = LEXERS_BY_NAME["plaintext"]
when 1
lexer_file_name = candidates.first
else
# Ambiguous match: no tie-breaking is attempted, the caller gets an exception.
raise Exception.new("Multiple lexers match the filename: #{candidates.to_a.join(", ")}")
end
end
# Read the XML definition stored under "/<lexer_file_name>.xml" in LexerFiles and parse it.
Lexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
end
# Return a list of all lexers
# FIXME: support aliases
# NOTE(review): this text comes from a rendered diff (hunk headers are visible
# elsewhere in the page), so the Set-building lines below are presumably the
# removed implementation and the LEXERS_BY_NAME line its replacement — confirm
# against the real file before relying on either.
def self.lexers : Array(String)
lexers = Set(String).new
LexerFiles.files.each do |file|
# Keep the last path segment up to its first ".", i.e. the file's base name.
lexers << file.path.split("/").last.split(".").first
end
lexers.to_a.sort!
# Last expression, hence the return value: the sorted keys of LEXERS_BY_NAME.
LEXERS_BY_NAME.keys.sort!
end
# This implements a lexer for Pygments RegexLexers as expressed

View File

@ -18,7 +18,7 @@ Usage:
Options:
-f <formatter> Format to use (html, terminal, json)
-t <theme> Theme to use, see --list-themes [default: default-dark]
-l <lexer> Lexer (language) to use, see --list-lexers [default: plaintext]
-l <lexer> Lexer (language) to use, see --list-lexers [default: autodetect]
-o <output> Output file. Default is stdout.
--standalone Generate a standalone HTML file, which includes
all style information. If not given, it will generate just
@ -77,7 +77,7 @@ if options["-f"]
exit 0
end
# NOTE(review): two consecutive `lexer =` assignments; presumably the first is
# the removed line of the diff and the second the added one — confirm.
lexer = Tartrazine.lexer(options["-l"].as(String))
# Pass both the -l value and the input path to Tartrazine.lexer.
lexer = Tartrazine.lexer(name: options["-l"].as(String), filename: options["FILE"].as(String))
# NOTE(review): the File returned by File.open is never explicitly closed here.
input = File.open(options["FILE"].as(String)).gets_to_end
output = formatter.format(input, lexer, theme)