mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-07-03 23:09:21 +00:00
Compare commits
6 Commits
heuristics
...
v0.6.3
Author | SHA1 | Date | |
---|---|---|---|
4dd2e925b0 | |||
7bda19cdea | |||
0e7dafe711 | |||
082241eb0f | |||
df88047ca8 | |||
5a3b50d7a3 |
59
README.md
59
README.md
@ -2,36 +2,11 @@
|
|||||||
|
|
||||||
Tartrazine is a library to syntax-highlight code. It is
|
Tartrazine is a library to syntax-highlight code. It is
|
||||||
a port of [Pygments](https://pygments.org/) to
|
a port of [Pygments](https://pygments.org/) to
|
||||||
[Crystal](https://crystal-lang.org/). Kind of.
|
[Crystal](https://crystal-lang.org/).
|
||||||
|
|
||||||
The CLI tool can be used to highlight many things in many styles.
|
It also provides a CLI tool which can be used to highlight many things in many styles.
|
||||||
|
|
||||||
# A port of what? Why "kind of"?
|
Currently Tartrazine supports 247 languages, and it has 331 themes (63 from Chroma, the rest are base16 themes via
|
||||||
|
|
||||||
Pygments is a staple of the Python ecosystem, and it's great.
|
|
||||||
It lets you highlight code in many languages, and it has many
|
|
||||||
themes. Chroma is "Pygments for Go", it's actually a port of
|
|
||||||
Pygments to Go, and it's great too.
|
|
||||||
|
|
||||||
I wanted that in Crystal, so I started this project. But I did
|
|
||||||
not read much of the Pygments code. Or much of Chroma's.
|
|
||||||
|
|
||||||
Chroma has taken most of the Pygments lexers and turned them into
|
|
||||||
XML descriptions. What I did was take those XML files from Chroma
|
|
||||||
and a pile of test cases from Pygments, and I slapped them together
|
|
||||||
until the tests passed and my code produced the same output as
|
|
||||||
Chroma. Think of it as *extreme TDD*.
|
|
||||||
|
|
||||||
Currently the pass rate for tests in the supported languages
|
|
||||||
is `96.8%`, which is *not bad for a couple days hacking*.
|
|
||||||
|
|
||||||
This only covers the RegexLexers, which are the most common ones,
|
|
||||||
but it means the supported languages are a subset of Chroma's, which
|
|
||||||
is a subset of Pygments'.
|
|
||||||
|
|
||||||
Currently Tartrazine supports ... 248 languages.
|
|
||||||
|
|
||||||
It has 331 themes (63 from Chroma, the rest are base16 themes via
|
|
||||||
[Sixteen](https://github.com/ralsina/sixteen)
|
[Sixteen](https://github.com/ralsina/sixteen)).
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
@ -58,7 +33,7 @@ $ tartrazine whatever.c -l c -t catppuccin-macchiato --line-numbers -f terminal
|
|||||||
Generate a standalone HTML file from a C source file with the syntax highlighted:
|
Generate a standalone HTML file from a C source file with the syntax highlighted:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
$ tartrazine whatever.c -l c -t catppuccin-macchiato --line-numbers \
|
$ tartrazine whatever.c -t catppuccin-macchiato --line-numbers \
|
||||||
--standalone -f html -o whatever.html
|
--standalone -f html -o whatever.html
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -87,3 +62,29 @@ puts formatter.format(File.read(ARGV[0]), lexer)
|
|||||||
## Contributors
|
## Contributors
|
||||||
|
|
||||||
- [Roberto Alsina](https://github.com/ralsina) - creator and maintainer
|
- [Roberto Alsina](https://github.com/ralsina) - creator and maintainer
|
||||||
|
|
||||||
|
## A port of what? Why "kind of"?
|
||||||
|
|
||||||
|
Pygments is a staple of the Python ecosystem, and it's great.
|
||||||
|
It lets you highlight code in many languages, and it has many
|
||||||
|
themes. Chroma is "Pygments for Go", it's actually a port of
|
||||||
|
Pygments to Go, and it's great too.
|
||||||
|
|
||||||
|
I wanted that in Crystal, so I started this project. But I did
|
||||||
|
not read much of the Pygments code. Or much of Chroma's.
|
||||||
|
|
||||||
|
Chroma has taken most of the Pygments lexers and turned them into
|
||||||
|
XML descriptions. What I did was take those XML files from Chroma
|
||||||
|
and a pile of test cases from Pygments, and I slapped them together
|
||||||
|
until the tests passed and my code produced the same output as
|
||||||
|
Chroma. Think of it as [*extreme TDD*](https://ralsina.me/weblog/posts/tartrazine-reimplementing-pygments.html).
|
||||||
|
|
||||||
|
Currently the pass rate for tests in the supported languages
|
||||||
|
is `96.8%`, which is *not bad for a couple of days of hacking*.
|
||||||
|
|
||||||
|
This only covers the RegexLexers, which are the most common ones,
|
||||||
|
but it means the supported languages are a subset of Chroma's, which
|
||||||
|
is a subset of Pygments'. DelegatingLexers (useful for things like template languages) are supported as well.
|
||||||
|
|
||||||
|
Then performance was bad, so I hacked and hacked and made it
|
||||||
|
significantly [faster than Chroma](https://ralsina.me/weblog/posts/a-tale-of-optimization.html), which is fun.
|
4
TODO.md
4
TODO.md
@ -8,8 +8,8 @@
|
|||||||
* ✅ Implement lexer loader that respects aliases
|
* ✅ Implement lexer loader that respects aliases
|
||||||
* ✅ Implement lexer loader by file extension
|
* ✅ Implement lexer loader by file extension
|
||||||
* ✅ Add --line-numbers to terminal formatter
|
* ✅ Add --line-numbers to terminal formatter
|
||||||
* Implement lexer loader by mime type
|
* ✅ Implement lexer loader by mime type
|
||||||
* ✅ Implement Delegating lexers
|
* ✅ Implement Delegating lexers
|
||||||
* ✅ Add RstLexer
|
* ✅ Add RstLexer
|
||||||
* Add Mako template lexer
|
* Add Mako template lexer
|
||||||
* Implement heuristic lexer detection
|
* ✅ Implement heuristic lexer detection
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
<name>Groff</name>
|
<name>Groff</name>
|
||||||
<alias>groff</alias>
|
<alias>groff</alias>
|
||||||
<alias>nroff</alias>
|
<alias>nroff</alias>
|
||||||
|
<alias>roff</alias>
|
||||||
<alias>man</alias>
|
<alias>man</alias>
|
||||||
<filename>*.[1-9]</filename>
|
<filename>*.[1-9]</filename>
|
||||||
<filename>*.1p</filename>
|
<filename>*.1p</filename>
|
||||||
|
@ -30,12 +30,12 @@
|
|||||||
disambiguations:
|
disambiguations:
|
||||||
- extensions: ['.1', '.2', '.3', '.4', '.5', '.6', '.7', '.8', '.9']
|
- extensions: ['.1', '.2', '.3', '.4', '.5', '.6', '.7', '.8', '.9']
|
||||||
rules:
|
rules:
|
||||||
- language: Roff Manpage
|
- language: man
|
||||||
and:
|
and:
|
||||||
- named_pattern: mdoc-date
|
- named_pattern: mdoc-date
|
||||||
- named_pattern: mdoc-title
|
- named_pattern: mdoc-title
|
||||||
- named_pattern: mdoc-heading
|
- named_pattern: mdoc-heading
|
||||||
- language: Roff Manpage
|
- language: man
|
||||||
and:
|
and:
|
||||||
- named_pattern: man-title
|
- named_pattern: man-title
|
||||||
- named_pattern: man-heading
|
- named_pattern: man-heading
|
||||||
@ -43,12 +43,12 @@ disambiguations:
|
|||||||
pattern: '^\.(?:[A-Za-z]{2}(?:\s|$)|\\")'
|
pattern: '^\.(?:[A-Za-z]{2}(?:\s|$)|\\")'
|
||||||
- extensions: ['.1in', '.1m', '.1x', '.3in', '.3m', '.3p', '.3pm', '.3qt', '.3x', '.man', '.mdoc']
|
- extensions: ['.1in', '.1m', '.1x', '.3in', '.3m', '.3p', '.3pm', '.3qt', '.3x', '.man', '.mdoc']
|
||||||
rules:
|
rules:
|
||||||
- language: Roff Manpage
|
- language: man
|
||||||
and:
|
and:
|
||||||
- named_pattern: mdoc-date
|
- named_pattern: mdoc-date
|
||||||
- named_pattern: mdoc-title
|
- named_pattern: mdoc-title
|
||||||
- named_pattern: mdoc-heading
|
- named_pattern: mdoc-heading
|
||||||
- language: Roff Manpage
|
- language: man
|
||||||
and:
|
and:
|
||||||
- named_pattern: man-title
|
- named_pattern: man-title
|
||||||
- named_pattern: man-heading
|
- named_pattern: man-heading
|
@ -52,6 +52,6 @@ with open("src/constants/lexers.cr", "w") as f:
|
|||||||
f.write(" LEXERS_BY_FILENAME = {\n")
|
f.write(" LEXERS_BY_FILENAME = {\n")
|
||||||
for k in sorted(lexer_by_filename.keys()):
|
for k in sorted(lexer_by_filename.keys()):
|
||||||
v = lexer_by_filename[k]
|
v = lexer_by_filename[k]
|
||||||
f.write(f'"{k}" => {str(list(v)).replace("'", "\"")}, \n')
|
f.write(f'"{k}" => {str(sorted(list(v))).replace("'", "\"")}, \n')
|
||||||
f.write("}\n")
|
f.write("}\n")
|
||||||
f.write("end\n")
|
f.write("end\n")
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
name: tartrazine
|
name: tartrazine
|
||||||
version: 0.6.0
|
version: 0.6.1
|
||||||
|
|
||||||
authors:
|
authors:
|
||||||
- Roberto Alsina <roberto.alsina@gmail.com>
|
- Roberto Alsina <roberto.alsina@gmail.com>
|
||||||
@ -10,11 +10,13 @@ targets:
|
|||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
baked_file_system:
|
baked_file_system:
|
||||||
github: schovi/baked_file_system
|
github: ralsina/baked_file_system
|
||||||
|
branch: master
|
||||||
base58:
|
base58:
|
||||||
github: crystal-china/base58.cr
|
github: crystal-china/base58.cr
|
||||||
sixteen:
|
sixteen:
|
||||||
github: ralsina/sixteen
|
github: ralsina/sixteen
|
||||||
|
branch: main
|
||||||
docopt:
|
docopt:
|
||||||
github: chenkovsky/docopt.cr
|
github: chenkovsky/docopt.cr
|
||||||
|
|
||||||
|
@ -328,6 +328,7 @@ module Tartrazine
|
|||||||
"restructuredtext" => "rst",
|
"restructuredtext" => "rst",
|
||||||
"rexx" => "rexx",
|
"rexx" => "rexx",
|
||||||
"rkt" => "racket",
|
"rkt" => "racket",
|
||||||
|
"roff" => "groff",
|
||||||
"rpmspec" => "rpm_spec",
|
"rpmspec" => "rpm_spec",
|
||||||
"rs" => "rust",
|
"rs" => "rust",
|
||||||
"rst" => "rst",
|
"rst" => "rst",
|
||||||
@ -730,8 +731,8 @@ module Tartrazine
|
|||||||
"*.applescript" => ["applescript"],
|
"*.applescript" => ["applescript"],
|
||||||
"*.aql" => ["arangodb_aql"],
|
"*.aql" => ["arangodb_aql"],
|
||||||
"*.arexx" => ["rexx"],
|
"*.arexx" => ["rexx"],
|
||||||
"*.as" => ["actionscript_3", "actionscript"],
|
"*.as" => ["actionscript", "actionscript_3"],
|
||||||
"*.asm" => ["tasm", "nasm", "z80_assembly"],
|
"*.asm" => ["nasm", "tasm", "z80_assembly"],
|
||||||
"*.au3" => ["autoit"],
|
"*.au3" => ["autoit"],
|
||||||
"*.automount" => ["systemd"],
|
"*.automount" => ["systemd"],
|
||||||
"*.aux" => ["tex"],
|
"*.aux" => ["tex"],
|
||||||
@ -739,7 +740,7 @@ module Tartrazine
|
|||||||
"*.awk" => ["awk"],
|
"*.awk" => ["awk"],
|
||||||
"*.b" => ["brainfuck"],
|
"*.b" => ["brainfuck"],
|
||||||
"*.bal" => ["ballerina"],
|
"*.bal" => ["ballerina"],
|
||||||
"*.bas" => ["vb_net", "qbasic"],
|
"*.bas" => ["qbasic", "vb_net"],
|
||||||
"*.bash" => ["bash"],
|
"*.bash" => ["bash"],
|
||||||
"*.bat" => ["batchfile"],
|
"*.bat" => ["batchfile"],
|
||||||
"*.batch" => ["psl"],
|
"*.batch" => ["psl"],
|
||||||
@ -750,7 +751,7 @@ module Tartrazine
|
|||||||
"*.bnf" => ["bnf"],
|
"*.bnf" => ["bnf"],
|
||||||
"*.bqn" => ["bqn"],
|
"*.bqn" => ["bqn"],
|
||||||
"*.bzl" => ["python"],
|
"*.bzl" => ["python"],
|
||||||
"*.c" => ["c++", "c"],
|
"*.c" => ["c", "c++"],
|
||||||
"*.c++" => ["c++"],
|
"*.c++" => ["c++"],
|
||||||
"*.capnp" => ["cap_n_proto"],
|
"*.capnp" => ["cap_n_proto"],
|
||||||
"*.cc" => ["c++"],
|
"*.cc" => ["c++"],
|
||||||
@ -839,7 +840,7 @@ module Tartrazine
|
|||||||
"*.fx" => ["hlsl"],
|
"*.fx" => ["hlsl"],
|
||||||
"*.fxh" => ["hlsl"],
|
"*.fxh" => ["hlsl"],
|
||||||
"*.fzn" => ["minizinc"],
|
"*.fzn" => ["minizinc"],
|
||||||
"*.gd" => ["gdscript3", "gdscript"],
|
"*.gd" => ["gdscript", "gdscript3"],
|
||||||
"*.gemspec" => ["ruby"],
|
"*.gemspec" => ["ruby"],
|
||||||
"*.geo" => ["glsl"],
|
"*.geo" => ["glsl"],
|
||||||
"*.gleam" => ["gleam"],
|
"*.gleam" => ["gleam"],
|
||||||
@ -849,7 +850,7 @@ module Tartrazine
|
|||||||
"*.graphql" => ["graphql"],
|
"*.graphql" => ["graphql"],
|
||||||
"*.graphqls" => ["graphql"],
|
"*.graphqls" => ["graphql"],
|
||||||
"*.groovy" => ["groovy"],
|
"*.groovy" => ["groovy"],
|
||||||
"*.h" => ["c++", "c", "objective-c"],
|
"*.h" => ["c", "c++", "objective-c"],
|
||||||
"*.h++" => ["c++"],
|
"*.h++" => ["c++"],
|
||||||
"*.ha" => ["hare"],
|
"*.ha" => ["hare"],
|
||||||
"*.handlebars" => ["handlebars"],
|
"*.handlebars" => ["handlebars"],
|
||||||
@ -872,7 +873,7 @@ module Tartrazine
|
|||||||
"*.idc" => ["c"],
|
"*.idc" => ["c"],
|
||||||
"*.idr" => ["idris"],
|
"*.idr" => ["idris"],
|
||||||
"*.ijs" => ["j"],
|
"*.ijs" => ["j"],
|
||||||
"*.inc" => ["objectpascal", "povray", "php", "sourcepawn"],
|
"*.inc" => ["objectpascal", "php", "povray", "sourcepawn"],
|
||||||
"*.inf" => ["ini"],
|
"*.inf" => ["ini"],
|
||||||
"*.ini" => ["ini"],
|
"*.ini" => ["ini"],
|
||||||
"*.ino" => ["arduino"],
|
"*.ino" => ["arduino"],
|
||||||
@ -898,13 +899,13 @@ module Tartrazine
|
|||||||
"*.lpk" => ["objectpascal"],
|
"*.lpk" => ["objectpascal"],
|
||||||
"*.lpr" => ["objectpascal"],
|
"*.lpr" => ["objectpascal"],
|
||||||
"*.lua" => ["lua"],
|
"*.lua" => ["lua"],
|
||||||
"*.m" => ["mathematica", "octave", "matlab", "objective-c", "mason"],
|
"*.m" => ["mason", "mathematica", "matlab", "objective-c", "octave"],
|
||||||
"*.ma" => ["mathematica"],
|
"*.ma" => ["mathematica"],
|
||||||
"*.mak" => ["makefile"],
|
"*.mak" => ["makefile"],
|
||||||
"*.man" => ["groff"],
|
"*.man" => ["groff"],
|
||||||
"*.mao" => ["mako"],
|
"*.mao" => ["mako"],
|
||||||
"*.markdown" => ["markdown"],
|
"*.markdown" => ["markdown"],
|
||||||
"*.mc" => ["monkeyc", "mason"],
|
"*.mc" => ["mason", "monkeyc"],
|
||||||
"*.mcfunction" => ["mcfunction"],
|
"*.mcfunction" => ["mcfunction"],
|
||||||
"*.md" => ["markdown"],
|
"*.md" => ["markdown"],
|
||||||
"*.metal" => ["metal"],
|
"*.metal" => ["metal"],
|
||||||
@ -961,7 +962,7 @@ module Tartrazine
|
|||||||
"*.pml" => ["promela"],
|
"*.pml" => ["promela"],
|
||||||
"*.pony" => ["pony"],
|
"*.pony" => ["pony"],
|
||||||
"*.pov" => ["povray"],
|
"*.pov" => ["povray"],
|
||||||
"*.pp" => ["puppet", "objectpascal"],
|
"*.pp" => ["objectpascal", "puppet"],
|
||||||
"*.pq" => ["powerquery"],
|
"*.pq" => ["powerquery"],
|
||||||
"*.pr" => ["promela"],
|
"*.pr" => ["promela"],
|
||||||
"*.prm" => ["promela"],
|
"*.prm" => ["promela"],
|
||||||
@ -1010,7 +1011,7 @@ module Tartrazine
|
|||||||
"*.rst" => ["rst"],
|
"*.rst" => ["rst"],
|
||||||
"*.rvt" => ["tcl"],
|
"*.rvt" => ["tcl"],
|
||||||
"*.rx" => ["rexx"],
|
"*.rx" => ["rexx"],
|
||||||
"*.s" => ["armasm", "r", "gas"],
|
"*.s" => ["armasm", "gas", "r"],
|
||||||
"*.sage" => ["python"],
|
"*.sage" => ["python"],
|
||||||
"*.sas" => ["sas"],
|
"*.sas" => ["sas"],
|
||||||
"*.sass" => ["sass"],
|
"*.sass" => ["sass"],
|
||||||
@ -1023,7 +1024,7 @@ module Tartrazine
|
|||||||
"*.scope" => ["systemd"],
|
"*.scope" => ["systemd"],
|
||||||
"*.scss" => ["scss"],
|
"*.scss" => ["scss"],
|
||||||
"*.sed" => ["sed"],
|
"*.sed" => ["sed"],
|
||||||
"*.service" => ["systemd", "ini"],
|
"*.service" => ["ini", "systemd"],
|
||||||
"*.sh" => ["bash"],
|
"*.sh" => ["bash"],
|
||||||
"*.sh-session" => ["bash_session"],
|
"*.sh-session" => ["bash_session"],
|
||||||
"*.sieve" => ["sieve"],
|
"*.sieve" => ["sieve"],
|
||||||
@ -1033,7 +1034,7 @@ module Tartrazine
|
|||||||
"*.smali" => ["smali"],
|
"*.smali" => ["smali"],
|
||||||
"*.sml" => ["standard_ml"],
|
"*.sml" => ["standard_ml"],
|
||||||
"*.snobol" => ["snobol"],
|
"*.snobol" => ["snobol"],
|
||||||
"*.socket" => ["systemd", "ini"],
|
"*.socket" => ["ini", "systemd"],
|
||||||
"*.sol" => ["solidity"],
|
"*.sol" => ["solidity"],
|
||||||
"*.sp" => ["sourcepawn"],
|
"*.sp" => ["sourcepawn"],
|
||||||
"*.sparql" => ["sparql"],
|
"*.sparql" => ["sparql"],
|
||||||
@ -1068,7 +1069,7 @@ module Tartrazine
|
|||||||
"*.tpl" => ["smarty"],
|
"*.tpl" => ["smarty"],
|
||||||
"*.tpp" => ["c++"],
|
"*.tpp" => ["c++"],
|
||||||
"*.trig" => ["psl"],
|
"*.trig" => ["psl"],
|
||||||
"*.ts" => ["typoscript", "typescript"],
|
"*.ts" => ["typescript", "typoscript"],
|
||||||
"*.tst" => ["scilab"],
|
"*.tst" => ["scilab"],
|
||||||
"*.tsx" => ["typescript"],
|
"*.tsx" => ["typescript"],
|
||||||
"*.ttl" => ["turtle"],
|
"*.ttl" => ["turtle"],
|
||||||
@ -1104,7 +1105,7 @@ module Tartrazine
|
|||||||
"*.xml" => ["xml"],
|
"*.xml" => ["xml"],
|
||||||
"*.xsd" => ["xml"],
|
"*.xsd" => ["xml"],
|
||||||
"*.xsl" => ["xml"],
|
"*.xsl" => ["xml"],
|
||||||
"*.xslt" => ["xml", "html"],
|
"*.xslt" => ["html", "xml"],
|
||||||
"*.yaml" => ["yaml"],
|
"*.yaml" => ["yaml"],
|
||||||
"*.yang" => ["yang"],
|
"*.yang" => ["yang"],
|
||||||
"*.yml" => ["yaml"],
|
"*.yml" => ["yaml"],
|
||||||
|
@ -11,7 +11,7 @@ module Tartrazine
|
|||||||
"#{i + 1}".rjust(4).ljust(5)
|
"#{i + 1}".rjust(4).ljust(5)
|
||||||
end
|
end
|
||||||
|
|
||||||
def format(text : String, lexer : Lexer) : String
|
def format(text : String, lexer : BaseLexer) : String
|
||||||
outp = String::Builder.new("")
|
outp = String::Builder.new("")
|
||||||
format(text, lexer, outp)
|
format(text, lexer, outp)
|
||||||
outp.to_s
|
outp.to_s
|
||||||
|
@ -1,13 +1,12 @@
|
|||||||
require "yaml"
|
require "yaml"
|
||||||
|
|
||||||
# Use linguist's heuristics to disambiguate between languages
|
# Use linguist's heuristics to disambiguate between languages
|
||||||
# This is *shamelessly* stolen from https://github.com/github-linguist/linguist
|
# This is *shamelessly* stolen from https://github.com/github-linguist/linguist
|
||||||
# and ported to Crystal. Deepest thanks to the authors of Linguist
|
# and ported to Crystal. Deepest thanks to the authors of Linguist
|
||||||
# for licensing it liberally.
|
# for licensing it liberally.
|
||||||
#
|
#
|
||||||
# Consider this code (c) 2017 GitHub, Inc. even if I wrote it.
|
# Consider this code (c) 2017 GitHub, Inc. even if I wrote it.
|
||||||
module Linguist
|
module Linguist
|
||||||
|
|
||||||
class Heuristic
|
class Heuristic
|
||||||
include YAML::Serializable
|
include YAML::Serializable
|
||||||
|
|
||||||
@ -80,7 +79,3 @@ require "yaml"
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
h = Linguist::Heuristic.from_yaml(File.read("heuristics/heuristics.yml"))
|
|
||||||
fname = "/usr/include/sqlite3.h"
|
|
||||||
p! h.run(fname, File.read(fname))
|
|
||||||
|
30
src/lexer.cr
30
src/lexer.cr
@ -9,13 +9,21 @@ module Tartrazine
|
|||||||
|
|
||||||
# Get the lexer object for a language name
|
# Get the lexer object for a language name
|
||||||
# FIXME: support mimetypes
|
# The name is tried first, then the filename, then the mimetype.
|
||||||
def self.lexer(name : String? = nil, filename : String? = nil) : BaseLexer
|
def self.lexer(name : String? = nil, filename : String? = nil, mimetype : String? = nil) : BaseLexer
|
||||||
return lexer_by_name(name) if name && name != "autodetect"
|
return lexer_by_name(name) if name && name != "autodetect"
|
||||||
return lexer_by_filename(filename) if filename
|
return lexer_by_filename(filename) if filename
|
||||||
|
return lexer_by_mimetype(mimetype) if mimetype
|
||||||
|
|
||||||
Lexer.from_xml(LexerFiles.get("/#{LEXERS_BY_NAME["plaintext"]}.xml").gets_to_end)
|
Lexer.from_xml(LexerFiles.get("/#{LEXERS_BY_NAME["plaintext"]}.xml").gets_to_end)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
private def self.lexer_by_mimetype(mimetype : String) : BaseLexer
|
||||||
|
lexer_file_name = LEXERS_BY_MIMETYPE.fetch(mimetype, nil)
|
||||||
|
raise Exception.new("Unknown mimetype: #{mimetype}") if lexer_file_name.nil?
|
||||||
|
|
||||||
|
Lexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
|
||||||
|
end
|
||||||
|
|
||||||
private def self.lexer_by_name(name : String) : BaseLexer
|
private def self.lexer_by_name(name : String) : BaseLexer
|
||||||
lexer_file_name = LEXERS_BY_NAME.fetch(name.downcase, nil)
|
lexer_file_name = LEXERS_BY_NAME.fetch(name.downcase, nil)
|
||||||
return create_delegating_lexer(name) if lexer_file_name.nil? && name.includes? "+"
|
return create_delegating_lexer(name) if lexer_file_name.nil? && name.includes? "+"
|
||||||
@ -36,12 +44,30 @@ module Tartrazine
|
|||||||
when 1
|
when 1
|
||||||
lexer_file_name = candidates.first
|
lexer_file_name = candidates.first
|
||||||
else
|
else
|
||||||
raise Exception.new("Multiple lexers match the filename: #{candidates.to_a.join(", ")}")
|
lexer_file_name = self.lexer_by_content(filename)
|
||||||
|
begin
|
||||||
|
return self.lexer(lexer_file_name)
|
||||||
|
rescue ex : Exception
|
||||||
|
raise Exception.new("Multiple lexers match the filename: #{candidates.to_a.join(", ")}, heuristics suggest #{lexer_file_name} but there is no matching lexer.")
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
Lexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
|
Lexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
private def self.lexer_by_content(fname : String) : String?
|
||||||
|
h = Linguist::Heuristic.from_yaml(LexerFiles.get("/heuristics.yml").gets_to_end)
|
||||||
|
result = h.run(fname, File.read(fname))
|
||||||
|
case result
|
||||||
|
when Nil
|
||||||
|
raise Exception.new "No lexer found for #{fname}"
|
||||||
|
when String
|
||||||
|
result.as(String)
|
||||||
|
when Array(String)
|
||||||
|
result.first
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
private def self.create_delegating_lexer(name : String) : BaseLexer
|
private def self.create_delegating_lexer(name : String) : BaseLexer
|
||||||
language, root = name.split("+", 2)
|
language, root = name.split("+", 2)
|
||||||
language_lexer = lexer(language)
|
language_lexer = lexer(language)
|
||||||
|
Reference in New Issue
Block a user