Mirror of https://github.com/ralsina/tartrazine.git, synced 2025-07-01 12:27:08 -03:00

Compare commits

33 commits
Commits (SHA1):

8d7faf2098
2e87762f1b
88f5674917
ce6f3d29b5
46d6d3f467
78ddc69937
b1ad7b64c0
cbedf8a8db
ec8c53c823
e3a1ce37b4
b4f38e00e1
08daabe1c3
e8d405fc99
e295256573
e40c8b586c
bc34f93cc5
f64c91801e
8e29500fcf
f2e638ce3b
84ee7e6934
89d212b71c
a92d2501f7
6b44bcb5ad
86a5894429
be12e0f4f1
96dcb7e15e
d1762f477a
f98f44365f
d0c2b1764a
e6a292ade0
4ced996f90
fd5af6ba3b
47237eecc3
.ameba.yml (106 changed lines)
@@ -1,5 +1,5 @@
# This configuration file was generated by `ameba --gen-config`
-# on 2024-08-04 23:09:09 UTC using Ameba version 1.6.1.
+# on 2024-08-12 22:00:49 UTC using Ameba version 1.6.1.
# The point is for the user to remove these configuration records
# one by one as the reported problems are removed from the code base.

@@ -9,7 +9,7 @@ Documentation/DocumentationAdmonition:
  Description: Reports documentation admonitions
  Timezone: UTC
  Excluded:
  - src/tartrazine.cr
  - src/lexer.cr
  - src/actions.cr
  Admonitions:
  - TODO
@@ -17,3 +17,105 @@ Documentation/DocumentationAdmonition:
  - BUG
  Enabled: true
  Severity: Warning
+
+# Problems found: 22
+# Run `ameba --only Lint/MissingBlockArgument` for details
+Lint/MissingBlockArgument:
+  Description: Disallows yielding method definitions without block argument
+  Excluded:
+  - pygments/tests/examplefiles/cr/test.cr
+  Enabled: true
+  Severity: Warning
+
+# Problems found: 1
+# Run `ameba --only Lint/NotNil` for details
+Lint/NotNil:
+  Description: Identifies usage of `not_nil!` calls
+  Excluded:
+  - pygments/tests/examplefiles/cr/test.cr
+  Enabled: true
+  Severity: Warning
+
+# Problems found: 34
+# Run `ameba --only Lint/ShadowingOuterLocalVar` for details
+Lint/ShadowingOuterLocalVar:
+  Description: Disallows the usage of the same name as outer local variables for block
+    or proc arguments
+  Excluded:
+  - pygments/tests/examplefiles/cr/test.cr
+  Enabled: true
+  Severity: Warning
+
+# Problems found: 1
+# Run `ameba --only Lint/UnreachableCode` for details
+Lint/UnreachableCode:
+  Description: Reports unreachable code
+  Excluded:
+  - pygments/tests/examplefiles/cr/test.cr
+  Enabled: true
+  Severity: Warning
+
+# Problems found: 6
+# Run `ameba --only Lint/UselessAssign` for details
+Lint/UselessAssign:
+  Description: Disallows useless variable assignments
+  ExcludeTypeDeclarations: false
+  Excluded:
+  - pygments/tests/examplefiles/cr/test.cr
+  Enabled: true
+  Severity: Warning
+
+# Problems found: 3
+# Run `ameba --only Naming/BlockParameterName` for details
+Naming/BlockParameterName:
+  Description: Disallows non-descriptive block parameter names
+  MinNameLength: 3
+  AllowNamesEndingInNumbers: true
+  Excluded:
+  - pygments/tests/examplefiles/cr/test.cr
+  AllowedNames:
+  - _
+  - e
+  - i
+  - j
+  - k
+  - v
+  - x
+  - y
+  - ex
+  - io
+  - ws
+  - op
+  - tx
+  - id
+  - ip
+  - k1
+  - k2
+  - v1
+  - v2
+  ForbiddenNames: []
+  Enabled: true
+  Severity: Convention
+
+# Problems found: 1
+# Run `ameba --only Naming/RescuedExceptionsVariableName` for details
+Naming/RescuedExceptionsVariableName:
+  Description: Makes sure that rescued exceptions variables are named as expected
+  Excluded:
+  - pygments/tests/examplefiles/cr/test.cr
+  AllowedNames:
+  - e
+  - ex
+  - exception
+  - error
+  Enabled: true
+  Severity: Convention
+
+# Problems found: 6
+# Run `ameba --only Naming/TypeNames` for details
+Naming/TypeNames:
+  Description: Enforces type names in camelcase manner
+  Excluded:
+  - pygments/tests/examplefiles/cr/test.cr
+  Enabled: true
+  Severity: Convention
Makefile (2 changed lines)
@@ -1,5 +1,5 @@
build: $(wildcard src/**/*.cr) $(wildcard lexers/*xml) $(wildcard styles/*xml) shard.yml
-	shards build -Dstrict_multi_assign -Dno_number_autocast
+	shards build -Dstrict_multi_assign -Dno_number_autocast -d --error-trace
release: $(wildcard src/**/*.cr) $(wildcard lexers/*xml) $(wildcard styles/*xml) shard.yml
	shards build --release
static: $(wildcard src/**/*.cr) $(wildcard lexers/*xml) $(wildcard styles/*xml) shard.yml
README.md (15 changed lines)
@@ -31,14 +31,21 @@ is a subset of Pygments'.

Currently Tartrazine supports ... 241 languages.

-It has 332 themes (64 from Chroma, the rest are base16 themes via
+It has 331 themes (63 from Chroma, the rest are base16 themes via
[Sixteen](https://github.com/ralsina/sixteen)

## Installation

-This will have a CLI tool that can be installed, but it's not
-there yet.
+From prebuilt binaries:
+
+Each release provides statically-linked binaries that should
+work on any Linux. Get them from the [releases page](https://github.com/ralsina/tartrazine/releases) and put them in your PATH.
+
+To build from source:
+
+1. Clone this repo
+2. Run `make` to build the `tartrazine` binary
+3. Copy the binary somewhere in your PATH.

## Usage

@@ -62,4 +69,4 @@ puts Tartrazine::Html.new.format(File.read(ARGV[0]), lexer, theme)

## Contributors

-- [Roberto Alsina](https://github.com/ralsina) - creator and maintainer
+- [Roberto Alsina](https://github.com/ralsina) - creator and maintainer
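The usage hunk above still shows the pre-refactor call (formatter.format(text, lexer, theme)); with the API as reworked in this compare, where the formatter carries the theme, a minimal library sketch looks like this (the input file name is illustrative):

require "tartrazine"

lexer = Tartrazine.lexer(name: "crystal")
theme = Tartrazine.theme("default-dark")
formatter = Tartrazine::Html.new(theme: theme, standalone: true)
puts formatter.format(File.read("example.cr"), lexer)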
TODO.md (10 changed lines)
@@ -2,6 +2,10 @@

## TODO

-* Implement styles
-* Implement formatters
-* Implement lexer loader that respects aliases, etc
+* ✅ Implement styles
+* ✅ Implement formatters
+* ✅ Implement CLI
+* ✅ Implement lexer loader that respects aliases
+* ✅ Implement lexer loader by file extension
+* ✅ Add --line-numbers to terminal formatter
+* Implement lexer loader by mime type
scripts/lexer_metadata.py (new file, 54 lines)
@@ -0,0 +1,54 @@
# This script parses the metadata of all the lexers and generates
# a datafile with all the information so we don't have to instantiate
# all the lexers to get the information.

import glob
from collections import defaultdict

lexer_by_name = {}
lexer_by_mimetype = defaultdict(set)
lexer_by_filename = defaultdict(set)


for fname in glob.glob("lexers/*.xml"):
    aliases = set([])
    mimetypes = set([])
    filenames = set([])
    print(fname)
    with open(fname) as f:
        lexer_name = fname.split("/")[-1].split(".")[0]
        for line in f:
            if "</config" in line:
                break
            if "<filename>" in line:
                filenames.add(line.split(">")[1].split("<")[0].lower())
            if "<mime_type>" in line:
                mimetypes.add(line.split(">")[1].split("<")[0].lower())
            if "<alias>" in line:
                aliases.add(line.split(">")[1].split("<")[0].lower())
            if "<name>" in line:
                aliases.add(line.split(">")[1].split("<")[0].lower())
    for alias in aliases:
        if alias in lexer_by_name and alias != lexer_by_name[alias]:
            raise Exception(f"Alias {alias} already in use by {lexer_by_name[alias]}")
        lexer_by_name[alias] = lexer_name
    for mimetype in mimetypes:
        lexer_by_mimetype[mimetype] = lexer_name
    for filename in filenames:
        lexer_by_filename[filename].add(lexer_name)

with open("src/constants/lexers.cr", "w") as f:
    f.write("module Tartrazine\n")
    f.write("  LEXERS_BY_NAME = {\n")
    for k, v in lexer_by_name.items():
        f.write(f'"{k}" => "{v}", \n')
    f.write("}\n")
    f.write("  LEXERS_BY_MIMETYPE = {\n")
    for k, v in lexer_by_mimetype.items():
        f.write(f'"{k}" => "{v}", \n')
    f.write("}\n")
    f.write("  LEXERS_BY_FILENAME = {\n")
    for k, v in lexer_by_filename.items():
        names = str(list(v)).replace("'", '"')
        f.write(f'"{k}" => {names}, \n')
    f.write("}\n")
    f.write("end\n")
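For reference, src/constants/lexers.cr as emitted by this script (its diff is suppressed further down as too large) has the following shape; the entries shown here are illustrative, the real ones come from the lexers/*.xml metadata:

module Tartrazine
  LEXERS_BY_NAME = {
    "crystal" => "crystal",
  }
  LEXERS_BY_MIMETYPE = {
    "text/x-crystal" => "crystal",
  }
  LEXERS_BY_FILENAME = {
    "*.cr" => ["crystal"],
  }
end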
scripts/token_abbrevs.py

@@ -1,24 +1,55 @@
# Script to generate abbreviations for tokens. Parses all lexers
# and styles files to find all token names and generate a unique
# abbreviation for each one. The abbreviations are generated by
# taking the uppercase letters of the token name and converting
# them to lowercase. If the abbreviation is not unique, the script
# will print a warning and exit.

import sys
import string
+import glob

-# Run it as grep token lexers/* | python scripts/token_abbrevs.py
+tokens = {"Highlight"}
+abbrevs = {"Highlight": "hl"}

def abbr(line):
    return "".join(c for c in line if c in string.ascii_uppercase).lower()

-abbrevs = {}
-tokens = set([])
-for line in sys.stdin:
-    if "<token" not in line:
-        continue
-    line = line.strip()
-    line = line.split('<token ', 1)[-1]
-    line = line.split('"')[1]
-    abbrevs[line] = abbr(line)
-    tokens.add(line)
def check_abbrevs():
    if len(abbrevs) != len(tokens):
        print("Warning: Abbreviations are not unique")
        print(len(abbrevs), len(tokens))
        sys.exit(1)

-print("Abbreviations: {")
-for k, v in abbrevs.items():
-    print(f'    "{k}" => "{v}",')
-print("}")
+# Processes all files in lexers looking for token names
+for fname in glob.glob("lexers/*.xml"):
+    with open(fname) as f:
+        for line in f:
+            if "<token" not in line:
+                continue
+            line = line.strip()
+            line = line.split('<token ', 1)[-1]
+            line = line.split('"')[1]
+            abbrevs[line] = abbr(line)
+            tokens.add(line)
+check_abbrevs()
+
+# Processes all files in styles looking for token names too
+for fname in glob.glob("styles/*.xml"):
+    with open(fname) as f:
+        for line in f:
+            if "<entry" not in line:
+                continue
+            line = line.strip()
+            line = line.split('type=', 1)[-1]
+            line = line.split('"')[1]
+            abbrevs[line] = abbr(line)
+            tokens.add(line)
+check_abbrevs()
+
+with open("src/constants/token_abbrevs.cr", "w") as outf:
+    outf.write("module Tartrazine\n")
+    outf.write("  Abbreviations = {\n")
+    for k in sorted(abbrevs.keys()):
+        outf.write(f'    "{k}" => "{abbrevs[k]}",\n')
+    outf.write("  }\nend\n")
shard.yml

@@ -1,5 +1,5 @@
name: tartrazine
-version: 0.1.1
+version: 0.3.0

authors:
- Roberto Alsina <roberto.alsina@gmail.com>

@@ -15,6 +15,8 @@ dependencies:
    github: crystal-china/base58.cr
  sixteen:
    github: ralsina/sixteen
+  docopt:
+    github: chenkovsky/docopt.cr

crystal: ">= 1.13.0"
src/actions.cr

@@ -1,5 +1,4 @@
require "./actions"
-require "./constants"
require "./formatter"
require "./rules"
require "./styles"
src/constants/lexers.cr (new file, 1160 lines)

File diff suppressed because it is too large
src/constants/token_abbrevs.cr

@@ -1,92 +1,100 @@
module Tartrazine
  Abbreviations = {
    "Background" => "b",
    "Text" => "t",
    "CodeLine" => "cl",
    "Comment" => "c",
    "CommentHashbang" => "ch",
    "CommentMultiline" => "cm",
    "CommentPreproc" => "cp",
    "CommentPreprocFile" => "cpf",
    "CommentSingle" => "cs",
    "CommentSpecial" => "cs",
    "NameVariable" => "nv",
    "Keyword" => "k",
    "NameFunction" => "nf",
    "Punctuation" => "p",
    "Operator" => "o",
    "LiteralNumberInteger" => "lni",
    "NameBuiltin" => "nb",
    "Name" => "n",
    "OperatorWord" => "ow",
    "LiteralStringSingle" => "lss",
    "Literal" => "l",
    "NameClass" => "nc",
    "CommentMultiline" => "cm",
    "LiteralStringRegex" => "lsr",
    "KeywordDeclaration" => "kd",
    "KeywordConstant" => "kc",
    "NameOther" => "no",
    "LiteralNumberFloat" => "lnf",
    "LiteralNumberHex" => "lnh",
    "LiteralStringDouble" => "lsd",
    "KeywordType" => "kt",
    "NameNamespace" => "nn",
    "NameAttribute" => "na",
    "KeywordReserved" => "kr",
    "CommentPreproc" => "cp",
    "KeywordNamespace" => "kn",
    "NameConstant" => "nc",
    "NameLabel" => "nl",
    "LiteralString" => "ls",
    "LiteralStringChar" => "lsc",
    "TextWhitespace" => "tw",
    "LiteralStringEscape" => "lse",
    "LiteralNumber" => "ln",
    "Other" => "o",
    "LiteralStringBoolean" => "lsb",
    "NameProperty" => "np",
    "Comment" => "c",
    "NameTag" => "nt",
    "LiteralStringOther" => "lso",
    "NameVariableGlobal" => "nvg",
    "NameBuiltinPseudo" => "nbp",
    "LiteralNumberBin" => "lnb",
    "KeywordPseudo" => "kp",
    "CommentPreprocFile" => "cpf",
    "LiteralStringAffix" => "lsa",
    "LiteralStringDelimiter" => "lsd",
    "LiteralNumberOct" => "lno",
    "Error" => "e",
    "Generic" => "g",
    "LiteralNumberIntegerLong" => "lnil",
    "NameDecorator" => "nd",
    "LiteralStringInterpol" => "lsi",
    "LiteralStringBacktick" => "lsb",
    "GenericPrompt" => "gp",
    "GenericOutput" => "go",
    "LiteralStringName" => "lsn",
    "LiteralStringHeredoc" => "lsh",
    "LiteralStringSymbol" => "lss",
    "NameVariableInstance" => "nvi",
    "LiteralOther" => "lo",
    "NameVariableClass" => "nvc",
    "NameOperator" => "no",
    "None" => "n",
    "LiteralStringDoc" => "lsd",
    "NameException" => "ne",
    "GenericSubheading" => "gs",
    "GenericStrong" => "gs",
    "GenericDeleted" => "gd",
    "GenericInserted" => "gi",
    "GenericHeading" => "gh",
    "NameEntity" => "ne",
    "NamePseudo" => "np",
    "CommentHashbang" => "ch",
    "TextPunctuation" => "tp",
    "NameVariableAnonymous" => "nva",
    "NameVariableMagic" => "nvm",
    "NameFunctionMagic" => "nfm",
    "GenericEmph" => "ge",
    "GenericUnderline" => "gu",
    "LiteralStringAtom" => "lsa",
    "LiteralDate" => "ld",
    "GenericError" => "ge",
    "TextSymbol" => "ts",
    "GenericHeading" => "gh",
    "GenericInserted" => "gi",
    "GenericOutput" => "go",
    "GenericPrompt" => "gp",
    "GenericStrong" => "gs",
    "GenericSubheading" => "gs",
    "GenericTraceback" => "gt",
    "GenericUnderline" => "gu",
    "Highlight" => "hl",
    "Keyword" => "k",
    "KeywordConstant" => "kc",
    "KeywordDeclaration" => "kd",
    "KeywordNamespace" => "kn",
    "KeywordPseudo" => "kp",
    "KeywordReserved" => "kr",
    "KeywordType" => "kt",
    "LineHighlight" => "lh",
    "LineNumbers" => "ln",
    "LineNumbersTable" => "lnt",
    "LineTable" => "lt",
    "LineTableTD" => "lttd",
    "Literal" => "l",
    "LiteralDate" => "ld",
    "LiteralNumber" => "ln",
    "LiteralNumberBin" => "lnb",
    "LiteralNumberFloat" => "lnf",
    "LiteralNumberHex" => "lnh",
    "LiteralNumberInteger" => "lni",
    "LiteralNumberIntegerLong" => "lnil",
    "LiteralNumberOct" => "lno",
    "LiteralOther" => "lo",
    "LiteralString" => "ls",
    "LiteralStringAffix" => "lsa",
    "LiteralStringAtom" => "lsa",
    "LiteralStringBacktick" => "lsb",
    "LiteralStringBoolean" => "lsb",
    "LiteralStringChar" => "lsc",
    "LiteralStringDelimiter" => "lsd",
    "LiteralStringDoc" => "lsd",
    "LiteralStringDouble" => "lsd",
    "LiteralStringEscape" => "lse",
    "LiteralStringHeredoc" => "lsh",
    "LiteralStringInterpol" => "lsi",
    "LiteralStringName" => "lsn",
    "LiteralStringOther" => "lso",
    "LiteralStringRegex" => "lsr",
    "LiteralStringSingle" => "lss",
    "LiteralStringSymbol" => "lss",
    "Name" => "n",
    "NameAttribute" => "na",
    "NameBuiltin" => "nb",
    "NameBuiltinPseudo" => "nbp",
    "NameClass" => "nc",
    "NameConstant" => "nc",
    "NameDecorator" => "nd",
    "NameEntity" => "ne",
    "NameException" => "ne",
    "NameFunction" => "nf",
    "NameFunctionMagic" => "nfm",
    "NameKeyword" => "nk",
    "NameLabel" => "nl",
    "NameNamespace" => "nn",
    "NameOperator" => "no",
    "NameOther" => "no",
    "NameProperty" => "np",
    "NamePseudo" => "np",
    "NameTag" => "nt",
    "NameVariable" => "nv",
    "NameVariableAnonymous" => "nva",
    "NameVariableClass" => "nvc",
    "NameVariableGlobal" => "nvg",
    "NameVariableInstance" => "nvi",
    "NameVariableMagic" => "nvm",
    "None" => "n",
    "Operator" => "o",
    "OperatorWord" => "ow",
    "Other" => "o",
    "Punctuation" => "p",
    "Text" => "t",
    "TextPunctuation" => "tp",
    "TextSymbol" => "ts",
    "TextWhitespace" => "tw",
  }
end
src/formatter.cr

@@ -1,5 +1,4 @@
require "./actions"
-require "./constants"
require "./formatter"
require "./rules"
require "./styles"

@@ -10,102 +9,16 @@ module Tartrazine
  # This is the base class for all formatters.
  abstract class Formatter
    property name : String = ""
    property theme : Theme = Tartrazine.theme("default-dark")

-    def format(text : String, lexer : Lexer, theme : Theme) : String
+    # Format the text using the given lexer.
+    def format(text : String, lexer : Lexer) : String
      raise Exception.new("Not implemented")
    end

-    def get_style_defs(theme : Theme) : String
+    # Return the styles, if the formatter supports it.
+    def style_defs : String
      raise Exception.new("Not implemented")
    end
  end

-  class Ansi < Formatter
-    def format(text : String, lexer : Lexer, theme : Theme) : String
-      output = String.build do |outp|
-        lexer.tokenize(text).each do |token|
-          outp << self.colorize(token[:value], token[:type], theme)
-        end
-      end
-      output
-    end
-
-    def colorize(text : String, token : String, theme : Theme) : String
-      style = theme.styles.fetch(token, nil)
-      return text if style.nil?
-      if theme.styles.has_key?(token)
-        s = theme.styles[token]
-      else
-        # Themes don't contain information for each specific
-        # token type. However, they may contain information
-        # for a parent style. Worst case, we go to the root
-        # (Background) style.
-        s = theme.styles[theme.style_parents(token).reverse.find { |parent|
-          theme.styles.has_key?(parent)
-        }]
-      end
-      colorized = text.colorize
-      s.color.try { |c| colorized = colorized.fore(c.colorize) }
-      # Intentionally not setting background color
-      colorized.mode(:bold) if s.bold
-      colorized.mode(:italic) if s.italic
-      colorized.mode(:underline) if s.underline
-      colorized.to_s
-    end
-  end
-
-  class Html < Formatter
-    def format(text : String, lexer : Lexer, theme : Theme) : String
-      output = String.build do |outp|
-        outp << "<html><head><style>"
-        outp << get_style_defs(theme)
-        outp << "</style></head><body>"
-        outp << "<pre class=\"#{get_css_class("Background", theme)}\"><code class=\"#{get_css_class("Background", theme)}\">"
-        lexer.tokenize(text).each do |token|
-          fragment = "<span class=\"#{get_css_class(token[:type], theme)}\">#{token[:value]}</span>"
-          outp << fragment
-        end
-        outp << "</code></pre></body></html>"
-      end
-      output
-    end
-
-    # ameba:disable Metrics/CyclomaticComplexity
-    def get_style_defs(theme : Theme) : String
-      output = String.build do |outp|
-        theme.styles.each do |token, style|
-          outp << ".#{get_css_class(token, theme)} {"
-          # These are set or nil
-          outp << "color: #{style.color.try &.hex};" if style.color
-          outp << "background-color: #{style.background.try &.hex};" if style.background
-          outp << "border: 1px solid #{style.border.try &.hex};" if style.border
-
-          # These are true/false/nil
-          outp << "border: none;" if style.border == false
-          outp << "font-weight: bold;" if style.bold
-          outp << "font-weight: 400;" if style.bold == false
-          outp << "font-style: italic;" if style.italic
-          outp << "font-style: normal;" if style.italic == false
-          outp << "text-decoration: underline;" if style.underline
-          outp << "text-decoration: none;" if style.underline == false
-
-          outp << "}"
-        end
-      end
-      output
-    end
-
-    # Given a token type, return the CSS class to use.
-    def get_css_class(token, theme)
-      return Abbreviations[token] if theme.styles.has_key?(token)
-
-      # Themes don't contain information for each specific
-      # token type. However, they may contain information
-      # for a parent style. Worst case, we go to the root
-      # (Background) style.
-      Abbreviations[theme.style_parents(token).reverse.find { |parent|
-        theme.styles.has_key?(parent)
-      }]
-    end
-  end
end
src/formatters/ansi.cr (new file, 46 lines)
@@ -0,0 +1,46 @@
require "../formatter"

module Tartrazine
  class Ansi < Formatter
    property? line_numbers : Bool = false

    def initialize(@theme : Theme = Tartrazine.theme("default-dark"), @line_numbers : Bool = false)
    end

    def format(text : String, lexer : Lexer) : String
      output = String.build do |outp|
        lexer.group_tokens_in_lines(lexer.tokenize(text)).each_with_index do |line, i|
          label = line_numbers? ? "#{i + 1}".rjust(4).ljust(5) : ""
          outp << label
          line.each do |token|
            outp << colorize(token[:value], token[:type])
          end
        end
      end
      output
    end

    def colorize(text : String, token : String) : String
      style = theme.styles.fetch(token, nil)
      return text if style.nil?
      if theme.styles.has_key?(token)
        s = theme.styles[token]
      else
        # Themes don't contain information for each specific
        # token type. However, they may contain information
        # for a parent style. Worst case, we go to the root
        # (Background) style.
        s = theme.styles[theme.style_parents(token).reverse.find { |parent|
          theme.styles.has_key?(parent)
        }]
      end
      colorized = text.colorize
      s.color.try { |col| colorized = colorized.fore(col.colorize) }
      # Intentionally not setting background color
      colorized.mode(:bold) if s.bold
      colorized.mode(:italic) if s.italic
      colorized.mode(:underline) if s.underline
      colorized.to_s
    end
  end
end
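A minimal sketch of driving the terminal formatter above (lexer name and input file are illustrative):

require "tartrazine"

lexer = Tartrazine.lexer(name: "crystal")
formatter = Tartrazine::Ansi.new(theme: Tartrazine.theme("default-dark"), line_numbers: true)
puts formatter.format(File.read("example.cr"), lexer)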
src/formatters/html.cr (new file, 123 lines)
@@ -0,0 +1,123 @@
require "../constants/token_abbrevs.cr"
require "../formatter"

module Tartrazine
  class Html < Formatter
    # property line_number_in_table : Bool = false
    # property with_classes : Bool = true
    property class_prefix : String = ""
    property highlight_lines : Array(Range(Int32, Int32)) = [] of Range(Int32, Int32)
    property line_number_id_prefix : String = "line-"
    property line_number_start : Int32 = 1
    property tab_width = 8
    property? line_numbers : Bool = false
    property? linkable_line_numbers : Bool = true
    property? standalone : Bool = false
    property? surrounding_pre : Bool = true
    property? wrap_long_lines : Bool = false
    property weight_of_bold : Int32 = 600

    property theme : Theme

    def initialize(@theme : Theme = Tartrazine.theme("default-dark"), *,
                   @highlight_lines = [] of Range(Int32, Int32),
                   @class_prefix : String = "",
                   @line_number_id_prefix = "line-",
                   @line_number_start = 1,
                   @tab_width = 8,
                   @line_numbers : Bool = false,
                   @linkable_line_numbers : Bool = true,
                   @standalone : Bool = false,
                   @surrounding_pre : Bool = true,
                   @wrap_long_lines : Bool = false,
                   @weight_of_bold : Int32 = 600)
    end

    def format(text : String, lexer : Lexer) : String
      text = format_text(text, lexer)
      if standalone?
        text = wrap_standalone(text)
      end
      text
    end

    # Wrap text into a full HTML document, including the CSS for the theme
    def wrap_standalone(text) : String
      output = String.build do |outp|
        outp << "<!DOCTYPE html><html><head><style>"
        outp << style_defs
        outp << "</style></head><body>"
        outp << text
        outp << "</body></html>"
      end
      output
    end

    def format_text(text : String, lexer : Lexer) : String
      lines = lexer.group_tokens_in_lines(lexer.tokenize(text))
      output = String.build do |outp|
        if surrounding_pre?
          pre_style = wrap_long_lines? ? "style=\"white-space: pre-wrap; word-break: break-word;\"" : ""
          outp << "<pre class=\"#{get_css_class("Background")}\" #{pre_style}>"
        end
        outp << "<code class=\"#{get_css_class("Background")}\">"
        lines.each_with_index(offset: line_number_start - 1) do |line, i|
          line_label = line_numbers? ? "#{i + 1}".rjust(4).ljust(5) : ""
          line_class = highlighted?(i + 1) ? "class=\"#{get_css_class("LineHighlight")}\"" : ""
          line_id = linkable_line_numbers? ? "id=\"#{line_number_id_prefix}#{i + 1}\"" : ""
          outp << "<span #{line_id} #{line_class} style=\"user-select: none;\">#{line_label} </span>"
          line.each do |token|
            fragment = "<span class=\"#{get_css_class(token[:type])}\">#{token[:value]}</span>"
            outp << fragment
          end
        end
        outp << "</code></pre>"
      end
      output
    end

    # ameba:disable Metrics/CyclomaticComplexity
    def style_defs : String
      output = String.build do |outp|
        theme.styles.each do |token, style|
          outp << ".#{get_css_class(token)} {"
          # These are set or nil
          outp << "color: ##{style.color.try &.hex};" if style.color
          outp << "background-color: ##{style.background.try &.hex};" if style.background
          outp << "border: 1px solid ##{style.border.try &.hex};" if style.border

          # These are true/false/nil
          outp << "border: none;" if style.border == false
          outp << "font-weight: bold;" if style.bold
          outp << "font-weight: #{@weight_of_bold};" if style.bold == false
          outp << "font-style: italic;" if style.italic
          outp << "font-style: normal;" if style.italic == false
          outp << "text-decoration: underline;" if style.underline
          outp << "text-decoration: none;" if style.underline == false
          outp << "tab-size: #{tab_width};" if token == "Background"

          outp << "}"
        end
      end
      output
    end

    # Given a token type, return the CSS class to use.
    def get_css_class(token : String) : String
      return class_prefix + Abbreviations[token] if theme.styles.has_key?(token)

      # Themes don't contain information for each specific
      # token type. However, they may contain information
      # for a parent style. Worst case, we go to the root
      # (Background) style.
      class_prefix + Abbreviations[theme.style_parents(token).reverse.find { |parent|
        theme.styles.has_key?(parent)
      }]
    end

    # Is this line in the highlighted ranges?
    def highlighted?(line : Int) : Bool
      highlight_lines.any?(&.includes?(line))
    end
  end
end
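A sketch of the HTML formatter above, including writing the theme's CSS on its own as the --css CLI flag does (file names are illustrative):

require "tartrazine"

lexer = Tartrazine.lexer(filename: "example.cr")
html = Tartrazine::Html.new(
  theme: Tartrazine.theme("default-dark"),
  line_numbers: true,
  standalone: true)
File.write("example.html", html.format(File.read("example.cr"), lexer))
File.write("default-dark.css", html.style_defs) # stylesheet only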
src/formatters/json.cr (new file, 11 lines)
@@ -0,0 +1,11 @@
require "../formatter"

module Tartrazine
  class Json < Formatter
    property name = "json"

    def format(text : String, lexer : Lexer, _theme : Theme) : String
      lexer.tokenize(text).to_json
    end
  end
end
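Since Token is NamedTuple(type: String, value: String), the JSON formatter just serializes the collapsed token stream; a sketch, with illustrative token boundaries in the comment:

require "tartrazine"

lexer = Tartrazine.lexer(name: "crystal")
theme = Tartrazine.theme("default-dark")
puts Tartrazine::Json.new.format("x = 1", lexer, theme)
# e.g. [{"type":"Name","value":"x"},{"type":"Text","value":" = "},{"type":"LiteralNumberInteger","value":"1"}]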
src/lexer.cr (new file, 232 lines)
@@ -0,0 +1,232 @@
require "./constants/lexers"

module Tartrazine
  class LexerFiles
    extend BakedFileSystem

    bake_folder "../lexers", __DIR__
  end

  # Get the lexer object for a language name
  # FIXME: support mimetypes
  def self.lexer(name : String? = nil, filename : String? = nil) : Lexer
    if name.nil? && filename.nil?
      lexer_file_name = LEXERS_BY_NAME["plaintext"]
    elsif name && name != "autodetect"
      lexer_file_name = LEXERS_BY_NAME[name.downcase]
    else
      # Guess by filename
      candidates = Set(String).new
      LEXERS_BY_FILENAME.each do |k, v|
        candidates += v.to_set if File.match?(k, File.basename(filename.to_s))
      end
      case candidates.size
      when 0
        lexer_file_name = LEXERS_BY_NAME["plaintext"]
      when 1
        lexer_file_name = candidates.first
      else
        raise Exception.new("Multiple lexers match the filename: #{candidates.to_a.join(", ")}")
      end
    end
    Lexer.from_xml(LexerFiles.get("/#{lexer_file_name}.xml").gets_to_end)
  end

  # Return a list of all lexers
  def self.lexers : Array(String)
    LEXERS_BY_NAME.keys.sort!
  end

  # This implements a lexer for Pygments RegexLexers as expressed
  # in Chroma's XML serialization.
  #
  # For explanations on what actions and states do
  # the Pygments documentation is a good place to start.
  # https://pygments.org/docs/lexerdevelopment/
  class Lexer
    property config = {
      name: "",
      aliases: [] of String,
      filenames: [] of String,
      mime_types: [] of String,
      priority: 0.0,
      case_insensitive: false,
      dot_all: false,
      not_multiline: false,
      ensure_nl: false,
    }
    property xml : String = ""

    property states = {} of String => State

    property state_stack = ["root"]

    # Turn the text into a list of tokens. The `usingself` parameter
    # is true when the lexer is being used to tokenize a string
    # from a larger text that is already being tokenized.
    # So, when it's true, we don't modify the text.
    def tokenize(text, usingself = false) : Array(Token)
      @state_stack = ["root"]
      tokens = [] of Token
      pos = 0
      matched = false

      # Respect the `ensure_nl` config option
      if text.size > 0 && text[-1] != '\n' && config[:ensure_nl] && !usingself
        text += "\n"
      end

      # Loop through the text, applying rules
      while pos < text.size
        state = states[@state_stack.last]
        # Log.trace { "Stack is #{@state_stack} State is #{state.name}, pos is #{pos}, text is #{text[pos..pos + 10]}" }
        state.rules.each do |rule|
          matched, new_pos, new_tokens = rule.match(text, pos, self)
          if matched
            # Move position forward, save the tokens,
            # tokenize from the new position
            # Log.trace { "MATCHED: #{rule.xml}" }
            pos = new_pos
            tokens += new_tokens
            break
          end
          # Log.trace { "NOT MATCHED: #{rule.xml}" }
        end
        # If no rule matches, emit an error token
        unless matched
          # Log.trace { "Error at #{pos}" }
          tokens << {type: "Error", value: "#{text[pos]}"}
          pos += 1
        end
      end
      Lexer.collapse_tokens(tokens)
    end

    # Collapse consecutive tokens of the same type for easier comparison
    # and smaller output
    def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token)
      result = [] of Tartrazine::Token
      tokens = tokens.reject { |token| token[:value] == "" }
      tokens.each do |token|
        if result.empty?
          result << token
          next
        end
        last = result.last
        if last[:type] == token[:type]
          new_token = {type: last[:type], value: last[:value] + token[:value]}
          result.pop
          result << new_token
        else
          result << token
        end
      end
      result
    end

    # Group tokens into lines, splitting them when a newline is found
    def group_tokens_in_lines(tokens : Array(Token)) : Array(Array(Token))
      split_tokens = [] of Token
      tokens.each do |token|
        if token[:value].includes?("\n")
          values = token[:value].split("\n")
          values.each_with_index do |value, index|
            value += "\n" if index < values.size - 1
            split_tokens << {type: token[:type], value: value}
          end
        else
          split_tokens << token
        end
      end
      lines = [Array(Token).new]
      split_tokens.each do |token|
        lines.last << token
        if token[:value].includes?("\n")
          lines << Array(Token).new
        end
      end
      lines
    end

    # ameba:disable Metrics/CyclomaticComplexity
    def self.from_xml(xml : String) : Lexer
      l = Lexer.new
      l.xml = xml
      lexer = XML.parse(xml).first_element_child
      if lexer
        config = lexer.children.find { |node|
          node.name == "config"
        }
        if config
          l.config = {
            name: xml_to_s(config, name) || "",
            aliases: xml_to_a(config, _alias) || [] of String,
            filenames: xml_to_a(config, filename) || [] of String,
            mime_types: xml_to_a(config, mime_type) || [] of String,
            priority: xml_to_f(config, priority) || 0.0,
            not_multiline: xml_to_s(config, not_multiline) == "true",
            dot_all: xml_to_s(config, dot_all) == "true",
            case_insensitive: xml_to_s(config, case_insensitive) == "true",
            ensure_nl: xml_to_s(config, ensure_nl) == "true",
          }
        end

        rules = lexer.children.find { |node|
          node.name == "rules"
        }
        if rules
          # Rules contains states 🤷
          rules.children.select { |node|
            node.name == "state"
          }.each do |state_node|
            state = State.new
            state.name = state_node["name"]
            if l.states.has_key?(state.name)
              raise Exception.new("Duplicate state: #{state.name}")
            else
              l.states[state.name] = state
            end
            # And states contain rules 🤷
            state_node.children.select { |node|
              node.name == "rule"
            }.each do |rule_node|
              case rule_node["pattern"]?
              when nil
                if rule_node.first_element_child.try &.name == "include"
                  rule = IncludeStateRule.new(rule_node)
                else
                  rule = UnconditionalRule.new(rule_node)
                end
              else
                rule = Rule.new(rule_node,
                  multiline: !l.config[:not_multiline],
                  dotall: l.config[:dot_all],
                  ignorecase: l.config[:case_insensitive])
              end
              state.rules << rule
            end
          end
        end
      end
      l
    end
  end

  # A Lexer state. A state has a name and a list of rules.
  # The state machine has a state stack containing references
  # to states to decide which rules to apply.
  class State
    property name : String = ""
    property rules = [] of Rule

    def +(other : State)
      new_state = State.new
      new_state.name = Random.base58(8)
      new_state.rules = rules + other.rules
      new_state
    end
  end

  # A token, the output of the tokenizer
  alias Token = NamedTuple(type: String, value: String)
end
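Two small sketches of the pieces above, filename-based lookup and token collapsing (values are illustrative):

require "tartrazine"

lexer = Tartrazine.lexer(filename: "foo.py") # matched against the LEXERS_BY_FILENAME globs

tokens = [
  {type: "Text", value: "foo"},
  {type: "Text", value: "bar"},
  {type: "Keyword", value: "if"},
]
Tartrazine::Lexer.collapse_tokens(tokens)
# => [{type: "Text", value: "foobar"}, {type: "Keyword", value: "if"}]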
src/main.cr (98 changed lines)
@@ -1,5 +1,97 @@
+require "docopt"
require "./**"

-lexer = Tartrazine.lexer("crystal")
-theme = Tartrazine.theme(ARGV[1])
-puts Tartrazine::Html.new.format(File.read(ARGV[0]), lexer, theme)
+HELP = <<-HELP
+tartrazine: a syntax highlighting tool
+
+Usage:
+  tartrazine (-h, --help)
+  tartrazine FILE -f html [-t theme][--standalone][--line-numbers]
+                          [-l lexer][-o output]
+  tartrazine -f html -t theme --css
+  tartrazine FILE -f terminal [-t theme][-l lexer][--line-numbers]
+                              [-o output]
+  tartrazine FILE -f json [-o output]
+  tartrazine --list-themes
+  tartrazine --list-lexers
+  tartrazine --list-formatters
+  tartrazine --version
+
+Options:
+  -f <formatter>  Format to use (html, terminal, json)
+  -t <theme>      Theme to use, see --list-themes [default: default-dark]
+  -l <lexer>      Lexer (language) to use, see --list-lexers [default: autodetect]
+  -o <output>     Output file. Default is stdout.
+  --standalone    Generate a standalone HTML file, which includes
+                  all style information. If not given, it will generate just
+                  a HTML fragment ready to include in your own page.
+  --css           Generate a CSS file for the theme called <theme>.css
+  --line-numbers  Include line numbers in the output
+  -h, --help      Show this screen
+  -v, --version   Show version number
+HELP
+
+options = Docopt.docopt(HELP, ARGV)
+
+# Handle version manually
+if options["--version"]
+  puts "tartrazine #{Tartrazine::VERSION}"
+  exit 0
+end
+
+if options["--list-themes"]
+  puts Tartrazine.themes.join("\n")
+  exit 0
+end
+
+if options["--list-lexers"]
+  puts Tartrazine.lexers.join("\n")
+  exit 0
+end
+
+if options["--list-formatters"]
+  puts "html\njson\nterminal"
+  exit 0
+end
+
+theme = Tartrazine.theme(options["-t"].as(String))
+
+if options["-f"]
+  formatter = options["-f"].as(String)
+  case formatter
+  when "html"
+    formatter = Tartrazine::Html.new
+    formatter.standalone = options["--standalone"] != nil
+    formatter.line_numbers = options["--line-numbers"] != nil
+    formatter.theme = theme
+  when "terminal"
+    formatter = Tartrazine::Ansi.new
+    formatter.line_numbers = options["--line-numbers"] != nil
+    formatter.theme = theme
+  when "json"
+    formatter = Tartrazine::Json.new
+  else
+    puts "Invalid formatter: #{formatter}"
+    exit 1
+  end
+
+  if formatter.is_a?(Tartrazine::Html) && options["--css"]
+    File.open("#{options["-t"].as(String)}.css", "w") do |outf|
+      outf.puts formatter.style_defs
+    end
+    exit 0
+  end
+
+  lexer = Tartrazine.lexer(name: options["-l"].as(String), filename: options["FILE"].as(String))
+
+  input = File.open(options["FILE"].as(String)).gets_to_end
+  output = formatter.format(input, lexer)
+
+  if options["-o"].nil?
+    puts output
+  else
+    File.open(options["-o"].as(String), "w") do |outf|
+      outf.puts output
+    end
+  end
+end
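Per the help text above, typical invocations are `tartrazine foo.cr -f html --standalone -o foo.html` for a self-contained page, and `tartrazine -f html -t default-dark --css` to write default-dark.css (file names illustrative).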
src/rules.cr (41 changed lines)
@@ -1,9 +1,8 @@
require "./actions"
-require "./constants"
require "./formatter"
require "./rules"
require "./styles"
-require "./tartrazine"
+require "./lexer"

# These are lexer rules. They match with the text being parsed
# and perform actions, either emitting tokens or changing the
@@ -12,7 +11,7 @@ module Tartrazine
  # This rule matches via a regex pattern

  class Rule
-    property pattern : Regex = Re2.new ""
+    property pattern : Regex = Regex.new ""
    property actions : Array(Action) = [] of Action
    property xml : String = "foo"

@@ -34,12 +33,15 @@ module Tartrazine

    def initialize(node : XML::Node, multiline, dotall, ignorecase)
      @xml = node.to_s
-      @pattern = Re2.new(
-        node["pattern"],
-        multiline,
-        dotall,
-        ignorecase,
-        anchored: true)
+      pattern = node["pattern"]
+      flags = Regex::Options::ANCHORED
+      # MULTILINE implies DOTALL which we don't want, so we
+      # use in-pattern flag (?m) instead
+      # flags |= Regex::Options::MULTILINE if multiline
+      pattern = "(?m)" + pattern if multiline
+      flags |= Regex::Options::DOTALL if dotall
+      flags |= Regex::Options::IGNORE_CASE if ignorecase
+      @pattern = Regex.new(pattern, flags)
      add_actions(node)
    end

@@ -91,25 +93,4 @@ module Tartrazine
      add_actions(node)
    end
  end
-
-  # This is a hack to workaround that Crystal seems to disallow
-  # having regexes multiline but not dot_all
-  class Re2 < Regex
-    @source = "fa"
-    @options = Regex::Options::None
-    @jit = true
-
-    def initialize(pattern : String, multiline = false, dotall = false, ignorecase = false, anchored = false)
-      flags = LibPCRE2::UTF | LibPCRE2::DUPNAMES |
-              LibPCRE2::UCP
-      flags |= LibPCRE2::MULTILINE if multiline
-      flags |= LibPCRE2::DOTALL if dotall
-      flags |= LibPCRE2::CASELESS if ignorecase
-      flags |= LibPCRE2::ANCHORED if anchored
-      flags |= LibPCRE2::NO_UTF_CHECK
-      @re = Regex::PCRE2.compile(pattern, flags) do |error_message|
-        raise Exception.new(error_message)
-      end
-    end
-  end
end
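A tiny sketch of the in-pattern flag trick used above; per the comment in the diff, Crystal's Regex::Options::MULTILINE would also imply DOTALL here, so the flag is spliced into the pattern instead:

re = Regex.new("(?m)^bar$")
re.matches?("foo\nbar") # => true: ^ and $ match per line, without turning on DOTALL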
src/styles.cr

@@ -1,5 +1,4 @@
require "./actions"
-require "./constants"
require "./formatter"
require "./rules"
require "./styles"

@@ -10,16 +9,36 @@ require "xml"
module Tartrazine
  alias Color = Sixteen::Color

-  def self.theme(name : String) : Theme
-    return Theme.from_base16(name[7..]) if name.starts_with? "base16_"
-    Theme.from_xml(ThemeFiles.get("/#{name}.xml").gets_to_end)
-  end
-
  class ThemeFiles
    extend BakedFileSystem
    bake_folder "../styles", __DIR__
  end

+  def self.theme(name : String) : Theme
+    begin
+      return Theme.from_base16(name)
+    rescue ex : Exception
+      raise ex unless ex.message.try &.includes? "Theme not found"
+    end
+    begin
+      Theme.from_xml(ThemeFiles.get("/#{name}.xml").gets_to_end)
+    rescue
+      raise Exception.new("Theme #{name} not found")
+    end
+  end
+
+  # Return a list of all themes
+  def self.themes
+    themes = Set(String).new
+    ThemeFiles.files.each do |file|
+      themes << file.path.split("/").last.split(".").first
+    end
+    Sixteen::DataFiles.files.each do |file|
+      themes << file.path.split("/").last.split(".").first
+    end
+    themes.to_a.sort!
+  end
+
  class Style
    # These properties are tri-state.
    # true means it's set
@@ -103,7 +122,8 @@ module Tartrazine
    # The color assignments are adapted from
    # https://github.com/mohd-akram/base16-pygments/

-    theme.styles["Background"] = Style.new(color: t["base05"], background: t["base00"])
+    theme.styles["Background"] = Style.new(color: t["base05"], background: t["base00"], bold: true)
+    theme.styles["LineHighlight"] = Style.new(color: t["base0D"], background: t["base01"])
    theme.styles["Text"] = Style.new(color: t["base05"])
    theme.styles["Error"] = Style.new(color: t["base08"])
    theme.styles["Comment"] = Style.new(color: t["base03"])
@@ -162,7 +182,26 @@ module Tartrazine

      theme.styles[node["type"]] = s
    end
+    # We really want a LineHighlight class
+    if !theme.styles.has_key?("LineHighlight")
+      theme.styles["LineHighlight"] = Style.new
+      theme.styles["LineHighlight"].background = make_highlight_color(theme.styles["Background"].background)
+      theme.styles["LineHighlight"].bold = true
+    end
    theme
  end

+  # If the color is dark, make it brighter and vice versa
+  def self.make_highlight_color(base_color)
+    if base_color.nil?
+      # Who knows
+      return Color.new(127, 127, 127)
+    end
+    if base_color.dark?
+      base_color.lighter(0.2)
+    else
+      base_color.darker(0.2)
+    end
+  end
end
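With the fallback chain above, lookup is uniform across base16 and XML themes (theme names are illustrative):

theme = Tartrazine.theme("default-dark") # tries Theme.from_base16 first, then the baked XML styles
Tartrazine.theme("no-such-theme")        # raises Exception: "Theme no-such-theme not found"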
src/tartrazine.cr

@@ -1,5 +1,4 @@
require "./actions"
-require "./constants"
require "./formatter"
require "./rules"
require "./styles"

@@ -12,189 +11,9 @@ require "xml"

module Tartrazine
  extend self
-  VERSION = "0.1.1"
+  VERSION = {{ `shards version #{__DIR__}`.chomp.stringify }}

  Log = ::Log.for("tartrazine")

-  # This implements a lexer for Pygments RegexLexers as expressed
-  # in Chroma's XML serialization.
-  #
-  # For explanations on what actions and states do
-  # the Pygments documentation is a good place to start.
-  # https://pygments.org/docs/lexerdevelopment/
-
-  # A Lexer state. A state has a name and a list of rules.
-  # The state machine has a state stack containing references
-  # to states to decide which rules to apply.
-  class State
-    property name : String = ""
-    property rules = [] of Rule
-
-    def +(other : State)
-      new_state = State.new
-      new_state.name = Random.base58(8)
-      new_state.rules = rules + other.rules
-      new_state
-    end
-  end
-
-  class LexerFiles
-    extend BakedFileSystem
-
-    bake_folder "../lexers", __DIR__
-  end
-
-  # A token, the output of the tokenizer
-  alias Token = NamedTuple(type: String, value: String)
-
-  class Lexer
-    property config = {
-      name: "",
-      aliases: [] of String,
-      filenames: [] of String,
-      mime_types: [] of String,
-      priority: 0.0,
-      case_insensitive: false,
-      dot_all: false,
-      not_multiline: false,
-      ensure_nl: false,
-    }
-    property xml : String = ""
-
-    property states = {} of String => State
-
-    property state_stack = ["root"]
-
-    # Turn the text into a list of tokens. The `usingself` parameter
-    # is true when the lexer is being used to tokenize a string
-    # from a larger text that is already being tokenized.
-    # So, when it's true, we don't modify the text.
-    def tokenize(text, usingself = false) : Array(Token)
-      @state_stack = ["root"]
-      tokens = [] of Token
-      pos = 0
-      matched = false
-
-      # Respect the `ensure_nl` config option
-      if text.size > 0 && text[-1] != '\n' && config[:ensure_nl] && !usingself
-        text += "\n"
-      end
-
-      # Loop through the text, applying rules
-      while pos < text.size
-        state = states[@state_stack.last]
-        # Log.trace { "Stack is #{@state_stack} State is #{state.name}, pos is #{pos}, text is #{text[pos..pos + 10]}" }
-        state.rules.each do |rule|
-          matched, new_pos, new_tokens = rule.match(text, pos, self)
-          if matched
-            # Move position forward, save the tokens,
-            # tokenize from the new position
-            # Log.trace { "MATCHED: #{rule.xml}" }
-            pos = new_pos
-            tokens += new_tokens
-            break
-          end
-          # Log.trace { "NOT MATCHED: #{rule.xml}" }
-        end
-        # If no rule matches, emit an error token
-        unless matched
-          # Log.trace { "Error at #{pos}" }
-          tokens << {type: "Error", value: "#{text[pos]}"}
-          pos += 1
-        end
-      end
-      Lexer.collapse_tokens(tokens)
-    end
-
-    # Collapse consecutive tokens of the same type for easier comparison
-    # and smaller output
-    def self.collapse_tokens(tokens : Array(Tartrazine::Token)) : Array(Tartrazine::Token)
-      result = [] of Tartrazine::Token
-      tokens = tokens.reject { |token| token[:value] == "" }
-      tokens.each do |token|
-        if result.empty?
-          result << token
-          next
-        end
-        last = result.last
-        if last[:type] == token[:type]
-          new_token = {type: last[:type], value: last[:value] + token[:value]}
-          result.pop
-          result << new_token
-        else
-          result << token
-        end
-      end
-      result
-    end
-
-    # ameba:disable Metrics/CyclomaticComplexity
-    def self.from_xml(xml : String) : Lexer
-      l = Lexer.new
-      l.xml = xml
-      lexer = XML.parse(xml).first_element_child
-      if lexer
-        config = lexer.children.find { |node|
-          node.name == "config"
-        }
-        if config
-          l.config = {
-            name: xml_to_s(config, name) || "",
-            aliases: xml_to_a(config, _alias) || [] of String,
-            filenames: xml_to_a(config, filename) || [] of String,
-            mime_types: xml_to_a(config, mime_type) || [] of String,
-            priority: xml_to_f(config, priority) || 0.0,
-            not_multiline: xml_to_s(config, not_multiline) == "true",
-            dot_all: xml_to_s(config, dot_all) == "true",
-            case_insensitive: xml_to_s(config, case_insensitive) == "true",
-            ensure_nl: xml_to_s(config, ensure_nl) == "true",
-          }
-        end
-
-        rules = lexer.children.find { |node|
-          node.name == "rules"
-        }
-        if rules
-          # Rules contains states 🤷
-          rules.children.select { |node|
-            node.name == "state"
-          }.each do |state_node|
-            state = State.new
-            state.name = state_node["name"]
-            if l.states.has_key?(state.name)
-              raise Exception.new("Duplicate state: #{state.name}")
-            else
-              l.states[state.name] = state
-            end
-            # And states contain rules 🤷
-            state_node.children.select { |node|
-              node.name == "rule"
-            }.each do |rule_node|
-              case rule_node["pattern"]?
-              when nil
-                if rule_node.first_element_child.try &.name == "include"
-                  rule = IncludeStateRule.new(rule_node)
-                else
-                  rule = UnconditionalRule.new(rule_node)
-                end
-              else
-                rule = Rule.new(rule_node,
-                  multiline: !l.config[:not_multiline],
-                  dotall: l.config[:dot_all],
-                  ignorecase: l.config[:case_insensitive])
-              end
-              state.rules << rule
-            end
-          end
-        end
-      end
-      l
-    end
-  end
-
-  def self.lexer(name : String) : Lexer
-    Lexer.from_xml(LexerFiles.get("/#{name}.xml").gets_to_end)
-  end
end

# Convenience macros to parse XML
styles/base16-snazzy.xml (deleted)

@@ -1,74 +0,0 @@
-<style name="base16-snazzy">
-  <entry type="Other" style="#e2e4e5"/>
-  <entry type="Error" style="#ff5c57"/>
-  <entry type="Background" style="bg:#282a36"/>
-  <entry type="Keyword" style="#ff6ac1"/>
-  <entry type="KeywordConstant" style="#ff6ac1"/>
-  <entry type="KeywordDeclaration" style="#ff5c57"/>
-  <entry type="KeywordNamespace" style="#ff6ac1"/>
-  <entry type="KeywordPseudo" style="#ff6ac1"/>
-  <entry type="KeywordReserved" style="#ff6ac1"/>
-  <entry type="KeywordType" style="#9aedfe"/>
-  <entry type="Name" style="#e2e4e5"/>
-  <entry type="NameAttribute" style="#57c7ff"/>
-  <entry type="NameBuiltin" style="#ff5c57"/>
-  <entry type="NameBuiltinPseudo" style="#e2e4e5"/>
-  <entry type="NameClass" style="#f3f99d"/>
-  <entry type="NameConstant" style="#ff9f43"/>
-  <entry type="NameDecorator" style="#ff9f43"/>
-  <entry type="NameEntity" style="#e2e4e5"/>
-  <entry type="NameException" style="#e2e4e5"/>
-  <entry type="NameFunction" style="#57c7ff"/>
-  <entry type="NameLabel" style="#ff5c57"/>
-  <entry type="NameNamespace" style="#e2e4e5"/>
-  <entry type="NameOther" style="#e2e4e5"/>
-  <entry type="NameTag" style="#ff6ac1"/>
-  <entry type="NameVariable" style="#ff5c57"/>
-  <entry type="NameVariableClass" style="#ff5c57"/>
-  <entry type="NameVariableGlobal" style="#ff5c57"/>
-  <entry type="NameVariableInstance" style="#ff5c57"/>
-  <entry type="Literal" style="#e2e4e5"/>
-  <entry type="LiteralDate" style="#e2e4e5"/>
-  <entry type="LiteralString" style="#5af78e"/>
-  <entry type="LiteralStringBacktick" style="#5af78e"/>
-  <entry type="LiteralStringChar" style="#5af78e"/>
-  <entry type="LiteralStringDoc" style="#5af78e"/>
-  <entry type="LiteralStringDouble" style="#5af78e"/>
-  <entry type="LiteralStringEscape" style="#5af78e"/>
-  <entry type="LiteralStringHeredoc" style="#5af78e"/>
-  <entry type="LiteralStringInterpol" style="#5af78e"/>
-  <entry type="LiteralStringOther" style="#5af78e"/>
-  <entry type="LiteralStringRegex" style="#5af78e"/>
-  <entry type="LiteralStringSingle" style="#5af78e"/>
-  <entry type="LiteralStringSymbol" style="#5af78e"/>
-  <entry type="LiteralNumber" style="#ff9f43"/>
-  <entry type="LiteralNumberBin" style="#ff9f43"/>
-  <entry type="LiteralNumberFloat" style="#ff9f43"/>
-  <entry type="LiteralNumberHex" style="#ff9f43"/>
-  <entry type="LiteralNumberInteger" style="#ff9f43"/>
-  <entry type="LiteralNumberIntegerLong" style="#ff9f43"/>
-  <entry type="LiteralNumberOct" style="#ff9f43"/>
-  <entry type="Operator" style="#ff6ac1"/>
-  <entry type="OperatorWord" style="#ff6ac1"/>
-  <entry type="Punctuation" style="#e2e4e5"/>
-  <entry type="Comment" style="#78787e"/>
-  <entry type="CommentHashbang" style="#78787e"/>
-  <entry type="CommentMultiline" style="#78787e"/>
-  <entry type="CommentSingle" style="#78787e"/>
-  <entry type="CommentSpecial" style="#78787e"/>
-  <entry type="CommentPreproc" style="#78787e"/>
-  <entry type="Generic" style="#e2e4e5"/>
-  <entry type="GenericDeleted" style="#ff5c57"/>
-  <entry type="GenericEmph" style="underline #e2e4e5"/>
-  <entry type="GenericError" style="#ff5c57"/>
-  <entry type="GenericHeading" style="bold #e2e4e5"/>
-  <entry type="GenericInserted" style="bold #e2e4e5"/>
-  <entry type="GenericOutput" style="#43454f"/>
-  <entry type="GenericPrompt" style="#e2e4e5"/>
-  <entry type="GenericStrong" style="italic #e2e4e5"/>
-  <entry type="GenericSubheading" style="bold #e2e4e5"/>
-  <entry type="GenericTraceback" style="#e2e4e5"/>
-  <entry type="GenericUnderline" style="underline"/>
-  <entry type="Text" style="#e2e4e5"/>
-  <entry type="TextWhitespace" style="#e2e4e5"/>
-</style>