mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-09-16 02:17:32 +00:00
Compare commits
3 Commits
v0.1.1
...
2a19f3889f
Author | SHA1 | Date | |
---|---|---|---|
2a19f3889f | |||
b9e51824df | |||
ff1c0012ec |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -6,4 +6,3 @@
|
|||||||
chroma/
|
chroma/
|
||||||
pygments/
|
pygments/
|
||||||
shard.lock
|
shard.lock
|
||||||
.vscode/
|
|
||||||
|
@@ -1,15 +0,0 @@
|
|||||||
FROM --platform=${TARGETPLATFORM:-linux/amd64} alpine:3.20 AS build
|
|
||||||
RUN apk add --no-cache \
|
|
||||||
crystal \
|
|
||||||
shards \
|
|
||||||
yaml-dev \
|
|
||||||
yaml-static \
|
|
||||||
openssl-dev \
|
|
||||||
openssl-libs-static \
|
|
||||||
libxml2-dev \
|
|
||||||
libxml2-static \
|
|
||||||
zlib-dev \
|
|
||||||
zlib-static \
|
|
||||||
xz-dev \
|
|
||||||
xz-static \
|
|
||||||
make
|
|
7
Makefile
7
Makefile
@@ -1,7 +0,0 @@
|
|||||||
build: $(wildcard src/**/*.cr) $(wildcard lexers/*xml) $(wildcard styles/*xml) shard.yml
|
|
||||||
shards build -Dstrict_multi_assign -Dno_number_autocast
|
|
||||||
release: $(wildcard src/**/*.cr) $(wildcard lexers/*xml) $(wildcard styles/*xml) shard.yml
|
|
||||||
shards build --release
|
|
||||||
static: $(wildcard src/**/*.cr) $(wildcard lexers/*xml) $(wildcard styles/*xml) shard.yml
|
|
||||||
shards build --release --static
|
|
||||||
strip bin/tartrazine
|
|
@@ -1,16 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -e
|
|
||||||
|
|
||||||
docker run --rm --privileged \
|
|
||||||
multiarch/qemu-user-static \
|
|
||||||
--reset -p yes
|
|
||||||
|
|
||||||
# Build for AMD64
|
|
||||||
docker build . -f Dockerfile.static -t tartrazine-builder
|
|
||||||
docker run -ti --rm -v "$PWD":/app --user="$UID" tartrazine-builder /bin/sh -c "cd /app && rm -rf lib shard.lock && make static"
|
|
||||||
mv bin/tartrazine bin/tartrazine-static-linux-amd64
|
|
||||||
|
|
||||||
# Build for ARM64
|
|
||||||
docker build . -f Dockerfile.static --platform linux/arm64 -t tartrazine-builder
|
|
||||||
docker run -ti --rm -v "$PWD":/app --platform linux/arm64 --user="$UID" tartrazine-builder /bin/sh -c "cd /app && rm -rf lib shard.lock && make static"
|
|
||||||
mv bin/tartrazine bin/tartrazine-static-linux-arm64
|
|
@@ -1,5 +1,5 @@
|
|||||||
name: tartrazine
|
name: tartrazine
|
||||||
version: 0.1.1
|
version: 0.1.0
|
||||||
|
|
||||||
authors:
|
authors:
|
||||||
- Roberto Alsina <roberto.alsina@gmail.com>
|
- Roberto Alsina <roberto.alsina@gmail.com>
|
||||||
@@ -9,12 +9,11 @@ targets:
|
|||||||
main: src/main.cr
|
main: src/main.cr
|
||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
baked_file_system:
|
|
||||||
github: schovi/baked_file_system
|
|
||||||
base58:
|
base58:
|
||||||
github: crystal-china/base58.cr
|
github: crystal-china/base58.cr
|
||||||
sixteen:
|
sixteen:
|
||||||
github: ralsina/sixteen
|
github: ralsina/sixteen
|
||||||
|
branch: main
|
||||||
|
|
||||||
crystal: ">= 1.13.0"
|
crystal: ">= 1.13.0"
|
||||||
|
|
||||||
|
@@ -1,9 +1,4 @@
|
|||||||
require "./actions"
|
require "xml"
|
||||||
require "./constants"
|
|
||||||
require "./formatter"
|
|
||||||
require "./rules"
|
|
||||||
require "./styles"
|
|
||||||
require "./tartrazine"
|
|
||||||
|
|
||||||
# These are Lexer actions. When a rule matches, it will
|
# These are Lexer actions. When a rule matches, it will
|
||||||
# perform a list of actions. These actions can emit tokens
|
# perform a list of actions. These actions can emit tokens
|
||||||
@@ -31,11 +26,11 @@ module Tartrazine
|
|||||||
end
|
end
|
||||||
|
|
||||||
# ameba:disable Metrics/CyclomaticComplexity
|
# ameba:disable Metrics/CyclomaticComplexity
|
||||||
def emit(match : Regex::MatchData?, lexer : Lexer, match_group = 0) : Array(Token)
|
def emit(matches : Pointer(LibCre2::StringPiece), lexer : Lexer, match_group = 0) : Array(Token)
|
||||||
case type
|
case type
|
||||||
when "token"
|
when "token"
|
||||||
raise Exception.new "Can't have a token without a match" if match.nil?
|
raise Exception.new "Can't have a token without a match" if matches[0].length == 0
|
||||||
[Token.new(type: xml["type"], value: match[match_group])]
|
[Token.new(type: xml["type"], value: String.new(Slice.new(matches[0].data, matches[0].length)))]
|
||||||
when "push"
|
when "push"
|
||||||
states_to_push = xml.attributes.select { |attrib|
|
states_to_push = xml.attributes.select { |attrib|
|
||||||
attrib.name == "state"
|
attrib.name == "state"
|
||||||
@@ -68,35 +63,37 @@ module Tartrazine
|
|||||||
when "bygroups"
|
when "bygroups"
|
||||||
# FIXME: handle
|
# FIXME: handle
|
||||||
# ><bygroups>
|
# ><bygroups>
|
||||||
# <token type="Punctuation"/>
|
# <token type="Punctuation"/>https://github.com/google/re2/wiki/Syntax
|
||||||
# None
|
# None
|
||||||
# <token type="LiteralStringRegex"/>
|
# <token type="LiteralStringRegex"/>
|
||||||
#
|
#
|
||||||
# where that None means skipping a group
|
# where that None means skipping a group
|
||||||
#
|
#
|
||||||
raise Exception.new "Can't have a token without a match" if match.nil?
|
raise Exception.new "Can't have a bygroups without a match" if matches[0].length == 0
|
||||||
|
|
||||||
# Each group matches an action. If the group match is empty,
|
# Each group matches an action. If the group match is empty,
|
||||||
# the action is skipped.
|
# the action is skipped.
|
||||||
result = [] of Token
|
result = [] of Token
|
||||||
@actions.each_with_index do |e, i|
|
@actions.each_with_index do |e, i|
|
||||||
next if match[i + 1]?.nil?
|
next if matches[i].length == 0
|
||||||
result += e.emit(match, lexer, i + 1)
|
result += e.emit(matches, lexer, i)
|
||||||
end
|
end
|
||||||
result
|
result
|
||||||
when "using"
|
when "using"
|
||||||
# Shunt to another lexer entirely
|
# Shunt to another lexer entirely
|
||||||
return [] of Token if match.nil?
|
return [] of Token if matches[0].length == 0
|
||||||
lexer_name = xml["lexer"].downcase
|
lexer_name = xml["lexer"].downcase
|
||||||
Log.trace { "to tokenize: #{match[match_group]}" }
|
# Log.trace { "to tokenize: #{match[match_group]}" }
|
||||||
Tartrazine.lexer(lexer_name).tokenize(match[match_group], usingself: true)
|
to_tokenize = String.new(Slice.new(matches[match_group].data, matches[match_group].length))
|
||||||
|
Tartrazine.lexer(lexer_name).tokenize(to_tokenize, usingself: true)
|
||||||
when "usingself"
|
when "usingself"
|
||||||
# Shunt to another copy of this lexer
|
# Shunt to another copy of this lexer
|
||||||
return [] of Token if match.nil?
|
return [] of Token if matches[0].length == 0
|
||||||
|
|
||||||
new_lexer = Lexer.from_xml(lexer.xml)
|
new_lexer = Lexer.from_xml(lexer.xml)
|
||||||
Log.trace { "to tokenize: #{match[match_group]}" }
|
# Log.trace { "to tokenize: #{match[match_group]}" }
|
||||||
new_lexer.tokenize(match[match_group], usingself: true)
|
to_tokenize = String.new(Slice.new(matches[match_group].data, matches[match_group].length))
|
||||||
|
new_lexer.tokenize(to_tokenize, usingself: true)
|
||||||
when "combined"
|
when "combined"
|
||||||
# Combine two states into one anonymous state
|
# Combine two states into one anonymous state
|
||||||
states = xml.attributes.select { |attrib|
|
states = xml.attributes.select { |attrib|
|
||||||
|
5
src/cre2/Makefile
Normal file
5
src/cre2/Makefile
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
all: cre2.o
|
||||||
|
clean:
|
||||||
|
rm -f cre2.o
|
||||||
|
cre2.o: cre2.cpp cre2.h
|
||||||
|
g++ -O3 -c -o cre2.o cre2.cpp
|
122
src/cre2/cre2.cpp
Normal file
122
src/cre2/cre2.cpp
Normal file
@@ -0,0 +1,122 @@
|
|||||||
|
#include <re2/re2.h>
|
||||||
|
#include "cre2.h"
|
||||||
|
|
||||||
|
#define TO_OPT(opt) (reinterpret_cast<RE2::Options *>(opt))
|
||||||
|
|
||||||
|
cre2_options *cre2_opt_new(void) {
|
||||||
|
return reinterpret_cast<void*>(new RE2::Options());
|
||||||
|
}
|
||||||
|
|
||||||
|
void cre2_opt_delete(cre2_options *opt) {
|
||||||
|
delete TO_OPT(opt);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#define OPT_bool(name) \
|
||||||
|
void cre2_opt_##name(cre2_options *opt, int flag) { \
|
||||||
|
TO_OPT(opt)->set_##name(bool(flag)); \
|
||||||
|
}
|
||||||
|
|
||||||
|
OPT_bool(posix_syntax)
|
||||||
|
OPT_bool(longest_match)
|
||||||
|
OPT_bool(log_errors)
|
||||||
|
OPT_bool(literal)
|
||||||
|
OPT_bool(never_nl)
|
||||||
|
OPT_bool(dot_nl)
|
||||||
|
OPT_bool(case_sensitive)
|
||||||
|
OPT_bool(perl_classes)
|
||||||
|
OPT_bool(word_boundary)
|
||||||
|
OPT_bool(one_line)
|
||||||
|
|
||||||
|
// Retire the setter-generating helper macro. Preprocessor names are
// case-sensitive, so this must spell the macro exactly as it was defined
// (OPT_bool); undefining OPT_BOOL would silently leave the helper visible.
#undef OPT_bool
|
||||||
|
|
||||||
|
|
||||||
|
void cre2_opt_encoding(cre2_options *opt, encoding_t enc) {
|
||||||
|
switch (enc) {
|
||||||
|
case CRE2_UTF8:
|
||||||
|
TO_OPT(opt)->set_encoding(RE2::Options::EncodingUTF8);
|
||||||
|
break;
|
||||||
|
case CRE2_Latin1:
|
||||||
|
TO_OPT(opt)->set_encoding(RE2::Options::EncodingLatin1);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void cre2_opt_max_mem(cre2_options *opt, int m) {
|
||||||
|
TO_OPT(opt)->set_max_mem(m);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#define TO_RE2(re) (reinterpret_cast<RE2 *>(re))
|
||||||
|
#define TO_CONST_RE2(re) (reinterpret_cast<const RE2 *>(re))
|
||||||
|
|
||||||
|
cre2 *cre2_new(const char *pattern, int patternlen, const cre2_options *opt) {
|
||||||
|
re2::StringPiece pattern_re2(pattern, patternlen);
|
||||||
|
return reinterpret_cast<void*>(
|
||||||
|
new RE2(pattern_re2, *reinterpret_cast<const RE2::Options *>(opt)));
|
||||||
|
}
|
||||||
|
|
||||||
|
void cre2_delete(cre2 *re) {
|
||||||
|
delete TO_RE2(re);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int cre2_error_code(const cre2 *re) {
|
||||||
|
return int(TO_CONST_RE2(re)->error_code());
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *cre2_error_string(const cre2 *re) {
|
||||||
|
return TO_CONST_RE2(re)->error().c_str();
|
||||||
|
}
|
||||||
|
|
||||||
|
void cre2_error_arg(const cre2 *re, struct string_piece *arg) {
|
||||||
|
const std::string &argstr = TO_CONST_RE2(re)->error_arg();
|
||||||
|
arg->data = argstr.data();
|
||||||
|
arg->length = argstr.length();
|
||||||
|
}
|
||||||
|
|
||||||
|
int cre2_num_capturing_groups(const cre2 *re) {
|
||||||
|
return TO_CONST_RE2(re)->NumberOfCapturingGroups();
|
||||||
|
}
|
||||||
|
|
||||||
|
int cre2_program_size(const cre2 *re) {
|
||||||
|
return TO_CONST_RE2(re)->ProgramSize();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int cre2_match(
|
||||||
|
const cre2 *re
|
||||||
|
, const char *text
|
||||||
|
, int textlen
|
||||||
|
, int startpos
|
||||||
|
, int endpos
|
||||||
|
, anchor_t anchor
|
||||||
|
, struct string_piece *match
|
||||||
|
, int nmatch) {
|
||||||
|
|
||||||
|
re2::StringPiece text_re2(text, textlen);
|
||||||
|
// FIXME: exceptions?
|
||||||
|
re2::StringPiece *match_re2 = new re2::StringPiece[nmatch];
|
||||||
|
|
||||||
|
RE2::Anchor anchor_re2 = RE2::UNANCHORED;
|
||||||
|
switch (anchor) {
|
||||||
|
case CRE2_ANCHOR_START:
|
||||||
|
anchor_re2 = RE2::ANCHOR_START; break;
|
||||||
|
case CRE2_ANCHOR_BOTH:
|
||||||
|
anchor_re2 = RE2::ANCHOR_BOTH; break;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ret = TO_CONST_RE2(re)
|
||||||
|
->Match(text_re2, startpos, endpos, anchor_re2, match_re2, nmatch);
|
||||||
|
|
||||||
|
if (ret) {
|
||||||
|
for (int i=0; i<nmatch; i++) {
|
||||||
|
match[i].data = match_re2[i].data();
|
||||||
|
match[i].length = match_re2[i].length();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
delete [] match_re2;
|
||||||
|
|
||||||
|
return int(ret);
|
||||||
|
}
|
70
src/cre2/cre2.cr
Normal file
70
src/cre2/cre2.cr
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
@[Link(ldflags: "#{__DIR__}/cre2.o -Wl,--copy-dt-needed-entries `pkg-config --libs re2`")]
|
||||||
|
lib LibCre2
|
||||||
|
type Options = Void*
|
||||||
|
|
||||||
|
fun opt_new = cre2_opt_new : Options
|
||||||
|
fun opt_delete = cre2_opt_delete(op : Options) : Nil
|
||||||
|
|
||||||
|
fun opt_posix_syntax = cre2_opt_posix_syntax(op : Options, flag : Bool) : Nil
|
||||||
|
fun opt_longest_match = cre2_opt_longest_match(op : Options, flag : Bool) : Nil
|
||||||
|
fun opt_log_errors = cre2_opt_log_errors(op : Options, flag : Bool) : Nil
|
||||||
|
fun opt_literal = cre2_opt_literal(op : Options, flag : Bool) : Nil
|
||||||
|
fun opt_never_nl = cre2_opt_never_nl(op : Options, flag : Bool) : Nil
|
||||||
|
fun opt_case_sensitive = cre2_opt_case_sensitive(op : Options, flag : Bool) : Nil
|
||||||
|
fun opt_perl_classes = cre2_opt_perl_classes(op : Options, flag : Bool) : Nil
|
||||||
|
fun opt_word_boundary = cre2_opt_word_boundary(op : Options, flag : Bool) : Nil
|
||||||
|
fun opt_one_line = cre2_opt_one_line(op : Options, flag : Bool) : Nil
|
||||||
|
fun opt_dot_nl = cre2_opt_dot_nl(op : Options, flag : Bool) : Nil
|
||||||
|
fun opt_encoding = cre2_opt_encoding(op : Options, encoding : Int32) : Nil
|
||||||
|
# Sets the memory budget for the compiled regex. The C prototype is
# `void cre2_opt_max_mem(cre2_options *opt, int m)`, so the second argument
# is an integer amount, not a boolean flag.
fun opt_max_mem = cre2_opt_max_mem(op : Options, max_mem : Int32) : Nil
|
||||||
|
|
||||||
|
struct StringPiece
|
||||||
|
data : LibC::Char*
|
||||||
|
length : Int32
|
||||||
|
end
|
||||||
|
|
||||||
|
type CRe2 = Void*
|
||||||
|
|
||||||
|
fun new = cre2_new(pattern : LibC::Char*, patternlen : UInt32, opt : Options) : CRe2
|
||||||
|
fun del = cre2_delete(re : CRe2) : Nil
|
||||||
|
# Returns the RE2 error code for a compiled pattern. The exported C symbol is
# `cre2_error_code` (see cre2.h); any other spelling fails at link time.
fun error_code = cre2_error_code(re : CRe2) : Int32
|
||||||
|
# Number of capturing groups in the compiled pattern. The C wrapper exports
# this with the `cre2_` prefix, so the binding has to name that symbol.
fun num_capturing_groups = cre2_num_capturing_groups(re : CRe2) : Int32
|
||||||
|
# Size of the compiled RE2 program. The C wrapper exports this with the
# `cre2_` prefix, so the binding has to name that symbol.
fun program_size = cre2_program_size(re : CRe2) : Int32
|
||||||
|
|
||||||
|
# Invalidated by further re use
|
||||||
|
fun error_string = cre2_error_string(re : CRe2) : LibC::Char*
|
||||||
|
fun error_arg = cre2_error_arg(re : CRe2, arg : StringPiece*) : Nil
|
||||||
|
|
||||||
|
CRE2_UNANCHORED = 1
|
||||||
|
CRE2_ANCHOR_START = 2
|
||||||
|
CRE2_ANCHOR_BOTH = 3
|
||||||
|
|
||||||
|
fun match = cre2_match(
|
||||||
|
re : CRe2,
|
||||||
|
text : LibC::Char*,
|
||||||
|
textlen : UInt32,
|
||||||
|
startpos : UInt32,
|
||||||
|
endpos : UInt32,
|
||||||
|
anchor : Int32,
|
||||||
|
match : StringPiece*,
|
||||||
|
nmatch : Int32
|
||||||
|
) : Int32
|
||||||
|
end
|
||||||
|
|
||||||
|
# match = Pointer(LibCre2::StringPiece).malloc(10)
|
||||||
|
# opts = LibCre2.opt_new
|
||||||
|
# LibCre2.opt_posix_syntax(opts, true)
|
||||||
|
# LibCre2.opt_longest_match(opts, true)
|
||||||
|
# LibCre2.opt_perl_classes(opts, true)
|
||||||
|
# LibCre2.opt_encoding(opts, 1)
|
||||||
|
# # LibCre2.opt_one_line(opts, false)
|
||||||
|
# # LibCre2.opt_never_nl(opts, false)
|
||||||
|
|
||||||
|
# pattern = "(\\s+)(foo)"
|
||||||
|
# text = " foo"
|
||||||
|
# re = LibCre2.new(pattern, pattern.size, opts)
|
||||||
|
# p! LibCre2.match(re, text, text.size, 0, text.size,
|
||||||
|
# LibCre2::CRE2_ANCHOR_START, match, 10)
|
||||||
|
# (0...10).each do |i|
|
||||||
|
# p! String.new(Slice.new(match[i].data, match[i].length))
|
||||||
|
# end
|
67
src/cre2/cre2.h
Normal file
67
src/cre2/cre2.h
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
typedef void cre2_options;
|
||||||
|
|
||||||
|
typedef int encoding_t;
|
||||||
|
#define CRE2_UTF8 1
|
||||||
|
#define CRE2_Latin1 2
|
||||||
|
|
||||||
|
cre2_options *cre2_opt_new(void);
|
||||||
|
void cre2_opt_delete(cre2_options *opt);
|
||||||
|
|
||||||
|
void cre2_opt_posix_syntax(cre2_options *opt, int flag);
|
||||||
|
void cre2_opt_longest_match(cre2_options *opt, int flag);
|
||||||
|
void cre2_opt_log_errors(cre2_options *opt, int flag);
|
||||||
|
void cre2_opt_literal(cre2_options *opt, int flag);
|
||||||
|
void cre2_opt_never_nl(cre2_options *opt, int flag);
|
||||||
|
void cre2_opt_case_sensitive(cre2_options *opt, int flag);
|
||||||
|
void cre2_opt_perl_classes(cre2_options *opt, int flag);
|
||||||
|
void cre2_opt_word_boundary(cre2_options *opt, int flag);
|
||||||
|
void cre2_opt_one_line(cre2_options *opt, int flag);
|
||||||
|
void cre2_opt_dot_nl(cre2_options *opt, int flag);
|
||||||
|
void cre2_opt_encoding(cre2_options *opt, encoding_t enc);
|
||||||
|
void cre2_opt_max_mem(cre2_options *opt, int m);
|
||||||
|
|
||||||
|
|
||||||
|
struct string_piece {
|
||||||
|
const char *data;
|
||||||
|
int length;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
typedef void cre2;
|
||||||
|
|
||||||
|
cre2 *cre2_new(const char *pattern, int patternlen, const cre2_options *opt);
|
||||||
|
void cre2_delete(cre2 *re);
|
||||||
|
|
||||||
|
int cre2_error_code(const cre2 *re);
|
||||||
|
int cre2_num_capturing_groups(const cre2 *re);
|
||||||
|
int cre2_program_size(const cre2 *re);
|
||||||
|
|
||||||
|
// invalidated by further re use
|
||||||
|
const char *cre2_error_string(const cre2 *re);
|
||||||
|
void cre2_error_arg(const cre2 *re, struct string_piece *arg);
|
||||||
|
|
||||||
|
|
||||||
|
typedef int anchor_t;
|
||||||
|
#define CRE2_UNANCHORED 1
|
||||||
|
#define CRE2_ANCHOR_START 2
|
||||||
|
#define CRE2_ANCHOR_BOTH 3
|
||||||
|
|
||||||
|
int cre2_match(
|
||||||
|
const cre2 *re
|
||||||
|
, const char *text
|
||||||
|
, int textlen
|
||||||
|
, int startpos
|
||||||
|
, int endpos
|
||||||
|
, anchor_t anchor
|
||||||
|
, struct string_piece *match
|
||||||
|
, int nmatch);
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
} // extern "C"
|
||||||
|
#endif
|
BIN
src/cre2/cre2.o
Normal file
BIN
src/cre2/cre2.o
Normal file
Binary file not shown.
@@ -1,10 +1,6 @@
|
|||||||
require "./actions"
|
require "./constants.cr"
|
||||||
require "./constants"
|
require "./styles.cr"
|
||||||
require "./formatter"
|
require "./tartrazine.cr"
|
||||||
require "./rules"
|
|
||||||
require "./styles"
|
|
||||||
require "./tartrazine"
|
|
||||||
require "colorize"
|
|
||||||
|
|
||||||
module Tartrazine
|
module Tartrazine
|
||||||
# This is the base class for all formatters.
|
# This is the base class for all formatters.
|
||||||
@@ -15,43 +11,30 @@ module Tartrazine
|
|||||||
raise Exception.new("Not implemented")
|
raise Exception.new("Not implemented")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# ameba:disable Metrics/CyclomaticComplexity
|
||||||
def get_style_defs(theme : Theme) : String
|
def get_style_defs(theme : Theme) : String
|
||||||
raise Exception.new("Not implemented")
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
class Ansi < Formatter
|
|
||||||
def format(text : String, lexer : Lexer, theme : Theme) : String
|
|
||||||
output = String.build do |outp|
|
output = String.build do |outp|
|
||||||
lexer.tokenize(text).each do |token|
|
theme.styles.each do |token, style|
|
||||||
outp << self.colorize(token[:value], token[:type], theme)
|
outp << ".#{get_css_class(token, theme)} {"
|
||||||
|
# These are set or nil
|
||||||
|
outp << "color: #{style.color};" if style.color
|
||||||
|
outp << "background-color: #{style.background};" if style.background
|
||||||
|
outp << "border: 1px solid #{style.border};" if style.border
|
||||||
|
|
||||||
|
# These are true/false/nil
|
||||||
|
outp << "border: none;" if style.border == false
|
||||||
|
outp << "font-weight: bold;" if style.bold
|
||||||
|
outp << "font-weight: 400;" if style.bold == false
|
||||||
|
outp << "font-style: italic;" if style.italic
|
||||||
|
outp << "font-style: normal;" if style.italic == false
|
||||||
|
outp << "text-decoration: underline;" if style.underline
|
||||||
|
outp << "text-decoration: none;" if style.underline == false
|
||||||
|
|
||||||
|
outp << "}"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
output
|
output
|
||||||
end
|
end
|
||||||
|
|
||||||
def colorize(text : String, token : String, theme : Theme) : String
|
|
||||||
style = theme.styles.fetch(token, nil)
|
|
||||||
return text if style.nil?
|
|
||||||
if theme.styles.has_key?(token)
|
|
||||||
s = theme.styles[token]
|
|
||||||
else
|
|
||||||
# Themes don't contain information for each specific
|
|
||||||
# token type. However, they may contain information
|
|
||||||
# for a parent style. Worst case, we go to the root
|
|
||||||
# (Background) style.
|
|
||||||
s = theme.styles[theme.style_parents(token).reverse.find { |parent|
|
|
||||||
theme.styles.has_key?(parent)
|
|
||||||
}]
|
|
||||||
end
|
|
||||||
colorized = text.colorize
|
|
||||||
s.color.try { |c| colorized = colorized.fore(c.colorize) }
|
|
||||||
# Intentionally not setting background color
|
|
||||||
colorized.mode(:bold) if s.bold
|
|
||||||
colorized.mode(:italic) if s.italic
|
|
||||||
colorized.mode(:underline) if s.underline
|
|
||||||
colorized.to_s
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
class Html < Formatter
|
class Html < Formatter
|
||||||
@@ -70,31 +53,6 @@ module Tartrazine
|
|||||||
output
|
output
|
||||||
end
|
end
|
||||||
|
|
||||||
# ameba:disable Metrics/CyclomaticComplexity
|
|
||||||
def get_style_defs(theme : Theme) : String
|
|
||||||
output = String.build do |outp|
|
|
||||||
theme.styles.each do |token, style|
|
|
||||||
outp << ".#{get_css_class(token, theme)} {"
|
|
||||||
# These are set or nil
|
|
||||||
outp << "color: #{style.color.try &.hex};" if style.color
|
|
||||||
outp << "background-color: #{style.background.try &.hex};" if style.background
|
|
||||||
outp << "border: 1px solid #{style.border.try &.hex};" if style.border
|
|
||||||
|
|
||||||
# These are true/false/nil
|
|
||||||
outp << "border: none;" if style.border == false
|
|
||||||
outp << "font-weight: bold;" if style.bold
|
|
||||||
outp << "font-weight: 400;" if style.bold == false
|
|
||||||
outp << "font-style: italic;" if style.italic
|
|
||||||
outp << "font-style: normal;" if style.italic == false
|
|
||||||
outp << "text-decoration: underline;" if style.underline
|
|
||||||
outp << "text-decoration: none;" if style.underline == false
|
|
||||||
|
|
||||||
outp << "}"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
output
|
|
||||||
end
|
|
||||||
|
|
||||||
# Given a token type, return the CSS class to use.
|
# Given a token type, return the CSS class to use.
|
||||||
def get_css_class(token, theme)
|
def get_css_class(token, theme)
|
||||||
return Abbreviations[token] if theme.styles.has_key?(token)
|
return Abbreviations[token] if theme.styles.has_key?(token)
|
||||||
|
0
src/re2.cr
Normal file
0
src/re2.cr
Normal file
64
src/rules.cr
64
src/rules.cr
@@ -1,9 +1,5 @@
|
|||||||
|
require "./cre2/cre2"
|
||||||
require "./actions"
|
require "./actions"
|
||||||
require "./constants"
|
|
||||||
require "./formatter"
|
|
||||||
require "./rules"
|
|
||||||
require "./styles"
|
|
||||||
require "./tartrazine"
|
|
||||||
|
|
||||||
# These are lexer rules. They match with the text being parsed
|
# These are lexer rules. They match with the text being parsed
|
||||||
# and perform actions, either emitting tokens or changing the
|
# and perform actions, either emitting tokens or changing the
|
||||||
@@ -12,29 +8,30 @@ module Tartrazine
|
|||||||
# This rule matches via a regex pattern
|
# This rule matches via a regex pattern
|
||||||
|
|
||||||
class Rule
|
class Rule
|
||||||
property pattern : Regex = Re2.new ""
|
property pattern : Re3 = Re3.new ""
|
||||||
property actions : Array(Action) = [] of Action
|
property actions : Array(Action) = [] of Action
|
||||||
property xml : String = "foo"
|
property xml : String = "foo"
|
||||||
|
|
||||||
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
|
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
|
||||||
match = pattern.match(text, pos)
|
matched, matches = pattern.match(text, pos)
|
||||||
# We don't match if the match doesn't move the cursor
|
# We don't match if the match doesn't move the cursor
|
||||||
# because that causes infinite loops
|
# because that causes infinite loops
|
||||||
return false, pos, [] of Token if match.nil? || match.end == 0
|
|
||||||
|
return false, pos, [] of Token unless matched
|
||||||
# Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
|
# Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
|
||||||
tokens = [] of Token
|
tokens = [] of Token
|
||||||
# Emit the tokens
|
# Emit the tokens
|
||||||
actions.each do |action|
|
actions.each do |action|
|
||||||
# Emit the token
|
# Emit the token
|
||||||
tokens += action.emit(match, lexer)
|
tokens += action.emit(matches, lexer)
|
||||||
end
|
end
|
||||||
Log.trace { "#{xml}, #{match.end}, #{tokens}" }
|
# Log.trace { "#{xml}, #{match.end}, #{tokens}" }
|
||||||
return true, match.end, tokens
|
# The second tuple element is the new cursor position (the no-match path
# returns `pos`, and the PCRE version returned the absolute `match.end`).
# The match is anchored at `pos`, and the StringPiece only carries the
# length of the matched text, so the end position is `pos` plus that length.
# NOTE(review): the length is in bytes; confirm `pos` is a byte offset too.
return true, pos + matches[0].length, tokens
|
||||||
end
|
end
|
||||||
|
|
||||||
def initialize(node : XML::Node, multiline, dotall, ignorecase)
|
def initialize(node : XML::Node, multiline, dotall, ignorecase)
|
||||||
@xml = node.to_s
|
@xml = node.to_s
|
||||||
@pattern = Re2.new(
|
@pattern = Re3.new(
|
||||||
node["pattern"],
|
node["pattern"],
|
||||||
multiline,
|
multiline,
|
||||||
dotall,
|
dotall,
|
||||||
@@ -81,7 +78,7 @@ module Tartrazine
|
|||||||
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
|
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
|
||||||
tokens = [] of Token
|
tokens = [] of Token
|
||||||
actions.each do |action|
|
actions.each do |action|
|
||||||
tokens += action.emit(nil, lexer)
|
tokens += action.emit(Pointer(LibCre2::StringPiece).malloc(1), lexer)
|
||||||
end
|
end
|
||||||
return true, pos, tokens
|
return true, pos, tokens
|
||||||
end
|
end
|
||||||
@@ -106,10 +103,49 @@ module Tartrazine
|
|||||||
flags |= LibPCRE2::DOTALL if dotall
|
flags |= LibPCRE2::DOTALL if dotall
|
||||||
flags |= LibPCRE2::CASELESS if ignorecase
|
flags |= LibPCRE2::CASELESS if ignorecase
|
||||||
flags |= LibPCRE2::ANCHORED if anchored
|
flags |= LibPCRE2::ANCHORED if anchored
|
||||||
flags |= LibPCRE2::NO_UTF_CHECK
|
|
||||||
@re = Regex::PCRE2.compile(pattern, flags) do |error_message|
|
@re = Regex::PCRE2.compile(pattern, flags) do |error_message|
|
||||||
raise Exception.new(error_message)
|
raise Exception.new(error_message)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
class Re3
|
||||||
|
@matches = Pointer(LibCre2::StringPiece).malloc(50)
|
||||||
|
@opts : LibCre2::Options
|
||||||
|
|
||||||
|
@re : LibCre2::CRe2
|
||||||
|
|
||||||
|
def group_count
|
||||||
|
LibCre2.num_capturing_groups(@re)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def initialize(pattern : String, multiline = false, dotall = false,
|
||||||
|
ignorecase = false, anchored = false)
|
||||||
|
@opts = LibCre2.opt_new
|
||||||
|
LibCre2.opt_posix_syntax(@opts, false)
|
||||||
|
LibCre2.opt_longest_match(@opts, false)
|
||||||
|
# These 3 are ignored when posix_syntax is false
|
||||||
|
# LibCre2.opt_one_line(@opts, !multiline)
|
||||||
|
# LibCre2.opt_perl_classes(@opts, true)
|
||||||
|
# LibCre2.opt_word_boundary(@opts, true)
|
||||||
|
LibCre2.opt_encoding(@opts, 1)
|
||||||
|
LibCre2.opt_case_sensitive(@opts, !ignorecase)
|
||||||
|
LibCre2.opt_dot_nl(@opts, dotall)
|
||||||
|
pattern = "(?m)#{pattern}" if multiline
|
||||||
|
@re = LibCre2.new(pattern, pattern.size, @opts)
|
||||||
|
end
|
||||||
|
|
||||||
|
def match(text, pos)
|
||||||
|
matched = LibCre2.match(@re, text, text.size, pos, text.size,
|
||||||
|
LibCre2::CRE2_ANCHOR_START, @matches, 50)
|
||||||
|
return {matched != 0, @matches}
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
# re2 doesn't support this (should match "x")
|
||||||
|
# re = Tartrazine::Re3.new("x(?!foo)", multiline: true, dotall: false)
|
||||||
|
# m = re.match("xfoo", 0)
|
||||||
|
# p m[0], m[1][0]
|
||||||
|
|
||||||
|
@@ -1,23 +1,11 @@
|
|||||||
require "./actions"
|
|
||||||
require "./constants"
|
|
||||||
require "./formatter"
|
|
||||||
require "./rules"
|
|
||||||
require "./styles"
|
|
||||||
require "./tartrazine"
|
|
||||||
require "sixteen"
|
require "sixteen"
|
||||||
require "xml"
|
require "xml"
|
||||||
|
|
||||||
module Tartrazine
|
module Tartrazine
|
||||||
alias Color = Sixteen::Color
|
|
||||||
|
|
||||||
def self.theme(name : String) : Theme
|
def self.theme(name : String) : Theme
|
||||||
return Theme.from_base16(name[7..]) if name.starts_with? "base16_"
|
return Theme.from_base16(name[7..]) if name.starts_with? "base16_"
|
||||||
Theme.from_xml(ThemeFiles.get("/#{name}.xml").gets_to_end)
|
path = File.join("styles", "#{name}.xml")
|
||||||
end
|
Theme.from_xml(File.read(path))
|
||||||
|
|
||||||
class ThemeFiles
|
|
||||||
extend BakedFileSystem
|
|
||||||
bake_folder "../styles", __DIR__
|
|
||||||
end
|
end
|
||||||
|
|
||||||
class Style
|
class Style
|
||||||
@@ -31,9 +19,9 @@ module Tartrazine
|
|||||||
|
|
||||||
# These properties are either set or nil
|
# These properties are either set or nil
|
||||||
# (inherit from parent style)
|
# (inherit from parent style)
|
||||||
property background : Color?
|
property background : String?
|
||||||
property border : Color?
|
property border : String?
|
||||||
property color : Color?
|
property color : String?
|
||||||
|
|
||||||
# Styles are incomplete by default and inherit
|
# Styles are incomplete by default and inherit
|
||||||
# from parents. If this is true, this style
|
# from parents. If this is true, this style
|
||||||
@@ -103,33 +91,33 @@ module Tartrazine
|
|||||||
# The color assignments are adapted from
|
# The color assignments are adapted from
|
||||||
# https://github.com/mohd-akram/base16-pygments/
|
# https://github.com/mohd-akram/base16-pygments/
|
||||||
|
|
||||||
theme.styles["Background"] = Style.new(color: t["base05"], background: t["base00"])
|
theme.styles["Background"] = Style.new(color: t.palette["base05"], background: t.palette["base00"])
|
||||||
theme.styles["Text"] = Style.new(color: t["base05"])
|
theme.styles["Text"] = Style.new(color: t.palette["base05"])
|
||||||
theme.styles["Error"] = Style.new(color: t["base08"])
|
theme.styles["Error"] = Style.new(color: t.palette["base08"])
|
||||||
theme.styles["Comment"] = Style.new(color: t["base03"])
|
theme.styles["Comment"] = Style.new(color: t.palette["base03"])
|
||||||
theme.styles["CommentPreproc"] = Style.new(color: t["base0F"])
|
theme.styles["CommentPreproc"] = Style.new(color: t.palette["base0F"])
|
||||||
theme.styles["CommentPreprocFile"] = Style.new(color: t["base0B"])
|
theme.styles["CommentPreprocFile"] = Style.new(color: t.palette["base0B"])
|
||||||
theme.styles["Keyword"] = Style.new(color: t["base0E"])
|
theme.styles["Keyword"] = Style.new(color: t.palette["base0E"])
|
||||||
theme.styles["KeywordType"] = Style.new(color: t["base08"])
|
theme.styles["KeywordType"] = Style.new(color: t.palette["base08"])
|
||||||
theme.styles["NameAttribute"] = Style.new(color: t["base0D"])
|
theme.styles["NameAttribute"] = Style.new(color: t.palette["base0D"])
|
||||||
theme.styles["NameBuiltin"] = Style.new(color: t["base08"])
|
theme.styles["NameBuiltin"] = Style.new(color: t.palette["base08"])
|
||||||
theme.styles["NameBuiltinPseudo"] = Style.new(color: t["base08"])
|
theme.styles["NameBuiltinPseudo"] = Style.new(color: t.palette["base08"])
|
||||||
theme.styles["NameClass"] = Style.new(color: t["base0D"])
|
theme.styles["NameClass"] = Style.new(color: t.palette["base0D"])
|
||||||
theme.styles["NameConstant"] = Style.new(color: t["base09"])
|
theme.styles["NameConstant"] = Style.new(color: t.palette["base09"])
|
||||||
theme.styles["NameDecorator"] = Style.new(color: t["base09"])
|
theme.styles["NameDecorator"] = Style.new(color: t.palette["base09"])
|
||||||
theme.styles["NameFunction"] = Style.new(color: t["base0D"])
|
theme.styles["NameFunction"] = Style.new(color: t.palette["base0D"])
|
||||||
theme.styles["NameNamespace"] = Style.new(color: t["base0D"])
|
theme.styles["NameNamespace"] = Style.new(color: t.palette["base0D"])
|
||||||
theme.styles["NameTag"] = Style.new(color: t["base0E"])
|
theme.styles["NameTag"] = Style.new(color: t.palette["base0E"])
|
||||||
theme.styles["NameVariable"] = Style.new(color: t["base0D"])
|
theme.styles["NameVariable"] = Style.new(color: t.palette["base0D"])
|
||||||
theme.styles["NameVariableInstance"] = Style.new(color: t["base08"])
|
theme.styles["NameVariableInstance"] = Style.new(color: t.palette["base08"])
|
||||||
theme.styles["LiteralNumber"] = Style.new(color: t["base09"])
|
theme.styles["LiteralNumber"] = Style.new(color: t.palette["base09"])
|
||||||
theme.styles["Operator"] = Style.new(color: t["base0C"])
|
theme.styles["Operator"] = Style.new(color: t.palette["base0C"])
|
||||||
theme.styles["OperatorWord"] = Style.new(color: t["base0E"])
|
theme.styles["OperatorWord"] = Style.new(color: t.palette["base0E"])
|
||||||
theme.styles["Literal"] = Style.new(color: t["base0B"])
|
theme.styles["Literal"] = Style.new(color: t.palette["base0B"])
|
||||||
theme.styles["LiteralString"] = Style.new(color: t["base0B"])
|
theme.styles["LiteralString"] = Style.new(color: t.palette["base0B"])
|
||||||
theme.styles["LiteralStringInterpol"] = Style.new(color: t["base0F"])
|
theme.styles["LiteralStringInterpol"] = Style.new(color: t.palette["base0F"])
|
||||||
theme.styles["LiteralStringRegex"] = Style.new(color: t["base0C"])
|
theme.styles["LiteralStringRegex"] = Style.new(color: t.palette["base0C"])
|
||||||
theme.styles["LiteralStringSymbol"] = Style.new(color: t["base09"])
|
theme.styles["LiteralStringSymbol"] = Style.new(color: t.palette["base09"])
|
||||||
theme
|
theme
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -156,9 +144,9 @@ module Tartrazine
|
|||||||
s.underline = true if style.includes?("underline")
|
s.underline = true if style.includes?("underline")
|
||||||
s.underline = false if style.includes?("nounderline")
|
s.underline = false if style.includes?("nounderline")
|
||||||
|
|
||||||
s.color = style.find(&.starts_with?("#")).try { |v| Color.new v.split("#").last }
|
s.color = style.find(&.starts_with?("#")).try &.split("#").last
|
||||||
s.background = style.find(&.starts_with?("bg:#")).try { |v| Color.new v.split("#").last }
|
s.background = style.find(&.starts_with?("bg:#")).try &.split("#").last
|
||||||
s.border = style.find(&.starts_with?("border:#")).try { |v| Color.new v.split("#").last }
|
s.border = style.find(&.starts_with?("border:#")).try &.split("#").last
|
||||||
|
|
||||||
theme.styles[node["type"]] = s
|
theme.styles[node["type"]] = s
|
||||||
end
|
end
|
||||||
|
@@ -1,10 +1,5 @@
|
|||||||
require "./actions"
|
require "./actions"
|
||||||
require "./constants"
|
|
||||||
require "./formatter"
|
|
||||||
require "./rules"
|
require "./rules"
|
||||||
require "./styles"
|
|
||||||
require "./tartrazine"
|
|
||||||
require "baked_file_system"
|
|
||||||
require "base58"
|
require "base58"
|
||||||
require "json"
|
require "json"
|
||||||
require "log"
|
require "log"
|
||||||
@@ -12,7 +7,7 @@ require "xml"
|
|||||||
|
|
||||||
module Tartrazine
|
module Tartrazine
|
||||||
extend self
|
extend self
|
||||||
VERSION = "0.1.1"
|
VERSION = "0.1.0"
|
||||||
|
|
||||||
Log = ::Log.for("tartrazine")
|
Log = ::Log.for("tartrazine")
|
||||||
|
|
||||||
@@ -38,12 +33,6 @@ module Tartrazine
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class LexerFiles
|
|
||||||
extend BakedFileSystem
|
|
||||||
|
|
||||||
bake_folder "../lexers", __DIR__
|
|
||||||
end
|
|
||||||
|
|
||||||
# A token, the output of the tokenizer
|
# A token, the output of the tokenizer
|
||||||
alias Token = NamedTuple(type: String, value: String)
|
alias Token = NamedTuple(type: String, value: String)
|
||||||
|
|
||||||
@@ -83,22 +72,22 @@ module Tartrazine
|
|||||||
# Loop through the text, applying rules
|
# Loop through the text, applying rules
|
||||||
while pos < text.size
|
while pos < text.size
|
||||||
state = states[@state_stack.last]
|
state = states[@state_stack.last]
|
||||||
# Log.trace { "Stack is #{@state_stack} State is #{state.name}, pos is #{pos}, text is #{text[pos..pos + 10]}" }
|
Log.trace { "Stack is #{@state_stack} State is #{state.name}, pos is #{pos}, text is #{text[pos..pos + 10]}" }
|
||||||
state.rules.each do |rule|
|
state.rules.each do |rule|
|
||||||
matched, new_pos, new_tokens = rule.match(text, pos, self)
|
matched, new_pos, new_tokens = rule.match(text, pos, self)
|
||||||
if matched
|
if matched
|
||||||
# Move position forward, save the tokens,
|
# Move position forward, save the tokens,
|
||||||
# tokenize from the new position
|
# tokenize from the new position
|
||||||
# Log.trace { "MATCHED: #{rule.xml}" }
|
Log.trace { "MATCHED: #{rule.xml}" }
|
||||||
pos = new_pos
|
pos = new_pos
|
||||||
tokens += new_tokens
|
tokens += new_tokens
|
||||||
break
|
break
|
||||||
end
|
end
|
||||||
# Log.trace { "NOT MATCHED: #{rule.xml}" }
|
Log.trace { "NOT MATCHED: #{rule.xml}" }
|
||||||
end
|
end
|
||||||
# If no rule matches, emit an error token
|
# If no rule matches, emit an error token
|
||||||
unless matched
|
unless matched
|
||||||
# Log.trace { "Error at #{pos}" }
|
Log.trace { "Error at #{pos}" }
|
||||||
tokens << {type: "Error", value: "#{text[pos]}"}
|
tokens << {type: "Error", value: "#{text[pos]}"}
|
||||||
pos += 1
|
pos += 1
|
||||||
end
|
end
|
||||||
@@ -193,7 +182,7 @@ module Tartrazine
|
|||||||
end
|
end
|
||||||
|
|
||||||
def self.lexer(name : String) : Lexer
|
def self.lexer(name : String) : Lexer
|
||||||
Lexer.from_xml(LexerFiles.get("/#{name}.xml").gets_to_end)
|
Lexer.from_xml(File.read("lexers/#{name}.xml"))
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user