mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-09-17 10:48:12 +00:00
Compare commits
9 Commits
v0.2.0
...
32816eb207
Author | SHA1 | Date | |
---|---|---|---|
32816eb207 | |||
d2b61fdc6c | |||
a704c59fa9 | |||
2a9e7fde0d | |||
d49d0969a9 | |||
ce6f3d29b5 | |||
46d6d3f467 | |||
78ddc69937 | |||
b1ad7b64c0 |
106
.ameba.yml
106
.ameba.yml
@@ -1,5 +1,5 @@
|
||||
# This configuration file was generated by `ameba --gen-config`
|
||||
# on 2024-08-04 23:09:09 UTC using Ameba version 1.6.1.
|
||||
# on 2024-08-12 22:00:49 UTC using Ameba version 1.6.1.
|
||||
# The point is for the user to remove these configuration records
|
||||
# one by one as the reported problems are removed from the code base.
|
||||
|
||||
@@ -9,7 +9,7 @@ Documentation/DocumentationAdmonition:
|
||||
Description: Reports documentation admonitions
|
||||
Timezone: UTC
|
||||
Excluded:
|
||||
- src/tartrazine.cr
|
||||
- src/lexer.cr
|
||||
- src/actions.cr
|
||||
Admonitions:
|
||||
- TODO
|
||||
@@ -17,3 +17,105 @@ Documentation/DocumentationAdmonition:
|
||||
- BUG
|
||||
Enabled: true
|
||||
Severity: Warning
|
||||
|
||||
# Problems found: 22
|
||||
# Run `ameba --only Lint/MissingBlockArgument` for details
|
||||
Lint/MissingBlockArgument:
|
||||
Description: Disallows yielding method definitions without block argument
|
||||
Excluded:
|
||||
- pygments/tests/examplefiles/cr/test.cr
|
||||
Enabled: true
|
||||
Severity: Warning
|
||||
|
||||
# Problems found: 1
|
||||
# Run `ameba --only Lint/NotNil` for details
|
||||
Lint/NotNil:
|
||||
Description: Identifies usage of `not_nil!` calls
|
||||
Excluded:
|
||||
- pygments/tests/examplefiles/cr/test.cr
|
||||
Enabled: true
|
||||
Severity: Warning
|
||||
|
||||
# Problems found: 34
|
||||
# Run `ameba --only Lint/ShadowingOuterLocalVar` for details
|
||||
Lint/ShadowingOuterLocalVar:
|
||||
Description: Disallows the usage of the same name as outer local variables for block
|
||||
or proc arguments
|
||||
Excluded:
|
||||
- pygments/tests/examplefiles/cr/test.cr
|
||||
Enabled: true
|
||||
Severity: Warning
|
||||
|
||||
# Problems found: 1
|
||||
# Run `ameba --only Lint/UnreachableCode` for details
|
||||
Lint/UnreachableCode:
|
||||
Description: Reports unreachable code
|
||||
Excluded:
|
||||
- pygments/tests/examplefiles/cr/test.cr
|
||||
Enabled: true
|
||||
Severity: Warning
|
||||
|
||||
# Problems found: 6
|
||||
# Run `ameba --only Lint/UselessAssign` for details
|
||||
Lint/UselessAssign:
|
||||
Description: Disallows useless variable assignments
|
||||
ExcludeTypeDeclarations: false
|
||||
Excluded:
|
||||
- pygments/tests/examplefiles/cr/test.cr
|
||||
Enabled: true
|
||||
Severity: Warning
|
||||
|
||||
# Problems found: 3
|
||||
# Run `ameba --only Naming/BlockParameterName` for details
|
||||
Naming/BlockParameterName:
|
||||
Description: Disallows non-descriptive block parameter names
|
||||
MinNameLength: 3
|
||||
AllowNamesEndingInNumbers: true
|
||||
Excluded:
|
||||
- pygments/tests/examplefiles/cr/test.cr
|
||||
AllowedNames:
|
||||
- _
|
||||
- e
|
||||
- i
|
||||
- j
|
||||
- k
|
||||
- v
|
||||
- x
|
||||
- y
|
||||
- ex
|
||||
- io
|
||||
- ws
|
||||
- op
|
||||
- tx
|
||||
- id
|
||||
- ip
|
||||
- k1
|
||||
- k2
|
||||
- v1
|
||||
- v2
|
||||
ForbiddenNames: []
|
||||
Enabled: true
|
||||
Severity: Convention
|
||||
|
||||
# Problems found: 1
|
||||
# Run `ameba --only Naming/RescuedExceptionsVariableName` for details
|
||||
Naming/RescuedExceptionsVariableName:
|
||||
Description: Makes sure that rescued exceptions variables are named as expected
|
||||
Excluded:
|
||||
- pygments/tests/examplefiles/cr/test.cr
|
||||
AllowedNames:
|
||||
- e
|
||||
- ex
|
||||
- exception
|
||||
- error
|
||||
Enabled: true
|
||||
Severity: Convention
|
||||
|
||||
# Problems found: 6
|
||||
# Run `ameba --only Naming/TypeNames` for details
|
||||
Naming/TypeNames:
|
||||
Description: Enforces type names in camelcase manner
|
||||
Excluded:
|
||||
- pygments/tests/examplefiles/cr/test.cr
|
||||
Enabled: true
|
||||
Severity: Convention
|
||||
|
@@ -42,6 +42,9 @@ known_bad = {
|
||||
"#{__DIR__}/tests/mcfunction/selectors.txt",
|
||||
"#{__DIR__}/tests/php/anonymous_class.txt",
|
||||
"#{__DIR__}/tests/html/javascript_unclosed.txt",
|
||||
# BAD FOR ONIGMO
|
||||
"#{__DIR__}/tests/json/test_backtracking.txt",
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -58,6 +61,7 @@ describe Tartrazine do
|
||||
end
|
||||
else
|
||||
it "parses #{testcase}".split("/")[-2...].join("/") do
|
||||
p! testcase
|
||||
text = File.read(testcase).split("---input---\n").last.split("---tokens---").first
|
||||
lexer_name = File.basename(File.dirname(testcase)).downcase
|
||||
unless failing_lexers.includes?(lexer_name) ||
|
||||
|
@@ -30,11 +30,11 @@ module Tartrazine
|
||||
end
|
||||
|
||||
# ameba:disable Metrics/CyclomaticComplexity
|
||||
def emit(match : Regex::MatchData?, lexer : Lexer, match_group = 0) : Array(Token)
|
||||
def emit(match, lexer : Lexer, match_group = 0) : Array(Token)
|
||||
case type
|
||||
when "token"
|
||||
raise Exception.new "Can't have a token without a match" if match.nil?
|
||||
[Token.new(type: xml["type"], value: match[match_group])]
|
||||
[Token.new(type: xml["type"], value: match[match_group].as(Onigmo::Match).value)]
|
||||
when "push"
|
||||
states_to_push = xml.attributes.select { |attrib|
|
||||
attrib.name == "state"
|
||||
@@ -88,14 +88,14 @@ module Tartrazine
|
||||
return [] of Token if match.nil?
|
||||
lexer_name = xml["lexer"].downcase
|
||||
Log.trace { "to tokenize: #{match[match_group]}" }
|
||||
Tartrazine.lexer(lexer_name).tokenize(match[match_group], usingself: true)
|
||||
Tartrazine.lexer(lexer_name).tokenize(match[match_group].as(Onigmo::Match).value, usingself: true)
|
||||
when "usingself"
|
||||
# Shunt to another copy of this lexer
|
||||
return [] of Token if match.nil?
|
||||
|
||||
new_lexer = Lexer.from_xml(lexer.xml)
|
||||
Log.trace { "to tokenize: #{match[match_group]}" }
|
||||
new_lexer.tokenize(match[match_group], usingself: true)
|
||||
new_lexer.tokenize(match[match_group].as(Onigmo::Match).value, usingself: true)
|
||||
when "combined"
|
||||
# Combine two states into one anonymous state
|
||||
states = xml.attributes.select { |attrib|
|
||||
|
@@ -15,6 +15,7 @@ module Tartrazine
|
||||
property? standalone : Bool = false
|
||||
property? surrounding_pre : Bool = true
|
||||
property? wrap_long_lines : Bool = false
|
||||
property? weight_of_bold : Int32 = 600
|
||||
|
||||
def format(text : String, lexer : Lexer, theme : Theme) : String
|
||||
text = format_text(text, lexer, theme)
|
||||
@@ -43,7 +44,7 @@ module Tartrazine
|
||||
pre_style = wrap_long_lines? ? "style=\"white-space: pre-wrap; word-break: break-word;\"" : ""
|
||||
outp << "<pre class=\"#{get_css_class("Background", theme)}\" #{pre_style}>"
|
||||
end
|
||||
"<code class=\"#{get_css_class("Background", theme)}\">"
|
||||
outp << "<code class=\"#{get_css_class("Background", theme)}\">"
|
||||
lines.each_with_index(offset: line_number_start - 1) do |line, i|
|
||||
line_label = line_numbers? ? "#{i + 1}".rjust(4).ljust(5) : ""
|
||||
line_class = highlighted?(i + 1) ? "class=\"#{get_css_class("LineHighlight", theme)}\"" : ""
|
||||
@@ -72,7 +73,7 @@ module Tartrazine
|
||||
# These are true/false/nil
|
||||
outp << "border: none;" if style.border == false
|
||||
outp << "font-weight: bold;" if style.bold
|
||||
outp << "font-weight: 400;" if style.bold == false
|
||||
outp << "font-weight: #{@weight_of_bold};" if style.bold == false
|
||||
outp << "font-style: italic;" if style.italic
|
||||
outp << "font-style: normal;" if style.italic == false
|
||||
outp << "text-decoration: underline;" if style.underline
|
||||
|
85
src/onigmo.cr
Normal file
85
src/onigmo.cr
Normal file
@@ -0,0 +1,85 @@
|
||||
# Low-level bindings to the Onigmo regex engine, reached through the small
# C shim in `src/onigmo/onigwrap.c` (linked in as `onigwrap.o`).
#
# Integer parameters use `LibC::Int` because every wrapper on the C side is
# declared with plain `int` arguments.
@[Link("onigmo")]
@[Link(ldflags: "#{__DIR__}/onigmo/onigwrap.o")]
lib LibOnigmo
  # Opaque handle to a compiled pattern (`regex_t *` in the C shim).
  type Regex = Pointer(Void)
  # Opaque handle to a match result (`OnigRegion *` in the C shim).
  type Region = Pointer(Void)

  # Compiles `len` bytes of `pattern`; each flag is enabled by passing 1.
  fun create = onigwrap_create(pattern : LibC::Char*, len : LibC::Int,
                               ignoreCase : LibC::Int,
                               multiline : LibC::Int,
                               dotall : LibC::Int) : Regex
  # Releases a pattern obtained from `create`.
  fun free = onigwrap_free(re : Regex)
  # Releases a region obtained from `search`.
  fun region_free = onigwrap_region_free(region : Region)

  # Searches `length` bytes of `str` starting at byte `offset`.
  # The returned region is owned by the caller and must go to `region_free`.
  fun search = onigwrap_search(re : Regex, str : LibC::Char*, offset : LibC::Int, length : LibC::Int) : Region
  # Number of capture registers stored in `region`.
  fun num_regs = onigwrap_num_regs(region : Region) : LibC::Int
  # Start of register `index`, or -1 when it is unset or out of range.
  fun pos = onigwrap_pos(region : Region, index : LibC::Int) : LibC::Int
  # Length of register `index`, or -1 when `index` is out of range.
  fun len = onigwrap_len(region : Region, index : LibC::Int) : LibC::Int
end
|
||||
|
||||
module Onigmo
|
||||
# One capture group produced by `Regex#match`.
#
# `begin` and `end` are the indexes handed to the constructor (`Regex#match`
# passes character indexes), and `value` is the captured text.
class Match
  property begin : Int32
  property end : Int32
  property value : String

  def initialize(@begin, @end, @value)
  end

  # Returns the captured text.
  def to_s : String
    @value
  end

  # Writes the captured text to *io*. String interpolation and `IO#<<` go
  # through this overload, so without it a `Match` would print as the
  # default object representation instead of the text it matched.
  def to_s(io : IO) : Nil
    io << @value
  end
end
|
||||
|
||||
# A pattern compiled by Onigmo through the `LibOnigmo` bindings.
class Regex
  # Compiles *pattern*. Each boolean is forwarded to the C shim as 1 or 0.
  def initialize(@pattern : String, @ignorecase = false, @multiline = false, @dotall = false)
    @re = LibOnigmo.create(@pattern.to_unsafe, @pattern.bytesize, @ignorecase ? 1 : 0, @multiline ? 1 : 0, @dotall ? 1 : 0)
  end

  # Releases the native pattern when this object is garbage collected.
  def finalize
    LibOnigmo.free(@re)
  end

  # Searches *str* starting at character index *offset*.
  #
  # Returns one entry per capture register (index 0 is the whole match);
  # an entry is `nil` when that register did not take part in the match.
  # The array is empty when the search reports no registers.
  #
  # Raises when *offset* is not a valid character index of *str*, or when
  # a reported byte position cannot be mapped back to a character index.
  def match(str : String, offset = 0)
    # The offset argument is a character index, but Onigmo expects a byte index
    byte_offset = str.char_index_to_byte_index(offset)
    raise Exception.new "Invalid offset" if byte_offset.nil?

    region = LibOnigmo.search(@re, str.to_unsafe, byte_offset, str.bytesize)
    begin
      groups = [] of Match?
      # `times` yields nothing for a zero or negative count, which leaves
      # the result empty in that case.
      LibOnigmo.num_regs(region).times do |i|
        pos = LibOnigmo.pos(region, i)
        len = LibOnigmo.len(region, i)
        if pos == -1 || len == -1
          groups << nil
          next
        end

        # Onigmo reports byte positions; callers work with character indexes.
        first = str.byte_index_to_char_index(pos)
        last = str.byte_index_to_char_index(pos + len)
        raise Exception.new "Invalid substring" if first.nil? || last.nil?

        text = str[first...last]
        groups << Match.new(first, first + text.size, text)
      end
      groups
    ensure
      # Always hand the region back, including on the raise above.
      LibOnigmo.region_free(region)
    end
  end
end
|
||||
end
|
||||
|
||||
# pattern = "\\w"
|
||||
# str = "α"
|
||||
|
||||
# re = Onigmo::Regex.new(pattern, false, false, false)
|
||||
# p! re.match(str)
|
94
src/onigmo/onigwrap.c
Normal file
94
src/onigmo/onigwrap.c
Normal file
@@ -0,0 +1,94 @@
|
||||
#include "onigmo.h"
|
||||
|
||||
regex_t *onigwrap_create(char *pattern, int len, int ignoreCase, int multiline, int dotall)
|
||||
{
|
||||
regex_t *reg;
|
||||
|
||||
OnigErrorInfo einfo;
|
||||
|
||||
OnigOptionType onigOptions = ONIG_OPTION_DEFAULT;
|
||||
|
||||
if (ignoreCase == 1)
|
||||
onigOptions |= ONIG_OPTION_IGNORECASE;
|
||||
|
||||
if (multiline == 1)
|
||||
onigOptions |= ONIG_OPTION_NEGATE_SINGLELINE;
|
||||
|
||||
if (dotall == 1)
|
||||
onigOptions |= ONIG_OPTION_DOTALL;
|
||||
|
||||
OnigUChar *stringStart = (OnigUChar*) pattern;
|
||||
OnigUChar *stringEnd = (OnigUChar*) pattern + len;
|
||||
int res = onig_new(®, stringStart, stringEnd, onigOptions, ONIG_ENCODING_UTF8, ONIG_SYNTAX_PYTHON, &einfo);
|
||||
|
||||
return reg;
|
||||
}
|
||||
|
||||
/* Release a region returned by onigwrap_search(), including the region
 * structure itself (second argument 1). */
void onigwrap_region_free(OnigRegion *region) {
    onig_region_free(region, 1);
}
|
||||
|
||||
/* Release a regex created by onigwrap_create(). */
void onigwrap_free(regex_t *reg) {
    onig_free(reg);
}
|
||||
|
||||
/*
 * Search `length` bytes at `charPtr`, starting at byte `offset`, and report
 * where the match begins.
 *
 * Returns the byte offset of the match start, -1 when nothing matches, or
 * -2 when onig_search() reports an error.
 */
int onigwrap_index_in(regex_t *reg, char *charPtr, int offset, int length)
{
    OnigUChar *stringStart = (OnigUChar*) charPtr;
    OnigUChar *stringEnd = (OnigUChar*) (charPtr + length);
    OnigUChar *stringOffset = (OnigUChar*) (charPtr + offset);
    OnigUChar *stringRange = (OnigUChar*) stringEnd;

    OnigRegion *region = onig_region_new();
    int result = onig_search(reg, stringStart, stringEnd, stringOffset, stringRange, region, ONIG_OPTION_NONE);
    onig_region_free(region, 1);

    /* Patterns are compiled as UTF-8 in onigwrap_create(), so the position
     * is already a byte offset; halving it (as a two-byte encoding would
     * need) gives the wrong index. */
    if (result >= 0)
        return result;
    if (result == ONIG_MISMATCH)
        return -1;
    return -2;
}
|
||||
|
||||
/*
 * Search `length` bytes at `charPtr`, starting at byte `offset`, and return
 * the region filled in by onig_search().
 *
 * The search status itself is not reported; callers inspect the region via
 * onigwrap_num_regs(), onigwrap_pos() and onigwrap_len(), then release it
 * with onigwrap_region_free().
 */
OnigRegion *onigwrap_search(regex_t *reg, char *charPtr, int offset, int length) {
    OnigUChar *text = (OnigUChar*) charPtr;
    OnigUChar *textEnd = (OnigUChar*) (charPtr + length);
    OnigUChar *searchFrom = (OnigUChar*) (charPtr + offset);
    OnigUChar *searchTo = textEnd;

    OnigRegion *captures = onig_region_new();

    (void) onig_search(reg, text, textEnd, searchFrom, searchTo, captures, ONIG_OPTION_NONE);
    return captures;
}
|
||||
|
||||
/* Number of capture registers recorded in `region`. */
int onigwrap_num_regs(OnigRegion *region) {
    return region->num_regs;
}
|
||||
|
||||
/*
 * Start position of capture register `nth` in `region`.
 *
 * Returns -1 when `nth` is outside [0, num_regs) or the stored start is
 * negative.
 */
int onigwrap_pos(OnigRegion *region, int nth)
{
    /* Reject negative indexes too; they would read before the array. */
    if (nth < 0 || nth >= region->num_regs)
        return -1;

    int result = region->beg[nth];
    if (result < 0)
        return -1;
    return result;
}
|
||||
|
||||
/*
 * Length of capture register `nth` in `region` (end minus start).
 *
 * Returns -1 when `nth` is outside [0, num_regs) or when the register has a
 * negative start or end, mirroring what onigwrap_pos() reports for it.
 */
int onigwrap_len(OnigRegion *region, int nth)
{
    /* Reject negative indexes too; they would read before the arrays. */
    if (nth < 0 || nth >= region->num_regs)
        return -1;

    /* A register with a negative bound has no meaningful length. */
    if (region->beg[nth] < 0 || region->end[nth] < 0)
        return -1;

    return (int) (region->end[nth] - region->beg[nth]);
}
|
||||
|
32
src/onigmo/onigwrap.h
Normal file
32
src/onigmo/onigwrap.h
Normal file
@@ -0,0 +1,32 @@
|
||||
/* Public declarations for the Onigmo wrapper implemented in onigwrap.c. */
#ifndef ONIGWRAP_H
#define ONIGWRAP_H

#include "onigmo.h"

#if defined(_WIN32)
#define ONIGWRAP_EXTERN extern __declspec(dllexport)
#else
#define ONIGWRAP_EXTERN extern
#endif

/* Compile `len` bytes of `pattern`; each flag is enabled by passing 1.
 * The parameter list matches the definition in onigwrap.c, including
 * `dotall`. */
ONIGWRAP_EXTERN
regex_t *onigwrap_create(char *pattern, int len, int ignoreCase, int multiline, int dotall);

/* Release a region returned by onigwrap_search(). */
ONIGWRAP_EXTERN
void onigwrap_region_free(OnigRegion *region);

/* Release a regex returned by onigwrap_create(). */
ONIGWRAP_EXTERN
void onigwrap_free(regex_t *reg);

/* Search and return where the match starts (negative when none). */
ONIGWRAP_EXTERN
int onigwrap_index_in(regex_t *reg, char *charPtr, int offset, int length);

/* Search and return the region holding the capture registers. */
ONIGWRAP_EXTERN
OnigRegion *onigwrap_search(regex_t *reg, char *charPtr, int offset, int length);

/* Number of capture registers in `region`. */
ONIGWRAP_EXTERN
int onigwrap_num_regs(OnigRegion *region);

/* Start of register `nth`, or -1. */
ONIGWRAP_EXTERN
int onigwrap_pos(OnigRegion *region, int nth);

/* Length of register `nth`, or -1. */
ONIGWRAP_EXTERN
int onigwrap_len(OnigRegion *region, int nth);

#endif /* ONIGWRAP_H */
|
||||
|
53
src/rules.cr
53
src/rules.cr
@@ -3,6 +3,7 @@ require "./formatter"
|
||||
require "./rules"
|
||||
require "./styles"
|
||||
require "./lexer"
|
||||
require "./onigmo"
|
||||
|
||||
# These are lexer rules. They match with the text being parsed
|
||||
# and perform actions, either emitting tokens or changing the
|
||||
@@ -10,16 +11,22 @@ require "./lexer"
|
||||
module Tartrazine
|
||||
# This rule matches via a regex pattern
|
||||
|
||||
alias Regex = Onigmo::Regex
|
||||
|
||||
class Rule
|
||||
property pattern : Regex = Re2.new ""
|
||||
property pattern : Regex = Regex.new ""
|
||||
property pattern2 : ::Regex = ::Regex.new ""
|
||||
property actions : Array(Action) = [] of Action
|
||||
property xml : String = "foo"
|
||||
|
||||
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
|
||||
match = pattern.match(text, pos)
|
||||
match2 = pattern2.match(text, pos)
|
||||
# We don't match if the match doesn't move the cursor
|
||||
# because that causes infinite loops
|
||||
return false, pos, [] of Token if match.nil? || match.end == 0
|
||||
# The `match.begin > pos` is the same as the ANCHORED option
|
||||
return false, pos, [] of Token if match.empty? || match[0].nil? || match[0].try { |m| m.begin > pos }
|
||||
# p! match.map(&.to_s), match2, text[pos-1..pos + 20],"----------------------"
|
||||
# Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
|
||||
tokens = [] of Token
|
||||
# Emit the tokens
|
||||
@@ -27,18 +34,23 @@ module Tartrazine
|
||||
# Emit the token
|
||||
tokens += action.emit(match, lexer)
|
||||
end
|
||||
Log.trace { "#{xml}, #{match.end}, #{tokens}" }
|
||||
return true, match.end, tokens
|
||||
# Log.trace { "#{xml}, #{match[0].end}, #{tokens}" }
|
||||
return true, pos + match[0].as(Onigmo::Match).value.size, tokens
|
||||
end
|
||||
|
||||
def initialize(node : XML::Node, multiline, dotall, ignorecase)
|
||||
@xml = node.to_s
|
||||
@pattern = Re2.new(
|
||||
node["pattern"],
|
||||
multiline,
|
||||
dotall,
|
||||
ignorecase,
|
||||
anchored: true)
|
||||
pattern = node["pattern"]
|
||||
# flags = Regex::Options::ANCHORED
|
||||
flags = ::Regex::Options::NO_UTF_CHECK
|
||||
# MULTILINE implies DOTALL which we don't want, so we
|
||||
# use in-pattern flag (?m) instead
|
||||
flags |= ::Regex::Options::MULTILINE if multiline
|
||||
pattern = "(?m)" + pattern if multiline
|
||||
flags |= ::Regex::Options::DOTALL if dotall
|
||||
flags |= ::Regex::Options::IGNORE_CASE if ignorecase
|
||||
@pattern = Regex.new(pattern, ignorecase, multiline, dotall)
|
||||
@pattern2 = ::Regex.new(pattern, flags)
|
||||
add_actions(node)
|
||||
end
|
||||
|
||||
@@ -90,25 +102,4 @@ module Tartrazine
|
||||
add_actions(node)
|
||||
end
|
||||
end
|
||||
|
||||
# This is a hack to workaround that Crystal seems to disallow
|
||||
# having regexes multiline but not dot_all
|
||||
class Re2 < Regex
|
||||
@source = "fa"
|
||||
@options = Regex::Options::None
|
||||
@jit = true
|
||||
|
||||
def initialize(pattern : String, multiline = false, dotall = false, ignorecase = false, anchored = false)
|
||||
flags = LibPCRE2::UTF | LibPCRE2::DUPNAMES |
|
||||
LibPCRE2::UCP
|
||||
flags |= LibPCRE2::MULTILINE if multiline
|
||||
flags |= LibPCRE2::DOTALL if dotall
|
||||
flags |= LibPCRE2::CASELESS if ignorecase
|
||||
flags |= LibPCRE2::ANCHORED if anchored
|
||||
flags |= LibPCRE2::NO_UTF_CHECK
|
||||
@re = Regex::PCRE2.compile(pattern, flags) do |error_message|
|
||||
raise Exception.new(error_message)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
Reference in New Issue
Block a user