mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-06-07 20:20:26 -03:00
Some tests pass!
This commit is contained in:
parent
2a9e7fde0d
commit
a704c59fa9
@ -42,7 +42,6 @@ known_bad = {
|
||||
"#{__DIR__}/tests/mcfunction/selectors.txt",
|
||||
"#{__DIR__}/tests/php/anonymous_class.txt",
|
||||
"#{__DIR__}/tests/html/javascript_unclosed.txt",
|
||||
|
||||
}
|
||||
|
||||
# Tests that fail because of a limitation in PCRE2
|
||||
|
@ -30,11 +30,11 @@ module Tartrazine
|
||||
end
|
||||
|
||||
# ameba:disable Metrics/CyclomaticComplexity
|
||||
def emit(match : Regex::MatchData?, lexer : Lexer, match_group = 0) : Array(Token)
|
||||
def emit(match, lexer : Lexer, match_group = 0) : Array(Token)
|
||||
case type
|
||||
when "token"
|
||||
raise Exception.new "Can't have a token without a match" if match.nil?
|
||||
[Token.new(type: xml["type"], value: match[match_group])]
|
||||
[Token.new(type: xml["type"], value: match[match_group].as(Onigmo::Match).value)]
|
||||
when "push"
|
||||
states_to_push = xml.attributes.select { |attrib|
|
||||
attrib.name == "state"
|
||||
@ -88,14 +88,14 @@ module Tartrazine
|
||||
return [] of Token if match.nil?
|
||||
lexer_name = xml["lexer"].downcase
|
||||
Log.trace { "to tokenize: #{match[match_group]}" }
|
||||
Tartrazine.lexer(lexer_name).tokenize(match[match_group], usingself: true)
|
||||
Tartrazine.lexer(lexer_name).tokenize(match[match_group].as(Onigmo::Match).value, usingself: true)
|
||||
when "usingself"
|
||||
# Shunt to another copy of this lexer
|
||||
return [] of Token if match.nil?
|
||||
|
||||
new_lexer = Lexer.from_xml(lexer.xml)
|
||||
Log.trace { "to tokenize: #{match[match_group]}" }
|
||||
new_lexer.tokenize(match[match_group], usingself: true)
|
||||
new_lexer.tokenize(match[match_group].as(Onigmo::Match).value, usingself: true)
|
||||
when "combined"
|
||||
# Combine two states into one anonymous state
|
||||
states = xml.attributes.select { |attrib|
|
||||
|
@ -73,7 +73,7 @@ module Tartrazine
|
||||
# These are true/false/nil
|
||||
outp << "border: none;" if style.border == false
|
||||
outp << "font-weight: bold;" if style.bold
|
||||
outp << "font-weight: #{weight_of_bold};" if style.bold == false
|
||||
outp << "font-weight: #{@weight_of_bold};" if style.bold == false
|
||||
outp << "font-style: italic;" if style.italic
|
||||
outp << "font-style: normal;" if style.italic == false
|
||||
outp << "text-decoration: underline;" if style.underline
|
||||
|
@ -1,15 +1,14 @@
|
||||
@[Link("onigmo")]
|
||||
@[Link(ldflags: "#{__DIR__}/onigmo/onigwrap.o")]
|
||||
|
||||
lib Onigmo
|
||||
lib LibOnigmo
|
||||
type Regex = Pointer(Void)
|
||||
type Region = Pointer(Void)
|
||||
|
||||
fun create = onigwrap_create(pattern : LibC::Char*, len : UInt32,
|
||||
ignoreCase : Int32,
|
||||
multiline : Int32,
|
||||
dotall : Int32,
|
||||
anchored : Int32) : Regex
|
||||
dotall : Int32) : Regex
|
||||
fun free = onigwrap_free(re : Regex)
|
||||
fun region_free = onigwrap_region_free(region : Region)
|
||||
|
||||
@ -19,16 +18,51 @@ lib Onigmo
|
||||
fun len = onigwrap_len(region : Region, index : Int32) : Int32
|
||||
end
|
||||
|
||||
pattern = "a"
|
||||
# Object-oriented wrapper over the raw `LibOnigmo` C bindings.
module Onigmo
  # One capture group of a search.
  #
  # `begin` and `end` are the offsets reported by the C wrapper
  # (`pos` and `pos + len`); `value` is the text of the group.
  # NOTE(review): the offsets are presumably byte offsets, since the search
  # is handed `str.bytesize` -- confirm against onigwrap.c.
  class Match
    property begin : Int32
    property end : Int32
    property value : String

    def initialize(@begin, @end, @value)
    end
  end

  # A compiled pattern. The underlying C regex is created in `initialize`
  # and released in `finalize`.
  class Regex
    # Compiles *pattern*; each boolean option is passed to the C wrapper
    # as 1 (set) or 0 (unset).
    def initialize(@pattern : String, @ignorecase = false, @multiline = false, @dotall = false)
      @re = LibOnigmo.create(@pattern.to_unsafe, @pattern.bytesize, @ignorecase ? 1 : 0, @multiline ? 1 : 0, @dotall ? 1 : 0)
    end

    # Releases the C-side regex handle.
    def finalize
      LibOnigmo.free(@re)
    end

    # Searches *str* starting at *offset*.
    #
    # Returns an `Array(Match?)` with one entry per register reported by
    # the C wrapper (`nil` for a group that did not participate), or an
    # empty array when the wrapper reports no registers.
    def match(str : String, offset = 0)
      result = [] of Match?
      region = LibOnigmo.search(@re, str.to_unsafe, offset, str.bytesize)
      begin
        LibOnigmo.num_regs(region).times do |i|
          b = LibOnigmo.pos(region, i)
          len = LibOnigmo.len(region, i)
          e = b + len
          if b == -1 || e == -1
            result << nil
          else
            # Slice by bytes, not characters: the offsets come from a search
            # over the raw bytes of `str`, so `str[b...e]` would pick the
            # wrong text as soon as a multi-byte character precedes the match.
            result << Match.new(b, e, str.byte_slice(b, len))
          end
        end
      ensure
        # Always release the region, including on the "no registers" path
        # and when building the result raises.
        LibOnigmo.region_free(region)
      end
      result
    end
  end
end
|
||||
|
||||
pattern = "#.*x"
|
||||
str = "# foobar"
|
||||
|
||||
re = Onigmo.create(pattern, pattern.size, false, true, false, false)
|
||||
region = Onigmo.search(re, str.to_unsafe, 0, str.bytesize)
|
||||
num_regs = Onigmo.num_regs(region)
|
||||
(0...num_regs).each do |i|
|
||||
pos = Onigmo.pos(region, i)
|
||||
len = Onigmo.len(region, i)
|
||||
puts "match #{i}: #{str[pos, len]}"
|
||||
end
|
||||
Onigmo.region_free(region)
|
||||
Onigmo.free(re)
|
||||
re = Onigmo::Regex.new(pattern, false, false, false)
|
||||
p! re.match(str)
|
||||
|
@ -1,6 +1,6 @@
|
||||
#include "onigmo.h"
|
||||
|
||||
regex_t *onigwrap_create(char *pattern, int len, int ignoreCase, int multiline, int dotall, int anchored )
|
||||
regex_t *onigwrap_create(char *pattern, int len, int ignoreCase, int multiline, int dotall)
|
||||
{
|
||||
regex_t *reg;
|
||||
|
||||
|
20
src/rules.cr
20
src/rules.cr
@ -3,6 +3,7 @@ require "./formatter"
|
||||
require "./rules"
|
||||
require "./styles"
|
||||
require "./lexer"
|
||||
require "./onigmo"
|
||||
|
||||
# These are lexer rules. They match with the text being parsed
|
||||
# and perform actions, either emitting tokens or changing the
|
||||
@ -10,6 +11,8 @@ require "./lexer"
|
||||
module Tartrazine
|
||||
# This rule matches via a regex pattern
|
||||
|
||||
alias Regex = Onigmo::Regex
|
||||
|
||||
class Rule
|
||||
property pattern : Regex = Regex.new ""
|
||||
property actions : Array(Action) = [] of Action
|
||||
@ -19,7 +22,9 @@ module Tartrazine
|
||||
match = pattern.match(text, pos)
|
||||
# We don't match if the match doesn't move the cursor
|
||||
# because that causes infinite loops
|
||||
return false, pos, [] of Token if match.nil? || match.end == 0
|
||||
# Rejecting a match that begins after `pos` (`match[0].begin > pos`) emulates the ANCHORED option
|
||||
return false, pos, [] of Token if match.empty? || match[0].nil? || match[0].try { |m| m.begin > pos }
|
||||
# p! match.map(&.value), text[pos..pos + 20]
|
||||
# Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
|
||||
tokens = [] of Token
|
||||
# Emit the tokens
|
||||
@ -27,21 +32,22 @@ module Tartrazine
|
||||
# Emit the token
|
||||
tokens += action.emit(match, lexer)
|
||||
end
|
||||
Log.trace { "#{xml}, #{match.end}, #{tokens}" }
|
||||
return true, match.end, tokens
|
||||
# Log.trace { "#{xml}, #{match[0].end}, #{tokens}" }
|
||||
return true, match[0].as(Onigmo::Match).end, tokens
|
||||
end
|
||||
|
||||
def initialize(node : XML::Node, multiline, dotall, ignorecase)
|
||||
@xml = node.to_s
|
||||
pattern = node["pattern"]
|
||||
flags = Regex::Options::ANCHORED
|
||||
# flags = Regex::Options::ANCHORED
|
||||
# flags = Regex::Options::NO_UTF_CHECK
|
||||
# MULTILINE implies DOTALL which we don't want, so we
|
||||
# use in-pattern flag (?m) instead
|
||||
# flags |= Regex::Options::MULTILINE if multiline
|
||||
pattern = "(?m)" + pattern if multiline
|
||||
flags |= Regex::Options::DOTALL if dotall
|
||||
flags |= Regex::Options::IGNORE_CASE if ignorecase
|
||||
@pattern = Regex.new(pattern, flags)
|
||||
# flags |= Regex::Options::DOTALL if dotall
|
||||
# flags |= Regex::Options::IGNORE_CASE if ignorecase
|
||||
@pattern = Regex.new(pattern, ignorecase, multiline, dotall)
|
||||
add_actions(node)
|
||||
end
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user