Some tests pass!

This commit is contained in:
Roberto Alsina 2024-08-13 19:19:12 -03:00
parent 2a9e7fde0d
commit a704c59fa9
6 changed files with 67 additions and 28 deletions

View File

@ -42,7 +42,6 @@ known_bad = {
"#{__DIR__}/tests/mcfunction/selectors.txt",
"#{__DIR__}/tests/php/anonymous_class.txt",
"#{__DIR__}/tests/html/javascript_unclosed.txt",
}
# Tests that fail because of a limitation in PCRE2

View File

@ -30,11 +30,11 @@ module Tartrazine
end
# ameba:disable Metrics/CyclomaticComplexity
def emit(match : Regex::MatchData?, lexer : Lexer, match_group = 0) : Array(Token)
def emit(match, lexer : Lexer, match_group = 0) : Array(Token)
case type
when "token"
raise Exception.new "Can't have a token without a match" if match.nil?
[Token.new(type: xml["type"], value: match[match_group])]
[Token.new(type: xml["type"], value: match[match_group].as(Onigmo::Match).value)]
when "push"
states_to_push = xml.attributes.select { |attrib|
attrib.name == "state"
@ -88,14 +88,14 @@ module Tartrazine
return [] of Token if match.nil?
lexer_name = xml["lexer"].downcase
Log.trace { "to tokenize: #{match[match_group]}" }
Tartrazine.lexer(lexer_name).tokenize(match[match_group], usingself: true)
Tartrazine.lexer(lexer_name).tokenize(match[match_group].as(Onigmo::Match).value, usingself: true)
when "usingself"
# Shunt to another copy of this lexer
return [] of Token if match.nil?
new_lexer = Lexer.from_xml(lexer.xml)
Log.trace { "to tokenize: #{match[match_group]}" }
new_lexer.tokenize(match[match_group], usingself: true)
new_lexer.tokenize(match[match_group].as(Onigmo::Match).value, usingself: true)
when "combined"
# Combine two states into one anonymous state
states = xml.attributes.select { |attrib|

View File

@ -73,7 +73,7 @@ module Tartrazine
# These are true/false/nil
outp << "border: none;" if style.border == false
outp << "font-weight: bold;" if style.bold
outp << "font-weight: #{weight_of_bold};" if style.bold == false
outp << "font-weight: #{@weight_of_bold};" if style.bold == false
outp << "font-style: italic;" if style.italic
outp << "font-style: normal;" if style.italic == false
outp << "text-decoration: underline;" if style.underline

View File

@ -1,15 +1,14 @@
@[Link("onigmo")]
@[Link(ldflags: "#{__DIR__}/onigmo/onigwrap.o")]
lib Onigmo
lib LibOnigmo
type Regex = Pointer(Void)
type Region = Pointer(Void)
fun create = onigwrap_create(pattern : LibC::Char*, len : UInt32,
ignoreCase : Int32,
multiline : Int32,
dotall : Int32,
anchored : Int32) : Regex
dotall : Int32) : Regex
fun free = onigwrap_free(re : Regex)
fun region_free = onigwrap_region_free(region : Region)
@ -19,16 +18,51 @@ lib Onigmo
fun len = onigwrap_len(region : Region, index : Int32) : Int32
end
pattern = "a"
# Object-oriented wrapper around the raw `LibOnigmo` bindings.
module Onigmo
  # One capture group produced by `Regex#match`.
  #
  # `begin` and `end` are the offsets reported by the library for the group
  # (`end` is `begin` plus the reported length). The subject string is handed
  # to the library as raw bytes, so these are byte offsets, not character
  # indices. `value` is the matched text.
  class Match
    property begin : Int32
    property end : Int32
    property value : String

    def initialize(@begin, @end, @value)
    end
  end

  # A compiled pattern. The native regex handle is created in `initialize`
  # and released in `finalize`.
  class Regex
    # Compiles *pattern*; each boolean is passed to the C wrapper as 0/1.
    def initialize(@pattern : String, @ignorecase = false, @multiline = false, @dotall = false)
      @re = LibOnigmo.create(@pattern.to_unsafe, @pattern.bytesize, @ignorecase ? 1 : 0, @multiline ? 1 : 0, @dotall ? 1 : 0)
    end

    # Releases the native regex handle.
    def finalize
      LibOnigmo.free(@re)
    end

    # Searches *str* starting at *offset* and returns one entry per capture
    # register: a `Match` for a group that participated, `nil` for a group
    # the library reports at position -1. Returns an empty array when the
    # library reports no registers at all.
    #
    # NOTE(review): *offset* is forwarded to the library unchanged, so it is
    # presumably a byte offset as well - confirm against callers.
    def match(str : String, offset = 0)
      region = LibOnigmo.search(@re, str.to_unsafe, offset, str.bytesize)
      begin
        num_regs = LibOnigmo.num_regs(region)
        return [] of Match if num_regs <= 0

        result = [] of Match?
        num_regs.times do |i|
          b = LibOnigmo.pos(region, i)
          len = LibOnigmo.len(region, i)
          e = b + len
          if b == -1 || e == -1
            result << nil
          else
            # Offsets are in bytes, so slice by bytes. `str[b...e]` would
            # index by character and return the wrong text as soon as the
            # subject contains a multi-byte character.
            result << Match.new(b, e, str.byte_slice(b, len))
          end
        end
        result
      ensure
        # Always release the region, including on the early return above
        # and if slicing raises.
        LibOnigmo.region_free(region)
      end
    end
  end
end
pattern = "#.*x"
str = "# foobar"
re = Onigmo.create(pattern, pattern.size, false, true, false, false)
region = Onigmo.search(re, str.to_unsafe, 0, str.bytesize)
num_regs = Onigmo.num_regs(region)
(0...num_regs).each do |i|
pos = Onigmo.pos(region, i)
len = Onigmo.len(region, i)
puts "match #{i}: #{str[pos, len]}"
end
Onigmo.region_free(region)
Onigmo.free(re)
re = Onigmo::Regex.new(pattern, false, false, false)
p! re.match(str)

View File

@ -1,6 +1,6 @@
#include "onigmo.h"
regex_t *onigwrap_create(char *pattern, int len, int ignoreCase, int multiline, int dotall, int anchored )
regex_t *onigwrap_create(char *pattern, int len, int ignoreCase, int multiline, int dotall)
{
regex_t *reg;

View File

@ -3,6 +3,7 @@ require "./formatter"
require "./rules"
require "./styles"
require "./lexer"
require "./onigmo"
# These are lexer rules. They match with the text being parsed
# and perform actions, either emitting tokens or changing the
@ -10,6 +11,8 @@ require "./lexer"
module Tartrazine
# This rule matches via a regex pattern
alias Regex = Onigmo::Regex
class Rule
property pattern : Regex = Regex.new ""
property actions : Array(Action) = [] of Action
@ -19,7 +22,9 @@ module Tartrazine
match = pattern.match(text, pos)
# We don't match if the match doesn't move the cursor
# because that causes infinite loops
return false, pos, [] of Token if match.nil? || match.end == 0
# Rejecting a match that starts after `pos` (`match[0].begin > pos`) emulates the ANCHORED option
return false, pos, [] of Token if match.empty? || match[0].nil? || match[0].try { |m| m.begin > pos }
# p! match.map(&.value), text[pos..pos + 20]
# Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
tokens = [] of Token
# Emit the tokens
@ -27,21 +32,22 @@ module Tartrazine
# Emit the token
tokens += action.emit(match, lexer)
end
Log.trace { "#{xml}, #{match.end}, #{tokens}" }
return true, match.end, tokens
# Log.trace { "#{xml}, #{match[0].end}, #{tokens}" }
return true, match[0].as(Onigmo::Match).end, tokens
end
def initialize(node : XML::Node, multiline, dotall, ignorecase)
@xml = node.to_s
pattern = node["pattern"]
flags = Regex::Options::ANCHORED
# flags = Regex::Options::ANCHORED
# flags = Regex::Options::NO_UTF_CHECK
# MULTILINE implies DOTALL which we don't want, so we
# use in-pattern flag (?m) instead
# flags |= Regex::Options::MULTILINE if multiline
pattern = "(?m)" + pattern if multiline
flags |= Regex::Options::DOTALL if dotall
flags |= Regex::Options::IGNORE_CASE if ignorecase
@pattern = Regex.new(pattern, flags)
# flags |= Regex::Options::DOTALL if dotall
# flags |= Regex::Options::IGNORE_CASE if ignorecase
@pattern = Regex.new(pattern, ignorecase, multiline, dotall)
add_actions(node)
end