mirror of
https://github.com/ralsina/tartrazine.git
synced 2024-11-12 22:42:23 +00:00
refactor
This commit is contained in:
parent
0c86e91b0b
commit
e0f697f1f9
32
src/rules.cr
32
src/rules.cr
@ -5,18 +5,19 @@ require "./actions"
|
||||
# state of the lexer.
|
||||
module Tartrazine
|
||||
# This rule matches via a regex pattern
|
||||
|
||||
class Rule
|
||||
property pattern : Regex = Re2.new ""
|
||||
property actions : Array(Action) = [] of Action
|
||||
property xml : String = "foo"
|
||||
|
||||
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
|
||||
tokens = [] of Token
|
||||
match = pattern.match(text, pos)
|
||||
# We don't match if the match doesn't move the cursor
|
||||
# because that causes infinite loops
|
||||
Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
|
||||
return false, pos, [] of Token if match.nil? || match.end == 0
|
||||
# Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
|
||||
tokens = [] of Token
|
||||
# Emit the tokens
|
||||
actions.each do |action|
|
||||
# Emit the token
|
||||
@ -28,7 +29,12 @@ module Tartrazine
|
||||
|
||||
def initialize(node : XML::Node, multiline, dotall, ignorecase)
|
||||
@xml = node.to_s
|
||||
@pattern = Re2.new(node["pattern"], multiline, dotall, ignorecase)
|
||||
@pattern = Re2.new(
|
||||
node["pattern"],
|
||||
multiline,
|
||||
dotall,
|
||||
ignorecase,
|
||||
anchored: true)
|
||||
add_actions(node)
|
||||
end
|
||||
|
||||
@ -80,4 +86,24 @@ module Tartrazine
|
||||
add_actions(node)
|
||||
end
|
||||
end
|
||||
|
||||
# This is a hack to workaround that Crystal seems to disallow
|
||||
# having regexes multiline but not dot_all
|
||||
class Re2 < Regex
|
||||
@source = "fa"
|
||||
@options = Regex::Options::None
|
||||
@jit = true
|
||||
|
||||
def initialize(pattern : String, multiline = false, dotall = false, ignorecase = false, anchored = false)
|
||||
flags = LibPCRE2::UTF | LibPCRE2::DUPNAMES |
|
||||
LibPCRE2::UCP
|
||||
flags |= LibPCRE2::MULTILINE if multiline
|
||||
flags |= LibPCRE2::DOTALL if dotall
|
||||
flags |= LibPCRE2::CASELESS if ignorecase
|
||||
flags |= LibPCRE2::ANCHORED if anchored
|
||||
@re = Regex::PCRE2.compile(pattern, flags) do |error_message|
|
||||
raise Exception.new(error_message)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -63,6 +63,8 @@ module Tartrazine
|
||||
tokens = [] of Token
|
||||
pos = 0
|
||||
matched = false
|
||||
time = 0
|
||||
count = 0
|
||||
|
||||
# Respect the `ensure_nl` config option
|
||||
if text.size > 0 && text[-1] != '\n' && config[:ensure_nl] && !usingself
|
||||
@ -184,25 +186,6 @@ module Tartrazine
|
||||
def self.lexer(name : String) : Lexer
|
||||
Lexer.from_xml(File.read("lexers/#{name}.xml"))
|
||||
end
|
||||
|
||||
# This is a hack to workaround that Crystal seems to disallow
|
||||
# having regexes multiline but not dot_all
|
||||
class Re2 < Regex
|
||||
@source = "fa"
|
||||
@options = Regex::Options::None
|
||||
@jit = true
|
||||
|
||||
def initialize(pattern : String, multiline = false, dotall = false, ignorecase = false)
|
||||
flags = LibPCRE2::UTF | LibPCRE2::DUPNAMES |
|
||||
LibPCRE2::UCP | LibPCRE2::ANCHORED
|
||||
flags |= LibPCRE2::MULTILINE if multiline
|
||||
flags |= LibPCRE2::DOTALL if dotall
|
||||
flags |= LibPCRE2::CASELESS if ignorecase
|
||||
@re = Regex::PCRE2.compile(pattern, flags) do |error_message|
|
||||
raise Exception.new(error_message)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# Convenience macros to parse XML
|
||||
|
Loading…
Reference in New Issue
Block a user