mirror of
https://github.com/ralsina/tartrazine.git
synced 2024-09-19 23:11:22 +00:00
refactor
This commit is contained in:
parent
0c86e91b0b
commit
e0f697f1f9
32
src/rules.cr
32
src/rules.cr
@ -5,18 +5,19 @@ require "./actions"
|
|||||||
# state of the lexer.
|
# state of the lexer.
|
||||||
module Tartrazine
|
module Tartrazine
|
||||||
# This rule matches via a regex pattern
|
# This rule matches via a regex pattern
|
||||||
|
|
||||||
class Rule
|
class Rule
|
||||||
property pattern : Regex = Re2.new ""
|
property pattern : Regex = Re2.new ""
|
||||||
property actions : Array(Action) = [] of Action
|
property actions : Array(Action) = [] of Action
|
||||||
property xml : String = "foo"
|
property xml : String = "foo"
|
||||||
|
|
||||||
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
|
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
|
||||||
tokens = [] of Token
|
|
||||||
match = pattern.match(text, pos)
|
match = pattern.match(text, pos)
|
||||||
# We don't match if the match doesn't move the cursor
|
# We don't match if the match doesn't move the cursor
|
||||||
# because that causes infinite loops
|
# because that causes infinite loops
|
||||||
Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
|
|
||||||
return false, pos, [] of Token if match.nil? || match.end == 0
|
return false, pos, [] of Token if match.nil? || match.end == 0
|
||||||
|
# Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
|
||||||
|
tokens = [] of Token
|
||||||
# Emit the tokens
|
# Emit the tokens
|
||||||
actions.each do |action|
|
actions.each do |action|
|
||||||
# Emit the token
|
# Emit the token
|
||||||
@ -28,7 +29,12 @@ module Tartrazine
|
|||||||
|
|
||||||
def initialize(node : XML::Node, multiline, dotall, ignorecase)
|
def initialize(node : XML::Node, multiline, dotall, ignorecase)
|
||||||
@xml = node.to_s
|
@xml = node.to_s
|
||||||
@pattern = Re2.new(node["pattern"], multiline, dotall, ignorecase)
|
@pattern = Re2.new(
|
||||||
|
node["pattern"],
|
||||||
|
multiline,
|
||||||
|
dotall,
|
||||||
|
ignorecase,
|
||||||
|
anchored: true)
|
||||||
add_actions(node)
|
add_actions(node)
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -80,4 +86,24 @@ module Tartrazine
|
|||||||
add_actions(node)
|
add_actions(node)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# This is a hack to workaround that Crystal seems to disallow
|
||||||
|
# having regexes multiline but not dot_all
|
||||||
|
class Re2 < Regex
|
||||||
|
@source = "fa"
|
||||||
|
@options = Regex::Options::None
|
||||||
|
@jit = true
|
||||||
|
|
||||||
|
def initialize(pattern : String, multiline = false, dotall = false, ignorecase = false, anchored = false)
|
||||||
|
flags = LibPCRE2::UTF | LibPCRE2::DUPNAMES |
|
||||||
|
LibPCRE2::UCP
|
||||||
|
flags |= LibPCRE2::MULTILINE if multiline
|
||||||
|
flags |= LibPCRE2::DOTALL if dotall
|
||||||
|
flags |= LibPCRE2::CASELESS if ignorecase
|
||||||
|
flags |= LibPCRE2::ANCHORED if anchored
|
||||||
|
@re = Regex::PCRE2.compile(pattern, flags) do |error_message|
|
||||||
|
raise Exception.new(error_message)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
@ -63,6 +63,8 @@ module Tartrazine
|
|||||||
tokens = [] of Token
|
tokens = [] of Token
|
||||||
pos = 0
|
pos = 0
|
||||||
matched = false
|
matched = false
|
||||||
|
time = 0
|
||||||
|
count = 0
|
||||||
|
|
||||||
# Respect the `ensure_nl` config option
|
# Respect the `ensure_nl` config option
|
||||||
if text.size > 0 && text[-1] != '\n' && config[:ensure_nl] && !usingself
|
if text.size > 0 && text[-1] != '\n' && config[:ensure_nl] && !usingself
|
||||||
@ -184,25 +186,6 @@ module Tartrazine
|
|||||||
def self.lexer(name : String) : Lexer
|
def self.lexer(name : String) : Lexer
|
||||||
Lexer.from_xml(File.read("lexers/#{name}.xml"))
|
Lexer.from_xml(File.read("lexers/#{name}.xml"))
|
||||||
end
|
end
|
||||||
|
|
||||||
# This is a hack to workaround that Crystal seems to disallow
|
|
||||||
# having regexes multiline but not dot_all
|
|
||||||
class Re2 < Regex
|
|
||||||
@source = "fa"
|
|
||||||
@options = Regex::Options::None
|
|
||||||
@jit = true
|
|
||||||
|
|
||||||
def initialize(pattern : String, multiline = false, dotall = false, ignorecase = false)
|
|
||||||
flags = LibPCRE2::UTF | LibPCRE2::DUPNAMES |
|
|
||||||
LibPCRE2::UCP | LibPCRE2::ANCHORED
|
|
||||||
flags |= LibPCRE2::MULTILINE if multiline
|
|
||||||
flags |= LibPCRE2::DOTALL if dotall
|
|
||||||
flags |= LibPCRE2::CASELESS if ignorecase
|
|
||||||
@re = Regex::PCRE2.compile(pattern, flags) do |error_message|
|
|
||||||
raise Exception.new(error_message)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# Convenience macros to parse XML
|
# Convenience macros to parse XML
|
||||||
|
Loading…
Reference in New Issue
Block a user