Mirror of https://github.com/ralsina/tartrazine.git, synced 2025-06-08 04:30:26 -03:00.
Compare commits: ad664d9f93 ... cb09dff9f1

12 commits in this range:

- cb09dff9f1
- b589726352
- a3a7b5bd9a
- 58e8dac038
- f72a40f095
- bf257a5b82
- 029495590c
- 115debdec6
- 4612db58fe
- f45a86c83a
- 27008640a6
- 7db8fdc9e4
.gitignore (vendored): 1 line changed

```diff
@@ -7,3 +7,4 @@ chroma/
 pygments/
 shard.lock
 .vscode/
+.crystal/
```
README.md: 16 lines changed

```diff
@@ -4,17 +4,17 @@ Tartrazine is a library to syntax-highlight code. It is
 a port of [Pygments](https://pygments.org/) to
 [Crystal](https://crystal-lang.org/). Kind of.
 
-It's not currently usable because it's not finished, but:
-
-* The lexers work for the implemented languages
-* The provided styles work
-* There is a very very simple HTML formatter
+The CLI tool can be used to highlight many things in many styles.
 
 # A port of what? Why "kind of"?
 
-Because I did not read the Pygments code. And this is actually
-based on [Chroma](https://github.com/alecthomas/chroma) ...
-although I did not read that code either.
+Pygments is a staple of the Python ecosystem, and it's great.
+It lets you highlight code in many languages, and it has many
+themes. Chroma is "Pygments for Go", it's actually a port of
+Pygments to Go, and it's great too.
+
+I wanted that in Crystal, so I started this project. But I did
+not read much of the Pygments code. Or much of Chroma's.
 
 Chroma has taken most of the Pygments lexers and turned them into
 XML descriptions. What I did was take those XML files from Chroma
```
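The new pitch leans on APIs that the commits below touch. As a minimal, unofficial sketch of library use (the `Tartrazine.lexer(...).tokenize(...)` call appears verbatim in the diffs further down, and the `token[:type]` / `token[:value]` access matches the HTML formatter; the overall flow is my inference, not documented here):

```crystal
require "tartrazine"

# Look up a lexer by name and walk the tokens it produces.
# Token access via token[:type] / token[:value] mirrors what the
# HTML formatter patched below does with each token.
lexer = Tartrazine.lexer("crystal")
lexer.tokenize(%(puts "hello")).each do |token|
  puts "#{token[:type]}: #{token[:value].inspect}"
end
```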
shard.yml: version bump

```diff
@@ -1,5 +1,5 @@
 name: tartrazine
-version: 0.3.0
+version: 0.4.0
 
 authors:
   - Roberto Alsina <roberto.alsina@gmail.com>
```
```diff
@@ -14,6 +14,7 @@ unicode_problems = {
   "#{__DIR__}/tests/java/test_string_literals.txt",
   "#{__DIR__}/tests/json/test_strings.txt",
   "#{__DIR__}/tests/systemd/example1.txt",
+  "#{__DIR__}/tests/c++/test_unicode_identifiers.txt",
 }
 
 # These testcases fail because of differences in the way chroma and tartrazine tokenize
```
```diff
@@ -8,12 +8,20 @@ require "./tartrazine"
 # perform a list of actions. These actions can emit tokens
 # or change the state machine.
 module Tartrazine
-  class Action
-    property type : String
-    property xml : XML::Node
+  struct Action
     property actions : Array(Action) = [] of Action
+    property type : String
 
-    def initialize(@type : String, @xml : XML::Node?)
+    @depth : Int32 = 0
+    @lexer_name : String = ""
+    @states : Array(String) = [] of String
+    @states_to_push : Array(String) = [] of String
+    @token_type : String = ""
+
+    def initialize(@type : String, xml : XML::Node?)
+      known_types = %w(token push pop combined bygroups include using usingself)
+      raise Exception.new("Unknown action type: #{type}") unless known_types.includes? type
+
       # Some actions may have actions in them, like this:
       # <bygroups>
       #   <token type="GenericPrompt"/>
```
```diff
@@ -23,10 +31,28 @@ module Tartrazine
       #
       # The token actions match with the first 2 groups in the regex
       # the using action matches the 3rd and shunts it to another lexer
-      @xml.children.each do |node|
+      xml.children.each do |node|
         next unless node.element?
         @actions << Action.new(node.name, node)
       end
+
+      # Prefetch the attributes we need from the XML and keep them
+      case type
+      when "token"
+        @token_type = xml["type"]
+      when "push"
+        @states_to_push = xml.attributes.select { |attrib|
+          attrib.name == "state"
+        }.map &.content
+      when "pop"
+        @depth = xml["depth"].to_i
+      when "using"
+        @lexer_name = xml["lexer"].downcase
+      when "combined"
+        @states = xml.attributes.select { |attrib|
+          attrib.name == "state"
+        }.map &.content
+      end
     end
 
     # ameba:disable Metrics/CyclomaticComplexity
```
```diff
@@ -34,35 +60,22 @@ module Tartrazine
       case type
       when "token"
         raise Exception.new "Can't have a token without a match" if match.empty?
-        [Token.new(type: xml["type"], value: String.new(match[match_group].value))]
+        [Token.new(type: @token_type, value: String.new(match[match_group].value))]
       when "push"
-        states_to_push = xml.attributes.select { |attrib|
-          attrib.name == "state"
-        }.map &.content
-        if states_to_push.empty?
-          # Push without a state means push the current state
-          states_to_push = [lexer.state_stack.last]
-        end
-        states_to_push.each do |state|
-          if state == "#pop"
+        to_push = @states_to_push.empty? ? [lexer.state_stack.last] : @states_to_push
+        to_push.each do |state|
+          if state == "#pop" && lexer.state_stack.size > 1
             # Pop the state
-            Log.trace { "Popping state" }
             lexer.state_stack.pop
           else
             # Really push
             lexer.state_stack << state
-            Log.trace { "Pushed #{lexer.state_stack}" }
           end
         end
         [] of Token
       when "pop"
-        depth = xml["depth"].to_i
-        Log.trace { "Popping #{depth} states" }
-        if lexer.state_stack.size <= depth
-          Log.trace { "Can't pop #{depth} states, only have #{lexer.state_stack.size}" }
-        else
-          lexer.state_stack.pop(depth)
-        end
+        to_pop = [@depth, lexer.state_stack.size - 1].min
+        lexer.state_stack.pop(to_pop)
         [] of Token
       when "bygroups"
         # FIXME: handle
```
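The rewritten `pop` branch above replaces the size check and trace logging with a clamp: never pop more than `size - 1` states, so the root state always survives an over-deep pop request. A standalone sketch of that arithmetic:

```crystal
# Clamp as in the new "pop" branch: pop at most size - 1 states,
# so "root" always stays on the stack.
state_stack = ["root", "string", "interpolation"]
depth = 5 # depth requested by the XML action
to_pop = [depth, state_stack.size - 1].min
state_stack.pop(to_pop)
p state_stack # => ["root"]
```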
```diff
@@ -92,22 +105,15 @@ module Tartrazine
       when "using"
         # Shunt to another lexer entirely
         return [] of Token if match.empty?
-        lexer_name = xml["lexer"].downcase
-        Log.trace { "to tokenize: #{match[match_group]}" }
-        Tartrazine.lexer(lexer_name).tokenize(String.new(match[match_group].value), usingself: true)
+        Tartrazine.lexer(@lexer_name).tokenize(String.new(match[match_group].value), usingself: true)
       when "usingself"
         # Shunt to another copy of this lexer
         return [] of Token if match.empty?
-        new_lexer = Lexer.from_xml(lexer.xml)
-        Log.trace { "to tokenize: #{match[match_group]}" }
+        new_lexer = lexer.copy
         new_lexer.tokenize(String.new(match[match_group].value), usingself: true)
       when "combined"
         # Combine two states into one anonymous state
-        states = xml.attributes.select { |attrib|
-          attrib.name == "state"
-        }.map &.content
-        new_state = states.map { |name|
+        new_state = @states.map { |name|
           lexer.states[name]
         }.reduce { |state1, state2|
           state1 + state2
```
```diff
@@ -116,7 +122,7 @@ module Tartrazine
         lexer.state_stack << new_state.name
         [] of Token
       else
-        raise Exception.new("Unknown action type: #{type}: #{xml}")
+        raise Exception.new("Unknown action type: #{type}")
       end
     end
   end
```
```diff
@@ -3,7 +3,7 @@ module BytesRegex
 
   class Regex
     def initialize(pattern : String, multiline = false, dotall = false, ignorecase = false, anchored = false)
-      flags = LibPCRE2::UTF | LibPCRE2::DUPNAMES | LibPCRE2::UCP
+      flags = LibPCRE2::UTF | LibPCRE2::UCP | LibPCRE2::NO_UTF_CHECK
       flags |= LibPCRE2::MULTILINE if multiline
       flags |= LibPCRE2::DOTALL if dotall
       flags |= LibPCRE2::CASELESS if ignorecase
```
```diff
@@ -22,27 +22,26 @@ module BytesRegex
         end
         raise Exception.new "Error #{msg} compiling regex at offset #{erroroffset}"
       end
+      @match_data = LibPCRE2.match_data_create_from_pattern(@re, nil)
     end
 
     def finalize
+      LibPCRE2.match_data_free(@match_data)
       LibPCRE2.code_free(@re)
     end
 
     def match(str : Bytes, pos = 0) : Array(Match)
-      match_data = LibPCRE2.match_data_create_from_pattern(@re, nil)
       match = [] of Match
       rc = LibPCRE2.match(
         @re,
         str,
         str.size,
         pos,
-        0,
-        match_data,
+        LibPCRE2::NO_UTF_CHECK,
+        @match_data,
         nil)
-      if rc < 0
-        # No match, do nothing
-      else
-        ovector = LibPCRE2.get_ovector_pointer(match_data)
+      if rc > 0
+        ovector = LibPCRE2.get_ovector_pointer(@match_data)
         (0...rc).each do |i|
           m_start = ovector[2 * i]
           m_size = ovector[2 * i + 1] - m_start
```
```diff
@@ -54,7 +53,6 @@ module BytesRegex
           match << Match.new(m_value, m_start, m_size)
         end
       end
-      LibPCRE2.match_data_free(match_data)
       match
     end
   end
```
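The hunks above move PCRE2 match-data handling out of the hot path: the buffer is created once alongside the compiled pattern, reused by every `match` call, and freed in `finalize`, while `NO_UTF_CHECK` skips re-validating the subject as UTF-8 on each call. A usage sketch of the API as shown here (the `require` path is my assumption; the module ships inside tartrazine):

```crystal
require "./bytes_regex" # path assumed

# Compile once, match many times; each call now reuses @match_data.
re = BytesRegex::Regex.new("[a-z]+")
matches = re.match("hello world".to_slice) # Array(Match), group 0 first
puts String.new(matches[0].value) unless matches.empty? # => hello
```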
```diff
@@ -1,5 +1,6 @@
 require "../constants/token_abbrevs.cr"
 require "../formatter"
+require "html"
 
 module Tartrazine
   class Html < Formatter
```
```diff
@@ -67,8 +68,7 @@ module Tartrazine
         line_id = linkable_line_numbers? ? "id=\"#{line_number_id_prefix}#{i + 1}\"" : ""
         outp << "<span #{line_id} #{line_class} style=\"user-select: none;\">#{line_label} </span>"
         line.each do |token|
-          fragment = "<span class=\"#{get_css_class(token[:type])}\">#{token[:value]}</span>"
-          outp << fragment
+          outp << "<span class=\"#{get_css_class(token[:type])}\">#{HTML.escape(token[:value])}</span>"
         end
       end
       outp << "</code></pre>"
```
```diff
@@ -104,15 +104,17 @@ module Tartrazine
 
     # Given a token type, return the CSS class to use.
     def get_css_class(token : String) : String
-      return class_prefix + Abbreviations[token] if theme.styles.has_key?(token)
-
-      # Themes don't contain information for each specific
-      # token type. However, they may contain information
-      # for a parent style. Worst case, we go to the root
-      # (Background) style.
-      class_prefix + Abbreviations[theme.style_parents(token).reverse.find { |parent|
-        theme.styles.has_key?(parent)
-      }]
+      if !theme.styles.has_key? token
+        # Themes don't contain information for each specific
+        # token type. However, they may contain information
+        # for a parent style. Worst case, we go to the root
+        # (Background) style.
+        parent = theme.style_parents(token).reverse.find { |dad|
+          theme.styles.has_key?(dad)
+        }
+        theme.styles[token] = theme.styles[parent]
+      end
+      class_prefix + Abbreviations[token]
     end
 
     # Is this line in the highlighted ranges?
```
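With `require "html"` added at the top of the file, every token value is now routed through the standard library's `HTML.escape`, so source text can no longer inject markup into the generated spans. For example:

```crystal
require "html"

# Without escaping, this token value would open a real tag inside
# the formatter's <span>; escaped, it renders as literal text.
puts HTML.escape(%(if a < b && c > "x"))
# => if a &lt; b &amp;&amp; c &gt; &quot;x&quot;
```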
src/lexer.cr: 19 lines changed
```diff
@@ -56,12 +56,18 @@ module Tartrazine
       not_multiline: false,
       ensure_nl: false,
     }
-    property xml : String = ""
+    # property xml : String = ""
 
     property states = {} of String => State
 
     property state_stack = ["root"]
 
+    def copy : Lexer
+      new_lexer = Lexer.new
+      new_lexer.config = config
+      new_lexer.states = states
+      new_lexer.state_stack = state_stack[0..-1]
+      new_lexer
+    end
+
     # Turn the text into a list of tokens. The `usingself` parameter
     # is true when the lexer is being used to tokenize a string
     # from a larger text that is already being tokenized.
```
|
|||||||
if matched
|
if matched
|
||||||
# Move position forward, save the tokens,
|
# Move position forward, save the tokens,
|
||||||
# tokenize from the new position
|
# tokenize from the new position
|
||||||
# Log.trace { "MATCHED: #{rule.xml}" }
|
|
||||||
pos = new_pos
|
pos = new_pos
|
||||||
tokens += new_tokens
|
tokens += new_tokens
|
||||||
break
|
break
|
||||||
end
|
end
|
||||||
# Log.trace { "NOT MATCHED: #{rule.xml}" }
|
|
||||||
end
|
end
|
||||||
# If no rule matches, emit an error token
|
# If no rule matches, emit an error token
|
||||||
unless matched
|
unless matched
|
||||||
@ -158,7 +162,6 @@ module Tartrazine
|
|||||||
# ameba:disable Metrics/CyclomaticComplexity
|
# ameba:disable Metrics/CyclomaticComplexity
|
||||||
def self.from_xml(xml : String) : Lexer
|
def self.from_xml(xml : String) : Lexer
|
||||||
l = Lexer.new
|
l = Lexer.new
|
||||||
l.xml = xml
|
|
||||||
lexer = XML.parse(xml).first_element_child
|
lexer = XML.parse(xml).first_element_child
|
||||||
if lexer
|
if lexer
|
||||||
config = lexer.children.find { |node|
|
config = lexer.children.find { |node|
|
||||||
```diff
@@ -222,9 +225,9 @@ module Tartrazine
   # A Lexer state. A state has a name and a list of rules.
   # The state machine has a state stack containing references
   # to states to decide which rules to apply.
-  class State
+  struct State
     property name : String = ""
-    property rules = [] of Rule
+    property rules = [] of BaseRule
 
     def +(other : State)
       new_state = State.new
```
```diff
@@ -77,7 +77,7 @@ if options["-f"]
 
   if formatter.is_a?(Tartrazine::Html) && options["--css"]
     File.open("#{options["-t"].as(String)}.css", "w") do |outf|
-      outf.puts formatter.style_defs
+      outf << formatter.style_defs
     end
     exit 0
   end
```
```diff
@@ -91,7 +91,7 @@ if options["-f"]
     puts output
   else
     File.open(options["-o"].as(String), "w") do |outf|
-      outf.puts output
+      outf << output
     end
   end
 end
```
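Both CLI hunks swap `IO#puts` for `IO#<<` when writing generated output: `puts` appends a trailing newline, while `<<` writes the string as-is, which matters for byte-exact CSS or HTML files. A tiny illustration:

```crystal
# IO#puts appends a trailing newline; IO#<< writes the bytes as-is.
File.open("out.css", "w") do |outf|
  outf << ".b {}" # file contains exactly these 5 characters
end
```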
src/rules.cr: 76 lines changed
```diff
@@ -15,41 +15,11 @@ module Tartrazine
   alias Match = BytesRegex::Match
   alias MatchData = Array(Match)
 
-  class Rule
-    property pattern : Regex = Regex.new ""
+  abstract struct BaseRule
+    abstract def match(text : Bytes, pos, lexer) : Tuple(Bool, Int32, Array(Token))
+    abstract def initialize(node : XML::Node)
+
     property actions : Array(Action) = [] of Action
-    property xml : String = "foo"
-
-    def match(text : Bytes, pos, lexer) : Tuple(Bool, Int32, Array(Token))
-      match = pattern.match(text, pos)
-      # We don't match if the match doesn't move the cursor
-      # because that causes infinite loops
-      return false, pos, [] of Token if match.empty? || match[0].size == 0
-      # p! match, String.new(text[pos..pos+20])
-      # Log.trace { "#{match}, #{pattern.inspect}, #{text}, #{pos}" }
-      tokens = [] of Token
-      # Emit the tokens
-      actions.each do |action|
-        # Emit the token
-        tokens += action.emit(match, lexer)
-      end
-      Log.trace { "#{xml}, #{pos + match[0].size}, #{tokens}" }
-      return true, pos + match[0].size, tokens
-    end
 
-    def initialize(node : XML::Node, multiline, dotall, ignorecase)
-      @xml = node.to_s
-      pattern = node["pattern"]
-      # flags = Regex::Options::ANCHORED
-      # MULTILINE implies DOTALL which we don't want, so we
-      # use in-pattern flag (?m) instead
-      # flags |= Regex::Options::MULTILINE if multiline
-      pattern = "(?m)" + pattern if multiline
-      # flags |= Regex::Options::DOTALL if dotall
-      # flags |= Regex::Options::IGNORE_CASE if ignorecase
-      @pattern = Regex.new(pattern, multiline, dotall, ignorecase, true)
-      add_actions(node)
-    end
-
     def add_actions(node : XML::Node)
       node.children.each do |child|
```
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
struct Rule < BaseRule
|
||||||
|
property pattern : Regex = Regex.new ""
|
||||||
|
property actions : Array(Action) = [] of Action
|
||||||
|
|
||||||
|
def match(text : Bytes, pos, lexer) : Tuple(Bool, Int32, Array(Token))
|
||||||
|
match = pattern.match(text, pos)
|
||||||
|
|
||||||
|
# No match
|
||||||
|
return false, pos, [] of Token if match.size == 0
|
||||||
|
return true, pos + match[0].size, actions.flat_map { |action| action.emit(match, lexer) }
|
||||||
|
end
|
||||||
|
|
||||||
|
def initialize(node : XML::Node)
|
||||||
|
end
|
||||||
|
|
||||||
|
def initialize(node : XML::Node, multiline, dotall, ignorecase)
|
||||||
|
pattern = node["pattern"]
|
||||||
|
pattern = "(?m)" + pattern if multiline
|
||||||
|
@pattern = Regex.new(pattern, multiline, dotall, ignorecase, true)
|
||||||
|
add_actions(node)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
# This rule includes another state. If any of the rules of the
|
# This rule includes another state. If any of the rules of the
|
||||||
# included state matches, this rule matches.
|
# included state matches, this rule matches.
|
||||||
class IncludeStateRule < Rule
|
struct IncludeStateRule < BaseRule
|
||||||
property state : String = ""
|
property state : String = ""
|
||||||
|
|
||||||
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
|
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
|
||||||
Log.trace { "Including state #{state} from #{lexer.state_stack.last}" }
|
Log.trace { "Including state #{state} from #{lexer.state_stack.last}" }
|
||||||
lexer.states[state].rules.each do |rule|
|
lexer.states[state].rules.each do |rule|
|
||||||
matched, new_pos, new_tokens = rule.match(text, pos, lexer)
|
matched, new_pos, new_tokens = rule.match(text, pos, lexer)
|
||||||
Log.trace { "#{xml}, #{new_pos}, #{new_tokens}" } if matched
|
|
||||||
return true, new_pos, new_tokens if matched
|
return true, new_pos, new_tokens if matched
|
||||||
end
|
end
|
||||||
return false, pos, [] of Token
|
return false, pos, [] of Token
|
||||||
end
|
end
|
||||||
|
|
||||||
def initialize(node : XML::Node)
|
def initialize(node : XML::Node)
|
||||||
@xml = node.to_s
|
|
||||||
include_node = node.children.find { |child|
|
include_node = node.children.find { |child|
|
||||||
child.name == "include"
|
child.name == "include"
|
||||||
}
|
}
|
||||||
@ -85,17 +76,14 @@ module Tartrazine
|
|||||||
end
|
end
|
||||||
|
|
||||||
# This rule always matches, unconditionally
|
# This rule always matches, unconditionally
|
||||||
class UnconditionalRule < Rule
|
struct UnconditionalRule < BaseRule
|
||||||
|
NO_MATCH = [] of Match
|
||||||
|
|
||||||
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
|
def match(text, pos, lexer) : Tuple(Bool, Int32, Array(Token))
|
||||||
tokens = [] of Token
|
return true, pos, actions.flat_map { |action| action.emit(NO_MATCH, lexer) }
|
||||||
actions.each do |action|
|
|
||||||
tokens += action.emit([] of Match, lexer)
|
|
||||||
end
|
|
||||||
return true, pos, tokens
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def initialize(node : XML::Node)
|
def initialize(node : XML::Node)
|
||||||
@xml = node.to_s
|
|
||||||
add_actions(node)
|
add_actions(node)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
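A recurring theme in this commit range is converting `class` to `struct`: `Action`, `State`, and the rule hierarchy above. Crystal structs are value types, allocated inline and passed by copy, which trims heap allocations in the tokenize hot path; the trade-off is that mutating a copy never affects the original. A minimal illustration (the `Counter` struct is hypothetical):

```crystal
# Structs are copied on assignment; classes are passed by reference.
struct Counter
  property n = 0
end

a = Counter.new
b = a   # b is an independent copy
b.n = 5
p a.n # => 0 (with a class, this would print 5)
```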