A thing
This commit is contained in:
commit
ce63fe31e9
9
.editorconfig
Normal file
9
.editorconfig
Normal file
@ -0,0 +1,9 @@
|
||||
root = true
|
||||
|
||||
[*.cr]
|
||||
charset = utf-8
|
||||
end_of_line = lf
|
||||
insert_final_newline = true
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
trim_trailing_whitespace = true
|
9
.gitignore
vendored
Normal file
9
.gitignore
vendored
Normal file
@ -0,0 +1,9 @@
|
||||
/docs/
|
||||
/lib/
|
||||
/bin/
|
||||
/.shards/
|
||||
*.dwarf
|
||||
|
||||
# Libraries don't need dependency lock
|
||||
# Dependencies will be locked in applications that use them
|
||||
/shard.lock
|
21
LICENSE
Normal file
21
LICENSE
Normal file
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2024 Roberto Alsina <roberto.alsina@gmail.com>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
39
README.md
Normal file
39
README.md
Normal file
@ -0,0 +1,39 @@
|
||||
# cre2
|
||||
|
||||
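Crystal bindings for Google's RE2 regular expression library, implemented through a small C++ shim (`src/cre2.cpp`, `src/cre2.h`) that is compiled to `src/cre2.o` and linked into the shard.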
|
||||
|
||||
## Installation
|
||||
|
||||
1. Add the dependency to your `shard.yml`:
|
||||
|
||||
```yaml
|
||||
dependencies:
|
||||
cre2:
|
||||
github: your-github-user/cre2
|
||||
```
|
||||
|
||||
2. Run `shards install`
|
||||
|
||||
## Usage
|
||||
|
||||
```crystal
|
||||
require "cre2"
|
||||
```
|
||||
|
||||
TODO: Write usage instructions here
|
||||
|
||||
## Development
|
||||
|
||||
TODO: Write development instructions here
|
||||
|
||||
## Contributing
|
||||
|
||||
1. Fork it (<https://github.com/your-github-user/cre2/fork>)
|
||||
2. Create your feature branch (`git checkout -b my-new-feature`)
|
||||
3. Commit your changes (`git commit -am 'Add some feature'`)
|
||||
4. Push to the branch (`git push origin my-new-feature`)
|
||||
5. Create a new Pull Request
|
||||
|
||||
## Contributors
|
||||
|
||||
- [Roberto Alsina](https://github.com/your-github-user) - creator and maintainer
|
9
shard.yml
Normal file
9
shard.yml
Normal file
@ -0,0 +1,9 @@
|
||||
name: cre2
|
||||
version: 0.1.0
|
||||
|
||||
authors:
|
||||
- Roberto Alsina <roberto.alsina@gmail.com>
|
||||
|
||||
crystal: '>= 1.13.0'
|
||||
|
||||
license: MIT
|
9
spec/cre2_spec.cr
Normal file
9
spec/cre2_spec.cr
Normal file
@ -0,0 +1,9 @@
|
||||
require "./spec_helper"
|
||||
|
||||
# The module defined in src/cre2.cr is spelled `CRe2`; `Cre2` does not exist.
describe CRe2 do
  # Mirrors the smoke test at the bottom of src/cre2.cr: the whole match is
  # group 0 and the parenthesised "x" is group 1.
  it "matches and exposes capture groups" do
    re = CRe2::Regex.new(".*(x).*", Regex::Options::None)
    m = re.match("axb")

    m.should_not be_nil
    m.try(&.[0]).should eq("axb")
    m.try(&.[1]).should eq("x")
  end
end
|
2
spec/spec_helper.cr
Normal file
2
spec/spec_helper.cr
Normal file
@ -0,0 +1,2 @@
|
||||
require "spec"
|
||||
require "../src/cre2"
|
5
src/Makefile
Normal file
5
src/Makefile
Normal file
@ -0,0 +1,5 @@
|
||||
# Builds the C++ shim object (cre2.o) that src/cre2.cr links against.
# Optimisation flags can be overridden from the command line or environment.
CXXFLAGS ?= -O3

# `all` and `clean` are commands, not files; without this a stray file named
# "clean" or "all" would stop the rule from running.
.PHONY: all clean

all: cre2.o

clean:
	rm -f cre2.o

cre2.o: cre2.cpp cre2.h
	g++ $(CXXFLAGS) -c -o cre2.o cre2.cpp
|
122
src/cre2.cpp
Normal file
122
src/cre2.cpp
Normal file
@ -0,0 +1,122 @@
|
||||
#include <vector>

#include <re2/re2.h>

#include "cre2.h"
|
||||
|
||||
// Recovers the RE2::Options object behind an opaque cre2_options handle.
#define TO_OPT(opt) (static_cast<RE2::Options *>(opt))
|
||||
|
||||
// Allocates a default-constructed RE2::Options and returns it as an opaque
// handle. The caller releases it with cre2_opt_delete().
cre2_options *cre2_opt_new(void) {
  RE2::Options *fresh = new RE2::Options();
  return static_cast<cre2_options *>(fresh);
}
|
||||
|
||||
// Destroys an options object obtained from cre2_opt_new().
void cre2_opt_delete(cre2_options *opt) {
  RE2::Options *owned = TO_OPT(opt);
  delete owned;
}
|
||||
|
||||
|
||||
#define OPT_bool(name) \
|
||||
void cre2_opt_##name(cre2_options *opt, int flag) { \
|
||||
TO_OPT(opt)->set_##name(bool(flag)); \
|
||||
}
|
||||
|
||||
OPT_bool(posix_syntax)
|
||||
OPT_bool(longest_match)
|
||||
OPT_bool(log_errors)
|
||||
OPT_bool(literal)
|
||||
OPT_bool(never_nl)
|
||||
OPT_bool(dot_nl)
|
||||
OPT_bool(case_sensitive)
|
||||
OPT_bool(perl_classes)
|
||||
OPT_bool(word_boundary)
|
||||
OPT_bool(one_line)
|
||||
|
||||
#undef OPT_BOOL
|
||||
|
||||
|
||||
// Sets the encoding stored in the options object. Values other than
// CRE2_UTF8 and CRE2_Latin1 leave the options untouched.
void cre2_opt_encoding(cre2_options *opt, encoding_t enc) {
  if (enc == CRE2_UTF8) {
    TO_OPT(opt)->set_encoding(RE2::Options::EncodingUTF8);
  } else if (enc == CRE2_Latin1) {
    TO_OPT(opt)->set_encoding(RE2::Options::EncodingLatin1);
  }
}
|
||||
|
||||
// Forwards the memory budget `m` to RE2::Options::set_max_mem().
void cre2_opt_max_mem(cre2_options *opt, int m) {
  RE2::Options *options = TO_OPT(opt);
  options->set_max_mem(m);
}
|
||||
|
||||
|
||||
// Recover the RE2 object behind an opaque cre2 handle, in mutable and
// read-only flavours.
#define TO_RE2(re) (static_cast<RE2 *>(re))
#define TO_CONST_RE2(re) (static_cast<const RE2 *>(re))
|
||||
|
||||
// Compiles `pattern` (patternlen bytes; it does not have to be
// NUL-terminated) into a new RE2 object and returns it as an opaque handle
// that must be released with cre2_delete().
//
// `opt` may be null, in which case default-constructed RE2::Options are used
// instead of dereferencing a null pointer. Compilation problems are not
// reported here; inspect cre2_error_code() / cre2_error_string() on the
// returned handle.
cre2 *cre2_new(const char *pattern, int patternlen, const cre2_options *opt) {
  const re2::StringPiece pattern_re2(pattern, patternlen);

  if (opt == nullptr) {
    RE2::Options defaults;
    return static_cast<cre2 *>(new RE2(pattern_re2, defaults));
  }

  const RE2::Options *options = static_cast<const RE2::Options *>(opt);
  return static_cast<cre2 *>(new RE2(pattern_re2, *options));
}
|
||||
|
||||
// Destroys a regular expression obtained from cre2_new().
void cre2_delete(cre2 *re) {
  RE2 *owned = TO_RE2(re);
  delete owned;
}
|
||||
|
||||
|
||||
int cre2_error_code(const cre2 *re) {
|
||||
return int(TO_CONST_RE2(re)->error_code());
|
||||
}
|
||||
|
||||
const char *cre2_error_string(const cre2 *re) {
|
||||
return TO_CONST_RE2(re)->error().c_str();
|
||||
}
|
||||
|
||||
void cre2_error_arg(const cre2 *re, struct string_piece *arg) {
|
||||
const std::string &argstr = TO_CONST_RE2(re)->error_arg();
|
||||
arg->data = argstr.data();
|
||||
arg->length = argstr.length();
|
||||
}
|
||||
|
||||
int cre2_num_capturing_groups(const cre2 *re) {
|
||||
return TO_CONST_RE2(re)->NumberOfCapturingGroups();
|
||||
}
|
||||
|
||||
int cre2_program_size(const cre2 *re) {
|
||||
return TO_CONST_RE2(re)->ProgramSize();
|
||||
}
|
||||
|
||||
|
||||
int cre2_match(
|
||||
const cre2 *re
|
||||
, const char *text
|
||||
, int textlen
|
||||
, int startpos
|
||||
, int endpos
|
||||
, anchor_t anchor
|
||||
, struct string_piece *match
|
||||
, int nmatch) {
|
||||
|
||||
re2::StringPiece text_re2(text, textlen);
|
||||
// FIXME: exceptions?
|
||||
re2::StringPiece *match_re2 = new re2::StringPiece[nmatch];
|
||||
|
||||
RE2::Anchor anchor_re2 = RE2::UNANCHORED;
|
||||
switch (anchor) {
|
||||
case CRE2_ANCHOR_START:
|
||||
anchor_re2 = RE2::ANCHOR_START; break;
|
||||
case CRE2_ANCHOR_BOTH:
|
||||
anchor_re2 = RE2::ANCHOR_BOTH; break;
|
||||
}
|
||||
|
||||
bool ret = TO_CONST_RE2(re)
|
||||
->Match(text_re2, startpos, endpos, anchor_re2, match_re2, nmatch);
|
||||
|
||||
if (ret) {
|
||||
for (int i=0; i<nmatch; i++) {
|
||||
match[i].data = match_re2[i].data();
|
||||
match[i].length = match_re2[i].length();
|
||||
}
|
||||
}
|
||||
|
||||
delete [] match_re2;
|
||||
|
||||
return int(ret);
|
||||
}
|
165
src/cre2.cr
Normal file
165
src/cre2.cr
Normal file
@ -0,0 +1,165 @@
|
||||
# Raw bindings to the C shim declared in `src/cre2.h` and implemented in
# `src/cre2.cpp`. The shim object file is linked directly, together with
# whatever `pkg-config --libs re2` reports.
@[Link(ldflags: "#{__DIR__}/cre2.o -Wl,--copy-dt-needed-entries `pkg-config --libs re2`")]
lib LibCre2
  # Opaque handle to an options object.
  type Options = Void*

  fun opt_new = cre2_opt_new : Options
  fun opt_delete = cre2_opt_delete(op : Options) : Nil

  # Boolean option setters.
  # NOTE(review): the C prototypes take `int flag`; `Bool` is kept here so
  # existing callers keep compiling — confirm the ABI on the target platform.
  fun opt_posix_syntax = cre2_opt_posix_syntax(op : Options, flag : Bool) : Nil
  fun opt_longest_match = cre2_opt_longest_match(op : Options, flag : Bool) : Nil
  fun opt_log_errors = cre2_opt_log_errors(op : Options, flag : Bool) : Nil
  fun opt_literal = cre2_opt_literal(op : Options, flag : Bool) : Nil
  fun opt_never_nl = cre2_opt_never_nl(op : Options, flag : Bool) : Nil
  fun opt_case_sensitive = cre2_opt_case_sensitive(op : Options, flag : Bool) : Nil
  fun opt_perl_classes = cre2_opt_perl_classes(op : Options, flag : Bool) : Nil
  fun opt_word_boundary = cre2_opt_word_boundary(op : Options, flag : Bool) : Nil
  fun opt_one_line = cre2_opt_one_line(op : Options, flag : Bool) : Nil
  fun opt_dot_nl = cre2_opt_dot_nl(op : Options, flag : Bool) : Nil
  # `encoding` is 1 for CRE2_UTF8 and 2 for CRE2_Latin1 (see cre2.h).
  fun opt_encoding = cre2_opt_encoding(op : Options, encoding : Int32) : Nil
  # The C prototype is `cre2_opt_max_mem(cre2_options *opt, int m)`: the second
  # argument is a numeric value, not a flag.
  fun opt_max_mem = cre2_opt_max_mem(op : Options, m : Int32) : Nil

  # Mirror of `struct string_piece`: a pointer plus a length.
  struct StringPiece
    data : LibC::Char*
    length : Int32
  end

  # Opaque handle to a compiled regular expression.
  type CRe2 = Void*

  fun new = cre2_new(pattern : LibC::Char*, patternlen : UInt32, opt : Options) : CRe2
  fun del = cre2_delete(re : CRe2) : Nil
  # The exported symbol is `cre2_error_code`.
  fun error_code = cre2_error_code(re : CRe2) : Int32
  fun num_capturing_groups = cre2_num_capturing_groups(re : CRe2) : Int32
  # Needs an explicit binding: the exported symbol is `cre2_program_size`.
  fun program_size = cre2_program_size(re : CRe2) : Int32

  # Invalidated by further re use
  fun error_string = cre2_error_string(re : CRe2) : LibC::Char*
  fun error_arg = cre2_error_arg(re : CRe2, arg : StringPiece*) : Nil

  # Anchor modes accepted by `match`; values mirror the #defines in cre2.h.
  CRE2_UNANCHORED   = 1
  CRE2_ANCHOR_START = 2
  CRE2_ANCHOR_BOTH  = 3

  # Returns non-zero on a match and fills up to `nmatch` entries of `match`.
  fun match = cre2_match(
    re : CRe2,
    text : LibC::Char*,
    textlen : UInt32,
    startpos : UInt32,
    endpos : UInt32,
    anchor : Int32,
    match : StringPiece*,
    nmatch : Int32
  ) : Int32
end
|
||||
|
||||
module CRe2
  # Minimal stand-in for `::Regex::MatchData` backed by the array of
  # `StringPiece`s filled in by `LibCre2.match`. Only `#[]` with an integer
  # group index is provided.
  struct MatchDataLike
    # Subject string; kept referenced because the pieces are expected to
    # point into its bytes rather than own copies.
    @str : String
    @matches : Pointer(LibCre2::StringPiece)
    @size : Int32

    def initialize(@str, @matches, @size)
    end

    # Returns the text of group *i* (0 is the whole match), or an empty string
    # when *i* is out of range or the group has no data pointer.
    def [](i : Int32) : String
      return "" unless 0 <= i < @size
      piece = @matches[i]
      return "" if piece.data.null?
      String.new(Slice.new(piece.data, piece.length))
    end
  end

  # A `::Regex` that is additionally compiled with RE2. Matching goes through
  # RE2 when RE2 accepted the pattern and falls back to the inherited PCRE2
  # implementation otherwise.
  class Regex < ::Regex
    @jit = false
    @re2 : LibCre2::CRe2
    # True when RE2 could not compile the pattern.
    @failed = false
    @anchored = LibCre2::CRE2_UNANCHORED

    # Compiles *pattern* with PCRE2 (raising on an invalid pattern) and then
    # with RE2. An RE2 failure is recorded in `@failed` instead of raising.
    def initialize(@pattern : String, @options : Options = Options::DEFAULT)
      @source = @pattern
      @re = Regex::PCRE2.compile(pattern, pcre2_options(@options)) do |error_message|
        raise Exception.new(error_message)
      end

      # No multiline flag, it's handled on the pattern
      @pattern = "(?m)#{pattern}" if @options.multiline?

      # The C side expects a byte count, not a character count. The options
      # object is only needed while compiling, so it is released right away
      # (assumes RE2 keeps its own copy of the options — see RE2 reference).
      opts = cre2_options(@options)
      @re2 = LibCre2.new(@pattern, @pattern.bytesize, opts)
      LibCre2.opt_delete(opts)

      # The C call cannot raise; RE2 reports an invalid regexp through a
      # negative capturing-group count.
      @failed = LibCre2.num_capturing_groups(@re2) < 0
    end

    # Matches *str* starting at character index *pos*. Returns a
    # `MatchDataLike` on an RE2 match, `nil` on no match, or whatever the
    # inherited implementation returns when RE2 rejected the pattern.
    # *options* is only honoured by the inherited implementation.
    def match(str : String, pos : Int32 = 0, options : Regex::MatchOptions = :none) : MatchData | MatchDataLike | Nil
      if !@failed
        # RE2 works on byte offsets; translate the character index first.
        byte_pos = str.char_index_to_byte_index(pos)
        return nil unless byte_pos

        msize = LibCre2.num_capturing_groups(@re2) + 1
        matches = Pointer(LibCre2::StringPiece).malloc(msize)
        matched = LibCre2.match(@re2, str, str.bytesize, byte_pos, str.bytesize,
          @anchored, matches, msize)
        matched != 0 ? MatchDataLike.new(str, matches, msize) : nil
      else
        super
      end
    end

    # Releases the RE2 object in addition to the inherited PCRE2 resources.
    def finalize
      super
      LibCre2.del(@re2)
    end

    # Translates `Regex::Options` into the PCRE2 compile flag bitmask.
    private def pcre2_options(options : Regex::Options)
      flags = LibPCRE2::UTF | LibPCRE2::DUPNAMES |
              LibPCRE2::UCP
      flags |= LibPCRE2::MULTILINE if options.multiline?
      flags |= LibPCRE2::DOTALL if options.dotall?
      flags |= LibPCRE2::CASELESS if options.ignore_case?
      flags |= LibPCRE2::ANCHORED if options.anchored?
      flags |= LibPCRE2::NO_UTF_CHECK
      flags
    end

    # Builds a fresh RE2 options object for *options* and records the anchor
    # mode in `@anchored`. The caller owns the returned handle and must pass
    # it to `LibCre2.opt_delete`.
    private def cre2_options(options : Regex::Options) : LibCre2::Options
      opts = LibCre2.opt_new
      # Not configurable
      LibCre2.opt_posix_syntax(opts, false)
      LibCre2.opt_longest_match(opts, false)
      LibCre2.opt_log_errors(opts, false)

      LibCre2.opt_case_sensitive(opts, !options.ignore_case?)
      # Not considered when posix_syntax is false
      # LibCre2.opt_perl_classes(opts, options & Regex::Options::PERL_CLASSES != 0)
      # LibCre2.opt_word_boundary(opts, options & Regex::Options::WORD_BOUNDARY != 0)
      # LibCre2.opt_one_line(opts, options & Regex::Options::ONE_LINE != 0)
      LibCre2.opt_dot_nl(opts, options.dotall?)
      # ANCHORED pins the match to the start of the subject only, so it maps
      # to ANCHOR_START; ANCHOR_BOTH would also require reaching the end.
      @anchored = LibCre2::CRE2_ANCHOR_START if options.anchored?
      LibCre2.opt_encoding(opts, 1) # 1 is CRE2_UTF8 in cre2.h
      opts
    end
  end
end
|
||||
|
||||
# Ad-hoc smoke test that runs whenever this file is required: the same
# pattern and options are matched against "axb" first through CRe2::Regex and
# then through the built-in Regex, printing group 0 and group 1 each time.
# NOTE(review): because this is top-level code in the library file, it also
# executes (and prints) for every program that requires the shard.
re = CRe2::Regex.new(".*(x).*", Regex::Options::ANCHORED | Regex::Options::MULTILINE)
|
||||
m = re.match("axb")
|
||||
p! m.try &.[0]
|
||||
p! m.try &.[1]
|
||||
|
||||
# Reference run with the standard library implementation, for comparison.
re = Regex.new(".*(x).*", Regex::Options::ANCHORED | Regex::Options::MULTILINE)
|
||||
m = re.match("axb")
|
||||
p! m.try &.[0]
|
||||
p! m.try &.[1]
|
||||
|
||||
# match = Pointer(LibCre2::StringPiece).malloc(10)
|
||||
# opts = LibCre2.opt_new
|
||||
# LibCre2.opt_posix_syntax(opts, true)
|
||||
# LibCre2.opt_longest_match(opts, true)
|
||||
# LibCre2.opt_perl_classes(opts, true)
|
||||
# LibCre2.opt_encoding(opts, 1)
|
||||
# # LibCre2.opt_one_line(opts, false)
|
||||
# # LibCre2.opt_never_nl(opts, false)
|
||||
|
||||
# pattern = "(\\s+)(foo)"
|
||||
# text = " foo"
|
||||
# re = LibCre2.new(pattern, pattern.size, opts)
|
||||
# p! LibCre2.match(re, text, text.size, 0, text.size,
|
||||
# LibCre2::CRE2_ANCHOR_START, match, 10)
|
||||
# (0...10).each do |i|
|
||||
# p! String.new(Slice.new(match[i].data, match[i].length))
|
||||
# end
|
67
src/cre2.h
Normal file
67
src/cre2.h
Normal file
@ -0,0 +1,67 @@
|
||||
// C-callable interface to the RE2 shim implemented in cre2.cpp.
// The include guard keeps `struct string_piece` from being defined twice when
// the header is pulled in more than once.
#ifndef CRE2_H
#define CRE2_H

#ifdef __cplusplus
extern "C" {
#endif

// Opaque handle to an options object.
typedef void cre2_options;

// Encodings accepted by cre2_opt_encoding().
typedef int encoding_t;
#define CRE2_UTF8 1
#define CRE2_Latin1 2

// Create / destroy an options object.
cre2_options *cre2_opt_new(void);
void cre2_opt_delete(cre2_options *opt);

// Boolean option setters; any non-zero `flag` enables the option.
void cre2_opt_posix_syntax(cre2_options *opt, int flag);
void cre2_opt_longest_match(cre2_options *opt, int flag);
void cre2_opt_log_errors(cre2_options *opt, int flag);
void cre2_opt_literal(cre2_options *opt, int flag);
void cre2_opt_never_nl(cre2_options *opt, int flag);
void cre2_opt_case_sensitive(cre2_options *opt, int flag);
void cre2_opt_perl_classes(cre2_options *opt, int flag);
void cre2_opt_word_boundary(cre2_options *opt, int flag);
void cre2_opt_one_line(cre2_options *opt, int flag);
void cre2_opt_dot_nl(cre2_options *opt, int flag);
// `enc` is CRE2_UTF8 or CRE2_Latin1; other values are ignored.
void cre2_opt_encoding(cre2_options *opt, encoding_t enc);
void cre2_opt_max_mem(cre2_options *opt, int m);

// Non-owning view of a run of bytes: pointer plus length.
struct string_piece {
  const char *data;
  int length;
};

// Opaque handle to a compiled regular expression.
typedef void cre2;

// Compile `patternlen` bytes of `pattern` with the given options; release the
// result with cre2_delete().
cre2 *cre2_new(const char *pattern, int patternlen, const cre2_options *opt);
void cre2_delete(cre2 *re);

int cre2_error_code(const cre2 *re);
int cre2_num_capturing_groups(const cre2 *re);
int cre2_program_size(const cre2 *re);

// invalidated by further re use
const char *cre2_error_string(const cre2 *re);
void cre2_error_arg(const cre2 *re, struct string_piece *arg);

// Anchor modes accepted by cre2_match().
typedef int anchor_t;
#define CRE2_UNANCHORED 1
#define CRE2_ANCHOR_START 2
#define CRE2_ANCHOR_BOTH 3

// Match `re` against text[startpos, endpos). Returns non-zero on a match, in
// which case the first `nmatch` entries of `match` are filled in.
int cre2_match(
    const cre2 *re
  , const char *text
  , int textlen
  , int startpos
  , int endpos
  , anchor_t anchor
  , struct string_piece *match
  , int nmatch);

#ifdef __cplusplus
} // extern "C"
#endif

#endif // CRE2_H
|
BIN
src/cre2.o
Normal file
BIN
src/cre2.o
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user