This commit is contained in:
Roberto Alsina 2024-08-08 12:35:18 -03:00
commit ce63fe31e9
12 changed files with 457 additions and 0 deletions

9
.editorconfig Normal file
View File

@ -0,0 +1,9 @@
root = true
[*.cr]
charset = utf-8
end_of_line = lf
insert_final_newline = true
indent_style = space
indent_size = 2
trim_trailing_whitespace = true

9
.gitignore vendored Normal file
View File

@ -0,0 +1,9 @@
/docs/
/lib/
/bin/
/.shards/
*.dwarf
# Libraries don't need dependency lock
# Dependencies will be locked in applications that use them
/shard.lock

21
LICENSE Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2024 Roberto Alsina <roberto.alsina@gmail.com>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

39
README.md Normal file
View File

@ -0,0 +1,39 @@
# cre2
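Crystal bindings for Google's [RE2](https://github.com/google/re2) regular expression library, built on a small C wrapper (`src/cre2.cpp`), plus a `CRe2::Regex` class that subclasses Crystal's standard `Regex`.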
## Installation
1. Add the dependency to your `shard.yml`:
```yaml
dependencies:
cre2:
github: your-github-user/cre2
```
2. Run `shards install`
## Usage
```crystal
require "cre2"
```
```crystal
re = CRe2::Regex.new(".*(x).*", Regex::Options::ANCHORED)
if m = re.match("axb")
  m[0] # => "axb"
  m[1] # => "x"
end
```
## Development
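Build the C++ wrapper object first with `make -C src` (this needs `g++`, `pkg-config` and the RE2 development files), then run the specs with `crystal spec`.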
## Contributing
1. Fork it (<https://github.com/your-github-user/cre2/fork>)
2. Create your feature branch (`git checkout -b my-new-feature`)
3. Commit your changes (`git commit -am 'Add some feature'`)
4. Push to the branch (`git push origin my-new-feature`)
5. Create a new Pull Request
## Contributors
- [Roberto Alsina](https://github.com/your-github-user) - creator and maintainer

9
shard.yml Normal file
View File

@ -0,0 +1,9 @@
name: cre2
version: 0.1.0
authors:
- Roberto Alsina <roberto.alsina@gmail.com>
crystal: '>= 1.13.0'
license: MIT

9
spec/cre2_spec.cr Normal file
View File

@ -0,0 +1,9 @@
require "./spec_helper"
describe CRe2 do
  # Mirrors the smoke test at the bottom of src/cre2.cr: the whole match is
  # group 0 and the parenthesised "x" is group 1.
  it "matches and exposes capture groups" do
    re = CRe2::Regex.new(".*(x).*", Regex::Options::ANCHORED | Regex::Options::MULTILINE)
    m = re.match("axb")
    m.should_not be_nil
    m.try(&.[0]).should eq("axb")
    m.try(&.[1]).should eq("x")
  end

  it "returns nil when the text does not match" do
    re = CRe2::Regex.new("x+", Regex::Options::ANCHORED)
    re.match("abc").should be_nil
  end
end

2
spec/spec_helper.cr Normal file
View File

@ -0,0 +1,2 @@
require "spec"
require "../src/cre2"

5
src/Makefile Normal file
View File

@ -0,0 +1,5 @@
# Builds cre2.o, the C++ wrapper object that src/cre2.cr links against.
# CXX and CXXFLAGS can be overridden from the command line or environment.
CXXFLAGS ?= -O3

# all/clean are commands, not files: keep them working even if a file with
# the same name ever appears in this directory.
.PHONY: all clean

all: cre2.o

clean:
	rm -f cre2.o

# RE2 include flags come from pkg-config, matching how cre2.cr obtains the
# link flags (`pkg-config --libs re2`).
cre2.o: cre2.cpp cre2.h
	$(CXX) $(CXXFLAGS) $(shell pkg-config --cflags re2) -c -o cre2.o cre2.cpp

122
src/cre2.cpp Normal file
View File

@ -0,0 +1,122 @@
#include <vector>

#include <re2/re2.h>
#include "cre2.h"
// Reinterprets an opaque cre2_options handle as the RE2::Options it wraps.
#define TO_OPT(opt) (reinterpret_cast<RE2::Options *>(opt))
// Allocates a default-constructed RE2::Options and hands it out as an opaque
// handle. The caller releases it with cre2_opt_delete().
cre2_options *cre2_opt_new(void) {
    RE2::Options *const created = new RE2::Options();
    return reinterpret_cast<void *>(created);
}
// Destroys an options object obtained from cre2_opt_new().
void cre2_opt_delete(cre2_options *opt) {
    RE2::Options *const doomed = TO_OPT(opt);
    delete doomed;
}
// Generates one C setter per boolean RE2 option:
//   void cre2_opt_<name>(cre2_options *opt, int flag)
// forwards to RE2::Options::set_<name>(), treating any non-zero flag as true.
#define OPT_bool(name)                                    \
    void cre2_opt_##name(cre2_options *opt, int flag) {   \
        TO_OPT(opt)->set_##name(flag != 0);               \
    }
OPT_bool(posix_syntax)
OPT_bool(longest_match)
OPT_bool(log_errors)
OPT_bool(literal)
OPT_bool(never_nl)
OPT_bool(dot_nl)
OPT_bool(case_sensitive)
OPT_bool(perl_classes)
OPT_bool(word_boundary)
OPT_bool(one_line)
// Macro names are case-sensitive: the helper is OPT_bool, so that is the
// name that has to be undefined to keep it from leaking further down.
#undef OPT_bool
// Selects the text encoding used by the options object.
// CRE2_UTF8 and CRE2_Latin1 map to the matching RE2 encodings; any other
// value leaves the options untouched.
void cre2_opt_encoding(cre2_options *opt, encoding_t enc) {
    RE2::Options *const options = TO_OPT(opt);
    if (enc == CRE2_UTF8) {
        options->set_encoding(RE2::Options::EncodingUTF8);
    } else if (enc == CRE2_Latin1) {
        options->set_encoding(RE2::Options::EncodingLatin1);
    }
}
// Forwards the memory budget `m` to RE2::Options::set_max_mem().
void cre2_opt_max_mem(cre2_options *opt, int m) {
    RE2::Options *const options = TO_OPT(opt);
    options->set_max_mem(m);
}
// Reinterpret an opaque cre2 handle as the RE2 object it wraps
// (mutable and const flavours).
#define TO_RE2(re) (reinterpret_cast<RE2 *>(re))
#define TO_CONST_RE2(re) (reinterpret_cast<const RE2 *>(re))
// Compiles `pattern` (length `patternlen` bytes, need not be NUL-terminated)
// into a new RE2 object and returns it as an opaque handle.
//
// `opt` may be NULL, in which case RE2's default options are used; otherwise
// it must be a handle from cre2_opt_new(). The result is returned even when
// compilation fails; callers inspect it with cre2_error_code() and release it
// with cre2_delete().
cre2 *cre2_new(const char *pattern, int patternlen, const cre2_options *opt) {
    const re2::StringPiece pattern_re2(pattern, patternlen);
    if (opt == nullptr) {
        // Dereferencing a null options handle would be undefined behaviour
        return reinterpret_cast<void *>(new RE2(pattern_re2));
    }
    return reinterpret_cast<void *>(
        new RE2(pattern_re2, *reinterpret_cast<const RE2::Options *>(opt)));
}
// Destroys a compiled regex obtained from cre2_new().
void cre2_delete(cre2 *re) {
    RE2 *const compiled = TO_RE2(re);
    delete compiled;
}
int cre2_error_code(const cre2 *re) {
return int(TO_CONST_RE2(re)->error_code());
}
const char *cre2_error_string(const cre2 *re) {
return TO_CONST_RE2(re)->error().c_str();
}
void cre2_error_arg(const cre2 *re, struct string_piece *arg) {
const std::string &argstr = TO_CONST_RE2(re)->error_arg();
arg->data = argstr.data();
arg->length = argstr.length();
}
int cre2_num_capturing_groups(const cre2 *re) {
return TO_CONST_RE2(re)->NumberOfCapturingGroups();
}
int cre2_program_size(const cre2 *re) {
return TO_CONST_RE2(re)->ProgramSize();
}
int cre2_match(
const cre2 *re
, const char *text
, int textlen
, int startpos
, int endpos
, anchor_t anchor
, struct string_piece *match
, int nmatch) {
re2::StringPiece text_re2(text, textlen);
// FIXME: exceptions?
re2::StringPiece *match_re2 = new re2::StringPiece[nmatch];
RE2::Anchor anchor_re2 = RE2::UNANCHORED;
switch (anchor) {
case CRE2_ANCHOR_START:
anchor_re2 = RE2::ANCHOR_START; break;
case CRE2_ANCHOR_BOTH:
anchor_re2 = RE2::ANCHOR_BOTH; break;
}
bool ret = TO_CONST_RE2(re)
->Match(text_re2, startpos, endpos, anchor_re2, match_re2, nmatch);
if (ret) {
for (int i=0; i<nmatch; i++) {
match[i].data = match_re2[i].data();
match[i].length = match_re2[i].length();
}
}
delete [] match_re2;
return int(ret);
}

165
src/cre2.cr Normal file
View File

@ -0,0 +1,165 @@
# Raw bindings to the C wrapper around RE2 declared in `cre2.h`.
#
# Links the pre-built `cre2.o` that sits next to this file, plus the RE2
# library located through `pkg-config`.
@[Link(ldflags: "#{__DIR__}/cre2.o -Wl,--copy-dt-needed-entries `pkg-config --libs re2`")]
lib LibCre2
  # Opaque handle to an options object (`cre2_options *`)
  type Options = Void*

  fun opt_new = cre2_opt_new : Options
  fun opt_delete = cre2_opt_delete(op : Options) : Nil

  # NOTE(review): the C setters take `int flag`; `Bool` is kept here so that
  # existing callers passing booleans keep compiling — confirm the ABI.
  fun opt_posix_syntax = cre2_opt_posix_syntax(op : Options, flag : Bool) : Nil
  fun opt_longest_match = cre2_opt_longest_match(op : Options, flag : Bool) : Nil
  fun opt_log_errors = cre2_opt_log_errors(op : Options, flag : Bool) : Nil
  fun opt_literal = cre2_opt_literal(op : Options, flag : Bool) : Nil
  fun opt_never_nl = cre2_opt_never_nl(op : Options, flag : Bool) : Nil
  fun opt_case_sensitive = cre2_opt_case_sensitive(op : Options, flag : Bool) : Nil
  fun opt_perl_classes = cre2_opt_perl_classes(op : Options, flag : Bool) : Nil
  fun opt_word_boundary = cre2_opt_word_boundary(op : Options, flag : Bool) : Nil
  fun opt_one_line = cre2_opt_one_line(op : Options, flag : Bool) : Nil
  fun opt_dot_nl = cre2_opt_dot_nl(op : Options, flag : Bool) : Nil

  # *encoding* is 1 for UTF-8 and 2 for Latin-1 (CRE2_UTF8 / CRE2_Latin1 in cre2.h)
  fun opt_encoding = cre2_opt_encoding(op : Options, encoding : Int32) : Nil
  # The C parameter is `int m`, an amount rather than a flag
  fun opt_max_mem = cre2_opt_max_mem(op : Options, max_mem : Int32) : Nil

  # Mirror of `struct string_piece`: a pointer/length view, not a copy
  struct StringPiece
    data : LibC::Char*
    length : Int32
  end

  # Opaque handle to a compiled regex (`cre2 *`)
  type CRe2 = Void*

  # Lengths and offsets are `int` on the C side, hence Int32 throughout
  fun new = cre2_new(pattern : LibC::Char*, patternlen : Int32, opt : Options) : CRe2
  fun del = cre2_delete(re : CRe2) : Nil
  fun error_code = cre2_error_code(re : CRe2) : Int32
  fun num_capturing_groups = cre2_num_capturing_groups(re : CRe2) : Int32
  fun program_size = cre2_program_size(re : CRe2) : Int32
  # Invalidated by further re use
  fun error_string = cre2_error_string(re : CRe2) : LibC::Char*
  fun error_arg = cre2_error_arg(re : CRe2, arg : StringPiece*) : Nil

  # Anchor modes accepted by `match` (same values as in cre2.h)
  CRE2_UNANCHORED   = 1
  CRE2_ANCHOR_START = 2
  CRE2_ANCHOR_BOTH  = 3

  # Returns non-zero on a match; *match* must have room for *nmatch* entries
  fun match = cre2_match(
    re : CRe2,
    text : LibC::Char*,
    textlen : Int32,
    startpos : Int32,
    endpos : Int32,
    anchor : Int32,
    match : StringPiece*,
    nmatch : Int32
  ) : Int32
end
# RE2-backed regular expressions layered on top of Crystal's `::Regex`.
module CRe2
  # Minimal stand-in for `::Regex::MatchData`, backed by the array of
  # `LibCre2::StringPiece` that `LibCre2.match` fills in.
  struct MatchDataLike
    # Presumably retained so the matched string outlives the pointers stored
    # in `@matches` — confirm.
    @str : String
    @matches : Pointer(LibCre2::StringPiece)
    @size : Int32

    def initialize(@str, @matches, @size)
    end

    # Returns the text of group *i* (0 is the whole match). Returns "" when
    # *i* is out of range or the group has no data.
    def [](i : Int32) : String
      return "" unless 0 <= i < @size
      piece = @matches[i]
      return "" if piece.data.null?
      String.new(Slice.new(piece.data, piece.length))
    end
  end

  # A `::Regex` that matches with RE2 and defers to the inherited PCRE2
  # implementation when RE2 could not compile the pattern.
  class Regex < ::Regex
    @jit = false
    @re2 : LibCre2::CRe2
    @failed = false
    @anchored = LibCre2::CRE2_UNANCHORED

    # Compiles *pattern* with both PCRE2 (raising on a PCRE2 compile error)
    # and RE2.
    def initialize(@pattern : String, @options : Options = Options::None)
      @source = @pattern
      @re = Regex::PCRE2.compile(pattern, pcre2_options(@options)) do |error_message|
        raise Exception.new(error_message)
      end
      # No multiline flag, it's handled on the pattern
      @pattern = "(?m)#{pattern}" if @options.multiline?
      @anchored = LibCre2::CRE2_ANCHOR_BOTH if @options.anchored?

      opts = cre2_options(@options)
      # The C side wants the length in bytes, not in characters
      @re2 = LibCre2.new(@pattern, @pattern.bytesize, opts)
      # RE2 keeps its own copy of the options, so the temporary can go
      LibCre2.opt_delete(opts)
      # The C call cannot raise; RE2 reports -1 capturing groups for a
      # pattern it failed to compile.
      @failed = LibCre2.num_capturing_groups(@re2) < 0
    end

    # Releases the RE2 object in addition to what `::Regex` frees.
    def finalize
      super
      LibCre2.del(@re2)
    end

    # Matches *str* starting at character index *pos*. Returns a
    # `MatchDataLike` from RE2, the inherited `MatchData` when RE2 is not
    # usable for this pattern, or `nil` when nothing matches.
    def match(str : String, pos : Int32 = 0, options : Regex::MatchOptions = :none) : MatchData | MatchDataLike | Nil
      return super if @failed

      # RE2 works on byte offsets, while *pos* counts characters
      byte_pos = str.char_index_to_byte_index(pos)
      return nil if byte_pos.nil?

      msize = LibCre2.num_capturing_groups(@re2) + 1
      matches = Pointer(LibCre2::StringPiece).malloc(msize)
      if LibCre2.match(@re2, str, str.bytesize, byte_pos, str.bytesize,
           @anchored, matches, msize) != 0
        MatchDataLike.new(str, matches, msize)
      end
    end

    # Translates `Regex::Options` into PCRE2 compile flags.
    private def pcre2_options(options : Regex::Options)
      flags = LibPCRE2::UTF | LibPCRE2::DUPNAMES |
              LibPCRE2::UCP
      flags |= LibPCRE2::MULTILINE if options.multiline?
      flags |= LibPCRE2::DOTALL if options.dotall?
      flags |= LibPCRE2::CASELESS if options.ignore_case?
      flags |= LibPCRE2::ANCHORED if options.anchored?
      flags |= LibPCRE2::NO_UTF_CHECK
      flags
    end

    # Builds a new RE2 options object for *options*. The caller owns the
    # result and must release it with `LibCre2.opt_delete`.
    private def cre2_options(options : Regex::Options) : LibCre2::Options
      opts = LibCre2.opt_new
      # Not configurable
      LibCre2.opt_posix_syntax(opts, false)
      LibCre2.opt_longest_match(opts, false)
      LibCre2.opt_log_errors(opts, false)
      LibCre2.opt_case_sensitive(opts, !options.ignore_case?)
      # Not considered when posix_syntax is false
      # LibCre2.opt_perl_classes(opts, options & Regex::Options::PERL_CLASSES != 0)
      # LibCre2.opt_word_boundary(opts, options & Regex::Options::WORD_BOUNDARY != 0)
      # LibCre2.opt_one_line(opts, options & Regex::Options::ONE_LINE != 0)
      LibCre2.opt_dot_nl(opts, options.dotall?)
      # 1 selects UTF-8 (CRE2_UTF8 in cre2.h)
      LibCre2.opt_encoding(opts, 1)
      opts
    end
  end
end
# Ad-hoc smoke test, executed whenever this file is loaded: prints the whole
# match and the first capture group with `p!`.
# First with the RE2-backed CRe2::Regex...
re = CRe2::Regex.new(".*(x).*", Regex::Options::ANCHORED | Regex::Options::MULTILINE)
m = re.match("axb")
p! m.try &.[0]
p! m.try &.[1]
# ...then with the standard library Regex, using the same pattern, options
# and input, for comparison.
re = Regex.new(".*(x).*", Regex::Options::ANCHORED | Regex::Options::MULTILINE)
m = re.match("axb")
p! m.try &.[0]
p! m.try &.[1]
# match = Pointer(LibCre2::StringPiece).malloc(10)
# opts = LibCre2.opt_new
# LibCre2.opt_posix_syntax(opts, true)
# LibCre2.opt_longest_match(opts, true)
# LibCre2.opt_perl_classes(opts, true)
# LibCre2.opt_encoding(opts, 1)
# # LibCre2.opt_one_line(opts, false)
# # LibCre2.opt_never_nl(opts, false)
# pattern = "(\\s+)(foo)"
# text = " foo"
# re = LibCre2.new(pattern, pattern.size, opts)
# p! LibCre2.match(re, text, text.size, 0, text.size,
# LibCre2::CRE2_ANCHOR_START, match, 10)
# (0...10).each do |i|
# p! String.new(Slice.new(match[i].data, match[i].length))
# end

67
src/cre2.h Normal file
View File

@ -0,0 +1,67 @@
/*
 * C interface to the RE2 wrapper implemented in cre2.cpp.
 *
 * The include guard makes the header safe to include more than once;
 * without it `struct string_piece` would be defined twice.
 */
#ifndef CRE2_H_INCLUDED
#define CRE2_H_INCLUDED

#ifdef __cplusplus
extern "C" {
#endif

/* Opaque options object; create with cre2_opt_new(), free with cre2_opt_delete(). */
typedef void cre2_options;

/* Text encodings accepted by cre2_opt_encoding(). */
typedef int encoding_t;
#define CRE2_UTF8 1
#define CRE2_Latin1 2

cre2_options *cre2_opt_new(void);
void cre2_opt_delete(cre2_options *opt);

/* Boolean option setters: `flag` is interpreted as a truth value. */
void cre2_opt_posix_syntax(cre2_options *opt, int flag);
void cre2_opt_longest_match(cre2_options *opt, int flag);
void cre2_opt_log_errors(cre2_options *opt, int flag);
void cre2_opt_literal(cre2_options *opt, int flag);
void cre2_opt_never_nl(cre2_options *opt, int flag);
void cre2_opt_case_sensitive(cre2_options *opt, int flag);
void cre2_opt_perl_classes(cre2_options *opt, int flag);
void cre2_opt_word_boundary(cre2_options *opt, int flag);
void cre2_opt_one_line(cre2_options *opt, int flag);
void cre2_opt_dot_nl(cre2_options *opt, int flag);

/* Values other than CRE2_UTF8 / CRE2_Latin1 leave the encoding unchanged. */
void cre2_opt_encoding(cre2_options *opt, encoding_t enc);
void cre2_opt_max_mem(cre2_options *opt, int m);

/* A pointer/length view of a byte range; the bytes are not owned by the struct. */
struct string_piece {
    const char *data;
    int length;
};

/* Opaque compiled regex; create with cre2_new(), free with cre2_delete(). */
typedef void cre2;

/* `pattern` is `patternlen` bytes long and need not be NUL-terminated. */
cre2 *cre2_new(const char *pattern, int patternlen, const cre2_options *opt);
void cre2_delete(cre2 *re);
int cre2_error_code(const cre2 *re);
int cre2_num_capturing_groups(const cre2 *re);
int cre2_program_size(const cre2 *re);
// invalidated by further re use
const char *cre2_error_string(const cre2 *re);
void cre2_error_arg(const cre2 *re, struct string_piece *arg);

/* Anchor modes accepted by cre2_match(). */
typedef int anchor_t;
#define CRE2_UNANCHORED 1
#define CRE2_ANCHOR_START 2
#define CRE2_ANCHOR_BOTH 3

/*
 * Matches `re` against text[startpos, endpos). Returns 1 on a match and 0
 * otherwise. On a match the first `nmatch` entries of `match` are filled in
 * (entry 0 is the whole match); on failure `match` is left untouched.
 */
int cre2_match(
    const cre2 *re
  , const char *text
  , int textlen
  , int startpos
  , int endpos
  , anchor_t anchor
  , struct string_piece *match
  , int nmatch);

#ifdef __cplusplus
} // extern "C"
#endif

#endif /* CRE2_H_INCLUDED */

BIN
src/cre2.o Normal file

Binary file not shown.