return errors from parse() #1

Merged
ralsina merged 12 commits from return-errors into main 2023-07-14 01:05:13 +00:00
6 changed files with 163 additions and 126 deletions

1
.gitignore vendored
View File

@ -54,3 +54,4 @@ dkms.conf
shortcodes.c
shortcodes
tests
.cgreen*

View File

@ -1,16 +1,13 @@
CC=tcc
all: tests
run: shortcodes
./shortcodes
CC=gcc
all: test
shortcodes.c: shortcodes.rl
ragel -G2 shortcodes.rl -o shortcodes.c
shortcodes: shortcodes.c
$(CC) shortcodes.c -g -o shortcodes
tests: shortcodes.c shortcodes.h tests.c
$(CC) tests.c shortcodes.c -lbg -lcgreen -g -o tests
test: tests
./tests
tests.so: shortcodes.c tests.c
$(CC) -fPIC -shared -g -o $@ $^ -lbg -lcgreen
clean:
rm -f shortcodes shortcodes.c
.PHONY: test run
rm -f shortcodes.c *.o *.so tests
test: tests.so
cgreen-runner $^
debug:
cgreen-debug tests.so
.PHONY: test debug

View File

@ -2,7 +2,6 @@
## Things That May Get Done
* Produce decent error structures
* Inform tag type (% or <) in result
* Handle other kinds of quotes
* Handle escaping quote characters
@ -11,5 +10,6 @@
* Add self-closing shortcodes `{{< foo />}}`
(also, how TF do they work?)
* Add inline shortcodes `{{< time.inline >}}{{ now }}{{< /time.inline >}}`
* Start integrating with Crystal
* ~~Produce decent error structures~~
* ~~Detect mismatched start/end like {{% foo >}}~~

View File

@ -1,12 +1,23 @@
#ifndef SHORTCODES_H
#define SHORTCODES_H
#pragma once
// A chunk is a reference to a piece of the input string:
// it begins at offset "start" (counted from the beginning of that string)
// and goes on for "len" characters.
struct chunk
{
int start, len;
unsigned int start, len;
};
typedef struct chunk chunk;
// An error reported by the parser.
struct sc_error
{
// Offset into the parsed input string where the problem was detected
// (callers can read the offending text at input + position).
unsigned int position;
// Which kind of error this is: one of the ERR_* codes defined in this header.
unsigned int code;
};
typedef struct sc_error sc_error;
// Describes a parsed shortcode
struct shortcode
{
chunk whole;
@ -15,9 +26,34 @@ struct shortcode
char matching;
chunk argnames[100];
chunk argvals[100];
int argcount;
unsigned int argcount;
};
typedef struct shortcode shortcode;
shortcode *parse(char *);
#endif
// Everything parse() hands back for one input string: the shortcodes it
// found plus the errors it recorded along the way.
struct sc_result
{
// Storage for the parsed shortcodes.
shortcode sc[100];
// How many shortcodes were found.
unsigned int sccount;
// Storage for the recorded errors.
// NOTE(review): capacity is 10 and the parser actions visible in this
// change append to it without a bounds check -- confirm this is acceptable.
sc_error errors[10];
// How many errors were recorded.
unsigned int errcount;
};
typedef struct sc_result sc_result;
// Error codes
/* You are closing the wrong shortcode.
Example:
{{% foo %}} {{% /bar %}}
*/
#define ERR_MISMATCHED_CLOSING_TAG 1
/* You are using mismatched brackets.
Example:
{{% foo >}}
*/
#define ERR_MISMATCHED_BRACKET 2
sc_result parse(char *);

View File

@ -1,7 +1,6 @@
#include <string.h>
#include "shortcodes.h"
%%{
machine shortcode;
@ -59,6 +58,9 @@
// Since it's mismatched, remove the name
sc_list[c_sc].name.start = 0;
sc_list[c_sc].name.len=0;
result.errors[result.errcount].position = p-start-2;
result.errors[result.errcount].code = ERR_MISMATCHED_BRACKET;
result.errcount++;
};
shortcode = ((start_p content end_p) | (start_b content end_b))
@ -85,8 +87,6 @@
matched_shortcode = (shortcode any* closing_shortcode)
@ {
sc_list[c_sc-1].matching = 1;
sc_list[c_sc-1].whole.len = p-start-sc_list[c_sc-1].whole.start + 1;
if (
sc_list[c_sc-1].name.len != sc_list[c_sc].name.len ||
strncmp(
@ -94,18 +94,23 @@
start + sc_list[c_sc].name.start,
sc_list[c_sc-1].name.len) !=0)
{
return NULL;
result.errors[result.errcount].position =
sc_list[c_sc].whole.start;
result.errors[result.errcount].code = ERR_MISMATCHED_CLOSING_TAG;
result.errcount++;
} else {
// The previous shortcode is matching (mark it)
sc_list[c_sc-1].matching = 1;
sc_list[c_sc-1].whole.len = p-start-sc_list[c_sc-1].whole.start + 1;
}
// Reuse this shortcode entry for next one
sc_list[c_sc].name.start = 0;
sc_list[c_sc].name.len=0;
// Do NOT increase c_sc
};
main := (any* (shortcode | matched_shortcode | mismatched))*;
}%%
shortcode *parse(char *input) {
sc_result parse(char *input) {
%%write data;
char *eof, *ts, *te = 0;
@ -115,24 +120,16 @@ shortcode *parse(char *input) {
char *p = input;
char *pe = p + strlen(input);
shortcode sc_list[1000];
sc_result result;
shortcode *sc_list = result.sc;
int c_sc = 0;
int sc_start = 0;
int sc_end = 0;
char *mark = p;
char *data_mark = p;
%% write init;
%% write exec;
return sc_list;
result.sccount = c_sc;
return result;
}
// int main(int argc, char **argv) {
// parse(
// "bbb{{% sarasa sar1 sar2 \"sar3\" %}}ccc"
// "{{< c1 arg2 >}}foobar{{% /c1%}}aaa{{% sarasa name=\"pepe\" %}}");
// return 0;
// };

172
tests.c Executable file → Normal file
View File

@ -6,10 +6,11 @@ Describe(parse);
BeforeEach(parse) {}
AfterEach(parse) {}
shortcode *result;
sc_result result;
str s;
chunk_s(char *buffer, chunk c){
void chunk_s(char *buffer, chunk c)
{
str_copyb(&s, buffer + c.start, c.len);
}
@ -18,7 +19,8 @@ Ensure(parse, empty_string)
char *input = "";
result = parse(input);
// This means no shortcodes in it
assert_that(result[0].name.len, is_equal_to(0));
assert_that(result.sccount, is_equal_to(0));
assert_that(result.errcount, is_equal_to(0));
}
Ensure(parse, simple_shortcode)
@ -26,24 +28,41 @@ Ensure(parse, simple_shortcode)
char *input = "foobar {{% shortcode %}}blah";
result = parse(input);
// Only 1 shortcode
assert_that(result[1].name.len, is_equal_to(0));
assert_that(result.sccount, is_equal_to(1));
// It's a simple one called shortcode, no args
chunk_s(input, result[0].name);
chunk_s(input, result.sc[0].name);
assert_that(s.s, is_equal_to_string("shortcode"));
assert_that(result[0].matching, is_equal_to(0));
assert_that(result[0].argcount, is_equal_to(0));
assert_that(result.sc[0].matching, is_equal_to(0));
assert_that(result.sc[0].argcount, is_equal_to(0));
// The whole shortcode is the whole thing
chunk_s(input, result[0].whole);
chunk_s(input, result.sc[0].whole);
assert_that(s.s, is_equal_to_string("{{% shortcode %}}"));
assert_that(result.errcount, is_equal_to(0));
}
// A closing tag whose name differs from the opening tag's name must be
// reported as ERR_MISMATCHED_CLOSING_TAG instead of aborting the parse.
Ensure(parse, mismatched_tags)
{
char *input = "foobar {{% shortcode %}}blah{{% /foo %}}";
result = parse(input);
// One shortcode, one error
assert_that(result.sccount, is_equal_to(1));
assert_that(result.errcount, is_equal_to(1));
assert_that(result.errors[0].code, is_equal_to(ERR_MISMATCHED_CLOSING_TAG));
// The error position is an offset into input; the 8 characters found there
// must be the beginning of the offending closing tag.
str_copyb(&s, input + result.errors[0].position, 8);
assert_that(s.s, is_equal_to_string("{{% /foo"));
}
Ensure(parse, mismatched_brackets)
{
char *input = "foobar {{% shortcode >}}blah";
result = parse(input);
// No shortcodes
assert_that(result[0].name.len, is_equal_to(0));
// No shortcodes, 1 error
assert_that(result.sccount, is_equal_to(0));
assert_that(result.errcount, is_equal_to(1));
assert_that(result.errors[0].code, is_equal_to(ERR_MISMATCHED_BRACKET));
str_copyb(&s, input + result.errors[0].position, 3);
assert_that(s.s, is_equal_to_string(">}}"));
}
Ensure(parse, mismatched_brackets_inside_data_are_ok)
@ -51,11 +70,12 @@ Ensure(parse, mismatched_brackets_inside_data_are_ok)
char *input = "foobar {{% sc %}} >}}blah {{% /sc %}} ";
result = parse(input);
// 1 shortcode
assert_that(result[1].name.len, is_equal_to(0));
chunk_s(input, result[0].whole);
assert_that(result.sccount, is_equal_to(1));
chunk_s(input, result.sc[0].whole);
assert_that(s.s, is_equal_to_string("{{% sc %}} >}}blah {{% /sc %}}"));
chunk_s(input, result[0].data);
chunk_s(input, result.sc[0].data);
assert_that(s.s, is_equal_to_string(" >}}blah "));
assert_that(result.errcount, is_equal_to(0));
}
Ensure(parse, mismatched_brackets_in_qval_are_ok)
@ -63,11 +83,12 @@ Ensure(parse, mismatched_brackets_in_qval_are_ok)
char *input = "foobar {{% sc \">}}blah\" %}} {{% /sc %}}";
result = parse(input);
// 1 shortcode
assert_that(result[1].name.len, is_equal_to(0));
chunk_s(input, result[0].whole);
assert_that(result.sccount, is_equal_to(1));
chunk_s(input, result.sc[0].whole);
assert_that(s.s, is_equal_to_string("{{% sc \">}}blah\" %}} {{% /sc %}}"));
chunk_s(input, result[0].argvals[0]);
chunk_s(input, result.sc[0].argvals[0]);
assert_that(s.s, is_equal_to_string(">}}blah"));
assert_that(result.errcount, is_equal_to(0));
}
Ensure(parse, inner_spaces_optional)
@ -75,16 +96,17 @@ Ensure(parse, inner_spaces_optional)
char *input = "foobar {{% shortcode%}}blah";
result = parse(input);
// Only 1 shortcode
assert_that(result[1].name.len, is_equal_to(0));
assert_that(result.sccount, is_equal_to(1));
// It's a simple one called shortcode, no args
chunk_s(input, result[0].name);
chunk_s(input, result.sc[0].name);
assert_that(s.s, is_equal_to_string("shortcode"));
assert_that(result[0].matching, is_equal_to(0));
assert_that(result[0].argcount, is_equal_to(0));
assert_that(result.sc[0].matching, is_equal_to(0));
assert_that(result.sc[0].argcount, is_equal_to(0));
// The whole shortcode is the whole thing
chunk_s(input, result[0].whole);
chunk_s(input, result.sc[0].whole);
assert_that(s.s, is_equal_to_string("{{% shortcode%}}"));
assert_that(result.errcount, is_equal_to(0));
}
Ensure(parse, name_can_be_path)
@ -92,13 +114,14 @@ Ensure(parse, name_can_be_path)
char *input = "foobar {{% shortcode/foo/bar %}}blah";
result = parse(input);
// Only 1 shortcode
assert_that(result[1].name.len, is_equal_to(0));
assert_that(result.sccount, is_equal_to(1));
// It's a simple one called shortcode, no args
chunk_s(input, result[0].name);
chunk_s(input, result.sc[0].name);
assert_that(s.s, is_equal_to_string("shortcode/foo/bar"));
assert_that(result[0].matching, is_equal_to(0));
assert_that(result[0].argcount, is_equal_to(0));
assert_that(result.sc[0].matching, is_equal_to(0));
assert_that(result.sc[0].argcount, is_equal_to(0));
assert_that(result.errcount, is_equal_to(0));
}
Ensure(parse, multiple_shortcodes)
@ -106,24 +129,25 @@ Ensure(parse, multiple_shortcodes)
char *input = "foobar {{% shortcode %}}blah {{<sc2 >}}blahblah";
result = parse(input);
// 2 shortcodes
assert_that(result[2].name.len, is_equal_to(0));
assert_that(result.sccount, is_equal_to(2));
// It's a simple one called shortcode, no args
chunk_s(input, result[0].name);
chunk_s(input, result.sc[0].name);
assert_that(s.s, is_equal_to_string("shortcode"));
assert_that(result[0].matching, is_equal_to(0));
assert_that(result[0].argcount, is_equal_to(0));
assert_that(result.sc[0].matching, is_equal_to(0));
assert_that(result.sc[0].argcount, is_equal_to(0));
// The whole shortcode is the whole thing
chunk_s(input, result[0].whole);
chunk_s(input, result.sc[0].whole);
assert_that(s.s, is_equal_to_string("{{% shortcode %}}"));
// It's a simple one called sc2, no args
chunk_s(input, result[1].name);
chunk_s(input, result.sc[1].name);
assert_that(s.s, is_equal_to_string("sc2"));
assert_that(result[1].matching, is_equal_to(0));
assert_that(result[1].argcount, is_equal_to(0));
assert_that(result.sc[1].matching, is_equal_to(0));
assert_that(result.sc[1].argcount, is_equal_to(0));
// The whole shortcode is the whole thing
chunk_s(input, result[1].whole);
chunk_s(input, result.sc[1].whole);
assert_that(s.s, is_equal_to_string("{{<sc2 >}}"));
assert_that(result.errcount, is_equal_to(0));
}
Ensure(parse, matching_shortcode)
@ -132,19 +156,20 @@ Ensure(parse, matching_shortcode)
result = parse(input);
// Only 1 shortcode
assert_that(result[1].name.len, is_equal_to(0));
assert_that(result.sccount, is_equal_to(1));
// It's a matching one called shortcode, no args
chunk_s(input, result[0].name);
chunk_s(input, result.sc[0].name);
assert_that(s.s, is_equal_to_string("shortcode"));
assert_that(result[0].matching, is_equal_to(1));
assert_that(result[0].argcount, is_equal_to(0));
assert_that(result.sc[0].matching, is_equal_to(1));
assert_that(result.sc[0].argcount, is_equal_to(0));
// data is the stuff between the shortcode tags
chunk_s(input, result[0].data);
chunk_s(input, result.sc[0].data);
assert_that(s.s, is_equal_to_string("foo bar"));
// The whole shortcode is the whole thing
chunk_s(input, result[0].whole);
chunk_s(input, result.sc[0].whole);
assert_that(s.s, is_equal_to_string("{{% shortcode %}}foo bar{{% /shortcode %}}"));
assert_that(result.errcount, is_equal_to(0));
}
Ensure(parse, shortcode_args)
@ -152,69 +177,50 @@ Ensure(parse, shortcode_args)
char *input = "foobar {{% shortcode foo \"bar\" 42 bat=v1 baz=\"v2\" %}}blah";
result = parse(input);
// Only 1 shortcode
assert_that(result[1].name.len, is_equal_to(0));
assert_that(result.sccount, is_equal_to(1));
// The whole shortcode is the whole thing
chunk_s(input, result[0].whole);
chunk_s(input, result.sc[0].whole);
assert_that(s.s, is_equal_to_string("{{% shortcode foo \"bar\" 42 bat=v1 baz=\"v2\" %}}"));
// Name is shortcode
chunk_s(input, result[0].name);
chunk_s(input, result.sc[0].name);
assert_that(s.s, is_equal_to_string("shortcode"));
assert_that(result[0].matching, is_equal_to(0));
assert_that(result.sc[0].matching, is_equal_to(0));
// Has 5 args
assert_that(result[0].argcount, is_equal_to(5));
assert_that(result.sc[0].argcount, is_equal_to(5));
// Arg1 is foo, no name
assert_that(result[0].argnames[0].len, is_equal_to(0));
chunk_s(input, result[0].argvals[0]);
assert_that(result.sc[0].argnames[0].len, is_equal_to(0));
chunk_s(input, result.sc[0].argvals[0]);
assert_that(s.s, is_equal_to_string("foo"));
// Arg2 is bar, no name
assert_that(result[0].argnames[1].len, is_equal_to(0));
chunk_s(input, result[0].argvals[1]);
assert_that(result.sc[0].argnames[1].len, is_equal_to(0));
chunk_s(input, result.sc[0].argvals[1]);
assert_that(s.s, is_equal_to_string("bar"));
// Arg3 is 42, no name
assert_that(result[0].argnames[2].len, is_equal_to(0));
chunk_s(input, result[0].argvals[2]);
assert_that(result.sc[0].argnames[2].len, is_equal_to(0));
chunk_s(input, result.sc[0].argvals[2]);
assert_that(s.s, is_equal_to_string("42"));
// Arg4 is bat=v1
chunk_s(input, result[0].argnames[3]);
chunk_s(input, result.sc[0].argnames[3]);
assert_that(s.s, is_equal_to_string("bat"));
chunk_s(input, result[0].argvals[3]);
chunk_s(input, result.sc[0].argvals[3]);
assert_that(s.s, is_equal_to_string("v1"));
// Arg5 is baz=v2
chunk_s(input, result[0].argnames[4]);
chunk_s(input, result.sc[0].argnames[4]);
assert_that(s.s, is_equal_to_string("baz"));
chunk_s(input, result[0].argvals[4]);
chunk_s(input, result.sc[0].argvals[4]);
assert_that(s.s, is_equal_to_string("v2"));
assert_that(result.errcount, is_equal_to(0));
}
Ensure(parse, escaped_shortcode)
{
char *input = "foobar \\{{% shortcode %}}";
result = parse(input);
// No shortcodes
assert_that(result[0].name.len, is_equal_to(0));
}
int main(int argc, char **argv)
{
str_init(&s);
TestSuite *suite = create_test_suite();
add_test_with_context(suite, parse, empty_string);
add_test_with_context(suite, parse, simple_shortcode);
add_test_with_context(suite, parse, mismatched_brackets);
add_test_with_context(suite, parse, mismatched_brackets_inside_data_are_ok);
add_test_with_context(suite, parse, mismatched_brackets_in_qval_are_ok);
add_test_with_context(suite, parse, name_can_be_path);
add_test_with_context(suite, parse, inner_spaces_optional);
add_test_with_context(suite, parse, multiple_shortcodes);
add_test_with_context(suite, parse, matching_shortcode);
add_test_with_context(suite, parse, shortcode_args);
// Bugs
// add_test_with_context(suite, parse, escaped_shortcode);
return run_test_suite(suite, create_text_reporter());
}
// BUG?
// Ensure(parse, escaped_shortcode)
// {
// char *input = "foobar \\{{% shortcode %}}";
// result = parse(input);
// // No shortcodes
// assert_that(result.sc[0].name.len, is_equal_to(0));
// }