Compare commits

..

10 Commits

Author SHA1 Message Date
7ead97a5e7 Finished with error reporting for now 2023-07-13 22:02:45 -03:00
62fd9b5029 Add closing tag error reporting 2023-07-13 21:57:47 -03:00
34a16bd6bd Add error reporting/checking 2023-07-13 21:24:37 -03:00
4dcd067ca7 Makefile 2023-07-13 20:59:35 -03:00
35acfee0b6 housekeeping 2023-07-13 07:44:49 -03:00
f6e775474d makefile tweaks 2023-07-12 22:26:21 -03:00
96f08fcaa0 back to tcc 2023-07-12 22:23:33 -03:00
47ee73edbe tests pass again 2023-07-12 22:22:22 -03:00
f471642f43 use cgreen-runner 2023-07-12 22:08:16 -03:00
5350dbd126 Initial implementation (everything broke) 2023-07-12 21:58:39 -03:00
15 changed files with 253 additions and 4289 deletions

4
.gitignore vendored
View File

@@ -51,3 +51,7 @@ Module.symvers
Mkfile.old Mkfile.old
dkms.conf dkms.conf
shortcodes.c
shortcodes
tests
.cgreen*

21
LICENSE
View File

@@ -1,21 +0,0 @@
MIT License
Copyright (c) 2023 Roberto Alsina <roberto.alsina@gmail.com>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

13
Makefile Normal file
View File

@@ -0,0 +1,13 @@
# Build and run the cgreen test-suite for the shortcode parser.
#
#   make / make test  - generate the parser, build tests.so, run cgreen-runner
#   make debug        - run the same shared object under cgreen-debug
#   make clean        - remove generated and built artefacts
CC=gcc

all: test

# shortcodes.c is generated from the Ragel grammar.
shortcodes.c: shortcodes.rl
	ragel -G2 shortcodes.rl -o shortcodes.c

# cgreen-runner loads tests from a shared object, hence -fPIC -shared.
tests.so: shortcodes.c tests.c
	$(CC) -fPIC -shared -g -o $@ $^ -lbg -lcgreen

clean:
	rm -f shortcodes.c *.o *.so tests

test: tests.so
	cgreen-runner $^

# Depend on tests.so so that debugging never runs against a stale or missing build.
debug: tests.so
	cgreen-debug tests.so

# None of these targets produce a file with their own name.
.PHONY: all clean test debug

View File

@@ -1,7 +1,5 @@
# Shortcodes # Shortcodes
## What it is
This is a parser for the shortcode spec as explained in the Hugo docs This is a parser for the shortcode spec as explained in the Hugo docs
and used in Hugo and Nikola. Approximately. and used in Hugo and Nikola. Approximately.
@@ -11,50 +9,14 @@ It probably won't be 100% identical, but I'll try to make it
as close as practical. as close as practical.
* Implemented in Ragel + C for performance * Implemented in Ragel + C for performance
* Allocates no memory, because all strings are references to * Allocates no memory, because all strings are references to
pieces of input. pieces of input.
## What works What works:
* Detect shortcodes with names * Detect shortcodes with names
* Standalone and matched shortcodes * Standalone and matched shortcodes
* Capture data between tags in matched shortcodes * Capture data between tags in matched shortcodes
* Capture arguments with and without names * Capture arguments with and without names
* Capture values with and without quotes (with details, see [TODO](TODO.md)) * Capture values with and without quotes (with details, see TODO above)
## Building
You need [Ragel](http://www.colm.net/open-source/ragel/) and a C compiler.
Ragel is used to generate `shortcodes.c` out of `shortcodes.rl`.
As a convenience, a generated `shortcodes.c` is included in the repo.
Then:
```shell
cd src && make
```
To run tests:
```shell
crystal spec
```
## Installation
1. Add the dependency to your `shard.yml`:
```yaml
dependencies:
shortcodes:
github: ralsina/shortcodes
```
2. Run `shards install`
## Usage
```crystal
require "shortcodes"
```

View File

@@ -2,6 +2,7 @@
## Things That May Get Done ## Things That May Get Done
* Produce decent error structures
* Inform tag type (% or <) in result * Inform tag type (% or <) in result
* Handle other kinds of quotes * Handle other kinds of quotes
* Handle escaping quote characters * Handle escaping quote characters
@@ -11,5 +12,4 @@
(also, how TF do they work?) (also, how TF do they work?)
* Add inline shortcodes `{{< time.inline >}}{{ now }}{{< /time.inline >}}` * Add inline shortcodes `{{< time.inline >}}{{ now }}{{< /time.inline >}}`
* Start integrating with Crystal * Start integrating with Crystal
* ~~Produce decent error structures~~
* ~~Detect mismatched start/end like {{% foo >}}~~ * ~~Detect mismatched start/end like {{% foo >}}~~

View File

@@ -1,12 +0,0 @@
name: shortcodes
version: 0.1.0
authors:
- Roberto Alsina <roberto.alsina@gmail.com>
crystal: 1.8.2
scripts:
postinstall: cd src && make shortcodes.o
license: MIT

View File

@@ -5,8 +5,7 @@
// and goes on for len characters. // and goes on for len characters.
struct chunk struct chunk
{ {
unsigned int start; unsigned int start, len;
unsigned int len;
}; };
typedef struct chunk chunk; typedef struct chunk chunk;
@@ -57,4 +56,4 @@ Example:
#define ERR_MISMATCHED_BRACKET 2 #define ERR_MISMATCHED_BRACKET 2
sc_result parse(char *, unsigned int); sc_result parse(char *);

View File

@@ -110,20 +110,18 @@
}%% }%%
sc_result parse(char *input, unsigned int len) { sc_result parse(char *input) {
%%write data; %%write data;
char *eof = input + len; char *eof, *ts, *te = 0;
char *ts, *te = 0;
int cs, act = 0; int cs, act = 0;
char *start = input; char *start = input;
char *p = input; char *p = input;
char *pe = p + len; char *pe = p + strlen(input);
sc_result result; sc_result result;
result.errcount = 0; shortcode *sc_list = result.sc;
shortcode *sc_list = result.sc;
int c_sc = 0; int c_sc = 0;
char *mark = p; char *mark = p;

View File

@@ -1,136 +0,0 @@
require "./spec_helper"
include Shortcodes
# Behavioural spec for `Shortcodes.parse`: every example feeds one input
# string to the parser and checks the shortcodes and errors it reports.
describe "Shortcodes" do
  it "should parse empty string" do
    result = parse("")
    result.shortcodes.size.should eq 0
    result.errors.size.should eq 0
  end

  it "should parse simeple shortcode" do
    result = parse("foobar {{% shortcode %}}blah")
    result.shortcodes.size.should eq 1
    result.errors.size.should eq 0

    # Standalone tag: named "shortcode", not matched, no arguments.
    sc = result.shortcodes[0]
    sc.name.should eq "shortcode"
    sc.matching.should eq 0
    sc.args.size.should eq 0
    sc.whole.should eq "{{% shortcode %}}"
  end

  it "should report mismatched tags" do
    input = "foobar {{% shortcode %}}blah{{% /foo %}}"
    result = parse(input)
    result.shortcodes.size.should eq 1
    result.errors.size.should eq 1

    # The error points at the closing tag that does not match the opener.
    err = result.errors[0]
    err.code.should eq ERR_MISMATCHED_CLOSING_TAG
    input[err.position, 8].should eq "{{% /foo"
  end

  it "should report mismatched brackets" do
    input = "foobar {{% shortcode >}}blah"
    result = parse(input)
    result.shortcodes.size.should eq 0
    result.errors.size.should eq 1

    # The error points at the closing delimiter of the wrong kind.
    err = result.errors[0]
    err.code.should eq ERR_MISMATCHED_BRACKET
    input[err.position, 3].should eq ">}}"
  end

  it "should accept mismatched brackets inside data are ok" do
    input = "foobar {{% sc %}} >}}blah {{% /sc %}} "
    result = parse(input)
    result.shortcodes.size.should eq 1
    result.errors.size.should eq 0

    # A stray ">}}" between the tags is plain data, not an error.
    sc = result.shortcodes[0]
    sc.whole.should eq "{{% sc %}} >}}blah {{% /sc %}}"
    sc.data.should eq " >}}blah "
    sc.args.size.should eq 0
  end

  it "should accept mismatched brackets in qvals" do
    input = "foobar {{% sc \">}}blah\" %}} {{% /sc %}}"
    result = parse(input)
    result.shortcodes.size.should eq 1
    result.errors.size.should eq 0

    # A ">}}" inside a quoted value belongs to the value.
    sc = result.shortcodes[0]
    sc.whole.should eq "{{% sc \">}}blah\" %}} {{% /sc %}}"
    sc.args.size.should eq 1
    sc.args[0].@value.should eq ">}}blah"
  end

  it "should consider spaces in shortcodes optional" do
    input = "foobar {{% shortcode%}}blah"
    result = parse(input)
    result.shortcodes.size.should eq 1
    result.errors.size.should eq 0

    sc = result.shortcodes[0]
    sc.name.should eq "shortcode"
    sc.matching.should eq 0
    sc.args.size.should eq 0
    sc.whole.should eq "{{% shortcode%}}"
  end

  it "should allow path-like names" do
    input = "foobar {{% shortcode/foo/bar %}}blah"
    result = parse(input)
    result.shortcodes.size.should eq 1
    result.errors.size.should eq 0

    sc = result.shortcodes[0]
    sc.name.should eq "shortcode/foo/bar"
    sc.matching.should eq 0
    sc.args.size.should eq 0
    sc.whole.should eq "{{% shortcode/foo/bar %}}"
  end

  it "should parse multiple shortcodes" do
    input = "foobar {{% shortcode %}}blah {{<sc2 >}}blahblah"
    result = parse(input)
    result.shortcodes.size.should eq 2
    result.errors.size.should eq 0

    first = result.shortcodes[0]
    first.name.should eq "shortcode"
    first.matching.should eq 0
    first.args.size.should eq 0
    first.whole.should eq "{{% shortcode %}}"

    second = result.shortcodes[1]
    second.name.should eq "sc2"
    second.matching.should eq 0
    second.args.size.should eq 0
    second.whole.should eq "{{<sc2 >}}"
  end

  it "should parse matching shortcodes" do
    input = "foobar {{% shortcode %}}blah {{% /shortcode %}} blah"
    result = parse(input)
    result.shortcodes.size.should eq 1
    result.errors.size.should eq 0

    # Opening and closing tags pair up; data is the text between them.
    sc = result.shortcodes[0]
    sc.name.should eq "shortcode"
    sc.matching.should eq 1
    sc.args.size.should eq 0
    sc.whole.should eq "{{% shortcode %}}blah {{% /shortcode %}}"
    sc.data.should eq "blah "
  end

  it "should parse shortcode args" do
    input = "foobar {{% shortcode foo \"bar\" 42 bat=v1 baz=\"v2\" %}}blah"
    result = parse(input)
    result.shortcodes.size.should eq 1
    result.errors.size.should eq 0

    sc = result.shortcodes[0]
    sc.name.should eq "shortcode"
    sc.matching.should eq 0
    sc.args.size.should eq 5

    # {name, value} pairs in source order; positional args have an empty name.
    expected = [
      {"", "foo"},
      {"", "bar"},
      {"", "42"},
      {"bat", "v1"},
      {"baz", "v2"},
    ]
    expected.each_with_index do |(name, value), i|
      sc.args[i].name.should eq name
      sc.args[i].value.should eq value
    end

    sc.whole.should eq "{{% shortcode foo \"bar\" 42 bat=v1 baz=\"v2\" %}}"
  end

  # BUG?
  # it "should ignore escaped shortcodes" do
  #   input = "foobar \\{{% shortcode %}}blah"
  #   result = parse(input)
  #   result.shortcodes.size.should eq 0
  #   result.errors.size.should eq 0
  # end
end

View File

@@ -1,2 +0,0 @@
require "spec"
require "../src/shortcodes"

View File

@@ -1,7 +0,0 @@
# Builds shortcodes.o, the object file the Crystal binding links against.
all: shortcodes.o

# shortcodes.c is generated from the Ragel grammar.
shortcodes.c: shortcodes.rl
	ragel -G2 shortcodes.rl -o shortcodes.c

clean:
	rm -f shortcodes.o

# Anchor the pattern on the ".o"/".c" suffixes; a bare "%o: %c" would match
# any target whose name merely ends in the letter "o".
%.o: %.c
	$(CC) -g -c -o $@ $<

.PHONY: all clean

View File

@@ -1 +0,0 @@
require "../spec/**"

File diff suppressed because it is too large Load Diff

View File

@@ -1,118 +0,0 @@
# Raw bindings to the C shortcode parser. The object file is expected to sit
# next to this source file (see the `ldflags` path below).
#
# NOTE(review): field order, field types and the fixed array sizes must match
# the C header exactly; the sizes (100 / 10) are not verifiable from this
# file - confirm against shortcodes.h.
@[Link(ldflags: "#{__DIR__}/shortcodes.o")]
lib LibShortcodes
# A reference to a piece of the input: begins at `start` and runs for `len`.
struct Chunk
start : UInt32
len : UInt32
end
# One reported problem: where it was found and which error code applies.
struct ScError
position : UInt32
code : UInt32
end
# One parsed shortcode; every Chunk refers back into the parsed input.
struct Shortcode
whole : Chunk
name : Chunk
data : Chunk
matching : LibC::Char
# Parallel arrays: argnames[i] / argvals[i] describe argument i, and only
# the first `argcount` entries are read by the wrapper.
argnames : Chunk[100]
argvals : Chunk[100]
argcount : UInt32
end
# Everything a single parse call returns, by value.
struct ScResult
# Only the first `sccount` entries are read by the wrapper.
shortcodes : Shortcode[100]
sccount : UInt32
# Only the first `errcount` entries are read by the wrapper.
errors : ScError[10]
errcount : UInt32
end
# Parses `len` bytes starting at `input`.
fun parse(input : Pointer(LibC::Char), len : UInt32) : ScResult
end
# Crystal-friendly wrapper around the C parser bound in `LibShortcodes`.
#
# `parse` converts the fixed-size C result (offsets into the input) into
# ordinary Crystal structs holding `String`s.
module Shortcodes
  # A single shortcode argument. `name` is empty for positional arguments.
  struct Arg
    property name : String = ""
    property value : String = ""

    def initialize(@name, @value)
    end
  end

  # A problem reported by the parser.
  #
  # `position` is the offset into the input as reported by the C parser and
  # `code` is one of the `ERR_*` constants below.
  struct Error
    property position : UInt32
    property code : UInt32

    def initialize(@position, @code)
    end
  end

  # A parsed shortcode.
  #
  # * `name`     - the shortcode name
  # * `data`     - text between the opening and closing tags (matched tags)
  # * `matching` - non-zero when the shortcode has a closing tag
  # * `args`     - arguments in source order
  # * `whole`    - the complete source text of the shortcode
  struct Shortcode
    property name : String = ""
    property data : String = ""
    property matching : Int32 = 0
    property args : Array(Arg) = [] of Arg
    property whole : String = ""

    def initialize(
      @name,
      @data,
      @matching,
      @args,
      @whole
    )
    end
  end

  # Everything found in one input: the shortcodes and the errors.
  struct Result
    property shortcodes : Array(Shortcode) = [] of Shortcode
    property errors : Array(Error) = [] of Error
  end

  # Returns the piece of *s* that chunk *c* refers to.
  #
  # The C parser is handed `input.bytesize` bytes and answers with byte
  # offsets, so the slice has to be taken by bytes. `String#[]` counts
  # characters and would return the wrong text as soon as the input contains
  # a multi-byte character before or inside the chunk.
  def extract(c : LibShortcodes::Chunk, s : String)
    s.byte_slice(c.start, c.len)
  end

  # Parses *input* and returns a `Result` with every shortcode and error
  # the C parser reported.
  def parse(input : String)
    r = LibShortcodes.parse(input.to_unsafe, input.bytesize)
    result = Result.new

    # Only the first `sccount` slots of the fixed-size array are meaningful.
    (0...r.sccount).each do |i|
      sc = r.shortcodes[i]
      args = [] of Arg
      (0...sc.argcount).each do |j|
        args << Arg.new(
          extract(sc.argnames[j], input),
          extract(sc.argvals[j], input),
        )
      end
      result.shortcodes << Shortcode.new(
        extract(sc.name, input),
        extract(sc.data, input),
        sc.matching,
        args,
        extract(sc.whole, input),
      )
    end

    # Likewise, only the first `errcount` error slots are meaningful.
    (0...r.errcount).each do |k|
      result.errors << Error.new(
        r.errors[k].position,
        r.errors[k].code,
      )
    end
    result
  end

  # ### Error codes
  #
  # You are closing the wrong shortcode.
  # Example:
  # {{% foo %}} {{% /bar %}}
  ERR_MISMATCHED_CLOSING_TAG = 1

  # You are using mismatched brackets.
  # Example:
  # {{% foo >}}
  ERR_MISMATCHED_BRACKET = 2
end

226
tests.c Normal file
View File

@@ -0,0 +1,226 @@
#include <cgreen/cgreen.h>
#include <bglibs/str.h>
#include "shortcodes.h"
// Declares the `parse` test context that every Ensure(parse, ...) below
// belongs to.
Describe(parse);
// No per-test setup or teardown is required, so both hooks are empty.
BeforeEach(parse) {}
AfterEach(parse) {}
// Output of the most recent parse() call; each test assigns it first.
sc_result result;
// Scratch string the tests fill (via chunk_s() or str_copyb()) and then
// compare through s.s.
str s;
// Copies the piece of `buffer` that chunk `c` refers to (c.len characters
// starting at offset c.start) into the shared scratch string `s`, replacing
// whatever it held before.
void chunk_s(char *buffer, chunk c)
{
    char *piece = buffer + c.start;
    unsigned int piece_len = c.len;

    str_copyb(&s, piece, piece_len);
}
// An empty input must yield no shortcodes and no errors.
Ensure(parse, empty_string)
{
char *input = "";
result = parse(input);
// Nothing to find in an empty string...
assert_that(result.sccount, is_equal_to(0));
// ...and nothing to complain about either
assert_that(result.errcount, is_equal_to(0));
}
// A single standalone shortcode surrounded by ordinary text is detected,
// with its name and full extent reported and no errors.
Ensure(parse, simple_shortcode)
{
char *input = "foobar {{% shortcode %}}blah";
result = parse(input);
// Only 1 shortcode
assert_that(result.sccount, is_equal_to(1));
// It's a simple one called shortcode, no args
chunk_s(input, result.sc[0].name);
assert_that(s.s, is_equal_to_string("shortcode"));
assert_that(result.sc[0].matching, is_equal_to(0));
assert_that(result.sc[0].argcount, is_equal_to(0));
// `whole` spans the complete tag, delimiters included
chunk_s(input, result.sc[0].whole);
assert_that(s.s, is_equal_to_string("{{% shortcode %}}"));
assert_that(result.errcount, is_equal_to(0));
}
// A closing tag naming a different shortcode than the one that was opened
// is reported as ERR_MISMATCHED_CLOSING_TAG, positioned at that closing tag.
Ensure(parse, mismatched_tags)
{
char *input = "foobar {{% shortcode %}}blah{{% /foo %}}";
result = parse(input);
// One shortcode, one error
assert_that(result.sccount, is_equal_to(1));
assert_that(result.errcount, is_equal_to(1));
assert_that(result.errors[0].code, is_equal_to(ERR_MISMATCHED_CLOSING_TAG));
// The 8 characters at the reported position are the start of the bad closing tag
str_copyb(&s, input + result.errors[0].position, 8);
assert_that(s.s, is_equal_to_string("{{% /foo"));
}
// A tag opened with "{{%" but closed with ">}}" is not a shortcode; it is
// reported as ERR_MISMATCHED_BRACKET, positioned at the wrong closer.
Ensure(parse, mismatched_brackets)
{
char *input = "foobar {{% shortcode >}}blah";
result = parse(input);
// No shortcodes, 1 error
assert_that(result.sccount, is_equal_to(0));
assert_that(result.errcount, is_equal_to(1));
assert_that(result.errors[0].code, is_equal_to(ERR_MISMATCHED_BRACKET));
// The 3 characters at the reported position are the offending closer
str_copyb(&s, input + result.errors[0].position, 3);
assert_that(s.s, is_equal_to_string(">}}"));
}
// A stray ">}}" in the text between matched tags is plain data and must not
// be reported as an error.
Ensure(parse, mismatched_brackets_inside_data_are_ok)
{
char *input = "foobar {{% sc %}} >}}blah {{% /sc %}} ";
result = parse(input);
// 1 shortcode
assert_that(result.sccount, is_equal_to(1));
// `whole` runs from the opening tag through the closing tag
chunk_s(input, result.sc[0].whole);
assert_that(s.s, is_equal_to_string("{{% sc %}} >}}blah {{% /sc %}}"));
// `data` is everything between the two tags, stray closer included
chunk_s(input, result.sc[0].data);
assert_that(s.s, is_equal_to_string(" >}}blah "));
assert_that(result.errcount, is_equal_to(0));
}
// A ">}}" inside a quoted argument value belongs to the value and must not
// be reported as an error.
Ensure(parse, mismatched_brackets_in_qval_are_ok)
{
char *input = "foobar {{% sc \">}}blah\" %}} {{% /sc %}}";
result = parse(input);
// 1 shortcode
assert_that(result.sccount, is_equal_to(1));
// `whole` runs from the opening tag through the closing tag
chunk_s(input, result.sc[0].whole);
assert_that(s.s, is_equal_to_string("{{% sc \">}}blah\" %}} {{% /sc %}}"));
// The value is reported without its surrounding quotes
chunk_s(input, result.sc[0].argvals[0]);
assert_that(s.s, is_equal_to_string(">}}blah"));
assert_that(result.errcount, is_equal_to(0));
}
// The space between the shortcode name and the closing delimiter is
// optional: "{{% shortcode%}}" parses like "{{% shortcode %}}".
Ensure(parse, inner_spaces_optional)
{
char *input = "foobar {{% shortcode%}}blah";
result = parse(input);
// Only 1 shortcode
assert_that(result.sccount, is_equal_to(1));
// It's a simple one called shortcode, no args
chunk_s(input, result.sc[0].name);
assert_that(s.s, is_equal_to_string("shortcode"));
assert_that(result.sc[0].matching, is_equal_to(0));
assert_that(result.sc[0].argcount, is_equal_to(0));
// `whole` spans the complete tag, delimiters included
chunk_s(input, result.sc[0].whole);
assert_that(s.s, is_equal_to_string("{{% shortcode%}}"));
assert_that(result.errcount, is_equal_to(0));
}
// Shortcode names may contain slashes, e.g. "shortcode/foo/bar".
Ensure(parse, name_can_be_path)
{
char *input = "foobar {{% shortcode/foo/bar %}}blah";
result = parse(input);
// Only 1 shortcode
assert_that(result.sccount, is_equal_to(1));
// The name keeps its slashes; standalone tag, no args
chunk_s(input, result.sc[0].name);
assert_that(s.s, is_equal_to_string("shortcode/foo/bar"));
assert_that(result.sc[0].matching, is_equal_to(0));
assert_that(result.sc[0].argcount, is_equal_to(0));
assert_that(result.errcount, is_equal_to(0));
}
// Two standalone shortcodes in one input, one with "%" delimiters and one
// with "<"/">" delimiters, are both reported, in source order.
Ensure(parse, multiple_shortcodes)
{
char *input = "foobar {{% shortcode %}}blah {{<sc2 >}}blahblah";
result = parse(input);
// 2 shortcodes
assert_that(result.sccount, is_equal_to(2));
// The first is a simple one called shortcode, no args
chunk_s(input, result.sc[0].name);
assert_that(s.s, is_equal_to_string("shortcode"));
assert_that(result.sc[0].matching, is_equal_to(0));
assert_that(result.sc[0].argcount, is_equal_to(0));
// Its `whole` spans the complete first tag
chunk_s(input, result.sc[0].whole);
assert_that(s.s, is_equal_to_string("{{% shortcode %}}"));
// The second is a simple one called sc2, no args
chunk_s(input, result.sc[1].name);
assert_that(s.s, is_equal_to_string("sc2"));
assert_that(result.sc[1].matching, is_equal_to(0));
assert_that(result.sc[1].argcount, is_equal_to(0));
// Its `whole` spans the complete second tag
chunk_s(input, result.sc[1].whole);
assert_that(s.s, is_equal_to_string("{{<sc2 >}}"));
assert_that(result.errcount, is_equal_to(0));
}
// An opening tag followed by its closing tag is reported as one matching
// shortcode whose `data` is the text between the two tags.
Ensure(parse, matching_shortcode)
{
char *input = "blah {{% shortcode %}}foo bar{{% /shortcode %}} blah";
result = parse(input);
// Only 1 shortcode
assert_that(result.sccount, is_equal_to(1));
// It's a matching one called shortcode, no args
chunk_s(input, result.sc[0].name);
assert_that(s.s, is_equal_to_string("shortcode"));
assert_that(result.sc[0].matching, is_equal_to(1));
assert_that(result.sc[0].argcount, is_equal_to(0));
// data is the stuff between the shortcode tags
chunk_s(input, result.sc[0].data);
assert_that(s.s, is_equal_to_string("foo bar"));
// `whole` runs from the opening tag through the closing tag
chunk_s(input, result.sc[0].whole);
assert_that(s.s, is_equal_to_string("{{% shortcode %}}foo bar{{% /shortcode %}}"));
assert_that(result.errcount, is_equal_to(0));
}
// Arguments are captured in source order: positional ones (bare or quoted)
// have a zero-length name, named ones report name and value separately, and
// quoted values are reported without their quotes.
Ensure(parse, shortcode_args)
{
char *input = "foobar {{% shortcode foo \"bar\" 42 bat=v1 baz=\"v2\" %}}blah";
result = parse(input);
// Only 1 shortcode
assert_that(result.sccount, is_equal_to(1));
// `whole` spans the complete tag, arguments included
chunk_s(input, result.sc[0].whole);
assert_that(s.s, is_equal_to_string("{{% shortcode foo \"bar\" 42 bat=v1 baz=\"v2\" %}}"));
// Name is shortcode
chunk_s(input, result.sc[0].name);
assert_that(s.s, is_equal_to_string("shortcode"));
assert_that(result.sc[0].matching, is_equal_to(0));
// Has 5 args
assert_that(result.sc[0].argcount, is_equal_to(5));
// Arg1 is foo, no name
assert_that(result.sc[0].argnames[0].len, is_equal_to(0));
chunk_s(input, result.sc[0].argvals[0]);
assert_that(s.s, is_equal_to_string("foo"));
// Arg2 is bar (quoted in the input), no name
assert_that(result.sc[0].argnames[1].len, is_equal_to(0));
chunk_s(input, result.sc[0].argvals[1]);
assert_that(s.s, is_equal_to_string("bar"));
// Arg3 is 42, no name
assert_that(result.sc[0].argnames[2].len, is_equal_to(0));
chunk_s(input, result.sc[0].argvals[2]);
assert_that(s.s, is_equal_to_string("42"));
// Arg4 is bat=v1
chunk_s(input, result.sc[0].argnames[3]);
assert_that(s.s, is_equal_to_string("bat"));
chunk_s(input, result.sc[0].argvals[3]);
assert_that(s.s, is_equal_to_string("v1"));
// Arg5 is baz=v2 (value quoted in the input)
chunk_s(input, result.sc[0].argnames[4]);
assert_that(s.s, is_equal_to_string("baz"));
chunk_s(input, result.sc[0].argvals[4]);
assert_that(s.s, is_equal_to_string("v2"));
assert_that(result.errcount, is_equal_to(0));
}
// BUG?
// Ensure(parse, escaped_shortcode)
// {
// char *input = "foobar \\{{% shortcode %}}";
// result = parse(input);
// // No shortcodes
// assert_that(result.sc[0].name.len, is_equal_to(0));
// }