Compare commits

..

14 Commits

12 changed files with 4250 additions and 261 deletions

4
.gitignore vendored
View File

@@ -51,7 +51,3 @@ Module.symvers
Mkfile.old Mkfile.old
dkms.conf dkms.conf
shortcodes.c
shortcodes
tests
.cgreen*

View File

@@ -1,13 +0,0 @@
# Legacy top-level build: generates the parser with Ragel, builds it together
# with the cgreen test-suite into a shared object and runs it.
CC=gcc
# Default target: build and run the tests.
all: test
# Generate the C parser from the Ragel grammar (-G2 selects goto-driven code).
shortcodes.c: shortcodes.rl
ragel -G2 shortcodes.rl -o shortcodes.c
# Parser and tests are linked into one shared object that cgreen-runner loads.
tests.so: shortcodes.c tests.c
$(CC) -fPIC -shared -g -o $@ $^ -lbg -lcgreen
# Remove generated sources and build products.
clean:
rm -f shortcodes.c *.o *.so tests
# Run the whole suite through cgreen-runner.
test: tests.so
cgreen-runner $^
# Run the suite through cgreen-debug.
debug:
cgreen-debug tests.so
.PHONY: test debug

View File

@@ -1,5 +1,7 @@
# Shortcodes # Shortcodes
## What it is
This is a parser for the shortcode spec as explained in the Hugo docs This is a parser for the shortcode spec as explained in the Hugo docs
and used in Hugo and Nikola. Approximately. and used in Hugo and Nikola. Approximately.
@@ -9,14 +11,50 @@ It probably won't be 100% identical, but I'll try to make it
as close as practical. as close as practical.
* Implemented in Ragel + C for performance * Implemented in Ragel + C for performance
* Allocates no memory, because all strings are references to * Allocates no memory, because all strings are references to
pieces of input. pieces of input.
What works: ## What works
* Detect shortcodes with names * Detect shortcodes with names
* Standalone and matched shortcodes * Standalone and matched shortcodes
* Capture data between tags in matched shortcodes * Capture data between tags in matched shortcodes
* Capture arguments with and without names * Capture arguments with and without names
* Capture values with and without quotes (with details, see TODO above) * Capture values with and without quotes (with details, see [TODO](TODO.md))
## Building
You need [Ragel](http://www.colm.net/open-source/ragel/) and a C compiler.
Ragel is used to generate `shortcodes.c` out of `shortcodes.rl`.
As a convenience there is a generated `shortcodes.c` in the repo, so Ragel is only needed when `shortcodes.rl` changes.
Then:
```shell
cd src && make
```
To run tests:
```shell
crystal spec
```
## Installation
1. Add the dependency to your `shard.yml`:
```yaml
dependencies:
shortcodes:
github: ralsina/shortcodes
```
2. Run `shards install`
## Usage
```crystal
require "shortcodes"
```

View File

@@ -6,4 +6,7 @@ authors:
crystal: 1.8.2 crystal: 1.8.2
scripts:
postinstall: cd src && make shortcodes.o
license: MIT license: MIT

View File

@@ -1,9 +1,136 @@
require "./spec_helper" require "./spec_helper"
include Shortcodes
describe Shortcodes do describe "Shortcodes" do
# TODO: Write tests it "should parse empty string" do
result = parse("")
it "works" do result.shortcodes.size.should eq 0
false.should eq(true) result.errors.size.should eq 0
end end
# A lone opening tag is reported as one non-matching shortcode without
# arguments, and `whole` covers exactly the tag text.
it "should parse simeple shortcode" do
  parsed = parse("foobar {{% shortcode %}}blah")
  parsed.shortcodes.size.should eq(1)
  parsed.errors.size.should eq(0)

  sc = parsed.shortcodes[0]
  sc.name.should eq("shortcode")
  sc.matching.should eq(0)
  sc.args.size.should eq(0)
  sc.whole.should eq("{{% shortcode %}}")
end
# A closing tag that names a different shortcode produces one error whose
# position points at the offending closing tag.
it "should report mismatched tags" do
  input = "foobar {{% shortcode %}}blah{{% /foo %}}"
  parsed = parse(input)
  parsed.shortcodes.size.should eq(1)
  parsed.errors.size.should eq(1)

  err = parsed.errors[0]
  err.code.should eq(ERR_MISMATCHED_CLOSING_TAG)
  input[err.position, 8].should eq("{{% /foo")
end
# Opening with `{{%` and closing with `>}}` yields no shortcode and one
# error whose position points at the wrong closing bracket.
it "should report mismatched brackets" do
  input = "foobar {{% shortcode >}}blah"
  parsed = parse(input)
  parsed.shortcodes.size.should eq(0)
  parsed.errors.size.should eq(1)

  err = parsed.errors[0]
  err.code.should eq(ERR_MISMATCHED_BRACKET)
  input[err.position, 3].should eq(">}}")
end
# A stray `>}}` between an opening and a closing tag is plain data,
# not a bracket error.
it "should accept mismatched brackets inside data are ok" do
  input = "foobar {{% sc %}} >}}blah {{% /sc %}} "
  parsed = parse(input)
  parsed.shortcodes.size.should eq(1)
  parsed.errors.size.should eq(0)

  sc = parsed.shortcodes[0]
  sc.whole.should eq("{{% sc %}} >}}blah {{% /sc %}}")
  sc.data.should eq(" >}}blah ")
  sc.args.size.should eq(0)
end
# A `>}}` inside a quoted argument value belongs to the value and must not
# be reported as a bracket error.
it "should accept mismatched brackets in qvals" do
  input = "foobar {{% sc \">}}blah\" %}} {{% /sc %}}"
  result = parse(input)
  result.shortcodes.size.should eq 1
  result.errors.size.should eq 0
  result.shortcodes[0].whole.should eq "{{% sc \">}}blah\" %}} {{% /sc %}}"
  result.shortcodes[0].args.size.should eq 1
  # Go through the public getter (as the other examples do) instead of
  # reaching into the instance variable with `.@value`.
  result.shortcodes[0].args[0].value.should eq ">}}blah"
end
# The blank between the shortcode name and the closing `%}}` may be omitted.
it "should consider spaces in shortcodes optional" do
  input = "foobar {{% shortcode%}}blah"
  parsed = parse(input)
  parsed.shortcodes.size.should eq(1)
  parsed.errors.size.should eq(0)

  sc = parsed.shortcodes[0]
  sc.name.should eq("shortcode")
  sc.matching.should eq(0)
  sc.args.size.should eq(0)
  sc.whole.should eq("{{% shortcode%}}")
end
# Shortcode names may contain slashes, e.g. `shortcode/foo/bar`.
it "should allow path-like names" do
  input = "foobar {{% shortcode/foo/bar %}}blah"
  parsed = parse(input)
  parsed.shortcodes.size.should eq(1)
  parsed.errors.size.should eq(0)

  sc = parsed.shortcodes[0]
  sc.name.should eq("shortcode/foo/bar")
  sc.matching.should eq(0)
  sc.args.size.should eq(0)
  sc.whole.should eq("{{% shortcode/foo/bar %}}")
end
# Two standalone shortcodes (one `{{% %}}`, one `{{< >}}`) are reported in
# input order.
it "should parse multiple shortcodes" do
  input = "foobar {{% shortcode %}}blah {{<sc2 >}}blahblah"
  parsed = parse(input)
  parsed.shortcodes.size.should eq(2)
  parsed.errors.size.should eq(0)

  # {expected name, expected whole text} for each shortcode, in order.
  expected = [
    {"shortcode", "{{% shortcode %}}"},
    {"sc2", "{{<sc2 >}}"},
  ]
  expected.each_with_index do |(name, whole), idx|
    sc = parsed.shortcodes[idx]
    sc.name.should eq(name)
    sc.matching.should eq(0)
    sc.args.size.should eq(0)
    sc.whole.should eq(whole)
  end
end
# An opening tag followed by its closing tag is one shortcode with
# `matching == 1`; `data` is the text between the two tags.
it "should parse matching shortcodes" do
  input = "foobar {{% shortcode %}}blah {{% /shortcode %}} blah"
  parsed = parse(input)
  parsed.shortcodes.size.should eq(1)
  parsed.errors.size.should eq(0)

  sc = parsed.shortcodes[0]
  sc.name.should eq("shortcode")
  sc.matching.should eq(1)
  sc.args.size.should eq(0)
  sc.whole.should eq("{{% shortcode %}}blah {{% /shortcode %}}")
  sc.data.should eq("blah ")
end
# Positional, quoted and named arguments are all captured; positional
# arguments get an empty name and quotes are stripped from values.
it "should parse shortcode args" do
  input = "foobar {{% shortcode foo \"bar\" 42 bat=v1 baz=\"v2\" %}}blah"
  parsed = parse(input)
  parsed.shortcodes.size.should eq(1)
  parsed.errors.size.should eq(0)

  sc = parsed.shortcodes[0]
  sc.name.should eq("shortcode")
  sc.matching.should eq(0)
  sc.args.size.should eq(5)

  # {expected name, expected value} for each argument, in order.
  expected = [
    {"", "foo"},
    {"", "bar"},
    {"", "42"},
    {"bat", "v1"},
    {"baz", "v2"},
  ]
  expected.each_with_index do |(name, value), idx|
    sc.args[idx].name.should eq(name)
    sc.args[idx].value.should eq(value)
  end

  sc.whole.should eq("{{% shortcode foo \"bar\" 42 bat=v1 baz=\"v2\" %}}")
end
# BUG?
# it "should ignore escaped shortcodes" do
# input = "foobar \\{{% shortcode %}}blah"
# result = parse(input)
# result.shortcodes.size.should eq 0
# result.errors.size.should eq 0
# end
end end

7
src/Makefile Normal file
View File

@@ -0,0 +1,7 @@
# Builds the object file that the Crystal binding links against.
all: shortcodes.o

# Regenerate the C parser from the Ragel grammar (-G2 selects goto-driven code).
shortcodes.c: shortcodes.rl
	ragel -G2 shortcodes.rl -o shortcodes.c

# Only the object file is removed; the generated shortcodes.c is kept.
clean:
	rm -f shortcodes.o

# Compile a C source into its object file, with debug info.
# The dots matter: "%o: %c" would match any target ending in "o".
%.o: %.c
	$(CC) -g -c -o $@ $<

.PHONY: all clean

1
src/run_tests.cr Normal file
View File

@@ -0,0 +1 @@
require "../spec/**"

3941
src/shortcodes.c Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,118 @@
# TODO: Write documentation for `Shortcodes` @[Link(ldflags: "#{__DIR__}/shortcodes.o")]
module Shortcodes lib LibShortcodes
VERSION = "0.1.0" struct Chunk
start : UInt32
len : UInt32
end
# TODO: Put your code here struct ScError
position : UInt32
code : UInt32
end
# Binding for one shortcode as returned by the C parser.
# Field order and types must stay in sync with the C struct declaration.
# Every `Chunk` refers to a piece of the parsed input, not to a copy of it.
struct Shortcode
# The full text of the shortcode.
whole : Chunk
# The shortcode name.
name : Chunk
# Text between the opening and closing tag.
data : Chunk
# 1 when the shortcode has a closing tag, 0 otherwise.
matching : LibC::Char
# Argument names and values, stored as parallel arrays; only the first
# `argcount` entries of each are read by the wrapper.
argnames : Chunk[100]
argvals : Chunk[100]
argcount : UInt32
end
# Binding for the complete parse result returned by value from the C parser.
# Field order and types must stay in sync with the C struct declaration.
struct ScResult
# Fixed-size storage; only the first `sccount` entries are read by the wrapper.
shortcodes : Shortcode[100]
sccount : UInt32
# Fixed-size storage; only the first `errcount` entries are read by the wrapper.
errors : ScError[10]
errcount : UInt32
end
# Parses `len` bytes starting at `input` and returns the result by value.
fun parse(input : Pointer(LibC::Char), len : UInt32) : ScResult
end
module Shortcodes
# One argument of a shortcode.
# `name` is the empty string for positional (unnamed) arguments.
struct Arg
  property name : String = ""
  property value : String = ""

  def initialize(@name : String, @value : String)
  end
end
# A problem found while parsing.
# `position` is the offset into the input where the problem was detected
# and `code` is one of the `ERR_*` constants.
struct Error
  property position : UInt32
  property code : UInt32

  def initialize(@position : UInt32, @code : UInt32)
  end
end
# A parsed shortcode with all of its pieces copied out as strings.
#
# * `name`     - the shortcode name
# * `data`     - text between the opening and closing tag
# * `matching` - 1 when the shortcode has a closing tag, 0 otherwise
# * `args`     - the arguments, in input order
# * `whole`    - the full text of the shortcode
struct Shortcode
  property name : String = ""
  property data : String = ""
  property matching : Int32 = 0
  property args : Array(Arg) = [] of Arg
  property whole : String = ""

  def initialize(@name : String, @data : String, @matching : Int32, @args : Array(Arg), @whole : String)
  end
end
# Everything `parse` found in one input: the shortcodes and the errors,
# each in the order the parser reported them.
struct Result
  property shortcodes : Array(Shortcode) = Array(Shortcode).new
  property errors : Array(Error) = Array(Error).new
end
# Returns the piece of *s* that chunk *c* refers to.
#
# The C parser works on the raw `char *` buffer, so `c.start` and `c.len`
# are byte offsets. `String#[](start, count)` counts characters instead and
# returns the wrong text as soon as *s* contains a multi-byte character, so
# the slice is taken by bytes.
def extract(c : LibShortcodes::Chunk, s : String)
  s.byte_slice(c.start.to_i, c.len.to_i)
end
# Parses *input* with the C parser and converts the raw result into a
# `Result` holding Crystal strings and arrays.
#
# Error positions are passed through unchanged from the C parser.
def parse(input : String)
  raw = LibShortcodes.parse(input.to_unsafe, input.bytesize)
  parsed = Result.new

  # Only the first `sccount` slots of the fixed-size array are filled in.
  raw.sccount.times do |sc_index|
    native = raw.shortcodes[sc_index]

    # Names and values live in parallel arrays of `argcount` used entries.
    arguments = [] of Arg
    native.argcount.times do |arg_index|
      arg_name = extract(native.argnames[arg_index], input)
      arg_value = extract(native.argvals[arg_index], input)
      arguments << Arg.new(arg_name, arg_value)
    end

    parsed.shortcodes << Shortcode.new(
      extract(native.name, input),
      extract(native.data, input),
      native.matching,
      arguments,
      extract(native.whole, input),
    )
  end

  raw.errcount.times do |err_index|
    native_error = raw.errors[err_index]
    parsed.errors << Error.new(native_error.position, native_error.code)
  end

  parsed
end
# ### Error codes
#
# You are closing the wrong shortcode.
# Example:
# {{% foo %}} {{% /bar %}}
ERR_MISMATCHED_CLOSING_TAG = 1
# You are using mismatched brackets.
# Example:
# {{% foo >}}
ERR_MISMATCHED_BRACKET = 2
end end

View File

@@ -5,7 +5,8 @@
// and goes on for len characters. // and goes on for len characters.
struct chunk struct chunk
{ {
unsigned int start, len; unsigned int start;
unsigned int len;
}; };
typedef struct chunk chunk; typedef struct chunk chunk;
@@ -56,4 +57,4 @@ Example:
#define ERR_MISMATCHED_BRACKET 2 #define ERR_MISMATCHED_BRACKET 2
sc_result parse(char *); sc_result parse(char *, unsigned int);

View File

@@ -110,18 +110,20 @@
}%% }%%
sc_result parse(char *input) { sc_result parse(char *input, unsigned int len) {
%%write data; %%write data;
char *eof, *ts, *te = 0; char *eof = input + len;
char *ts, *te = 0;
int cs, act = 0; int cs, act = 0;
char *start = input; char *start = input;
char *p = input; char *p = input;
char *pe = p + strlen(input); char *pe = p + len;
sc_result result; sc_result result;
shortcode *sc_list = result.sc; result.errcount = 0;
shortcode *sc_list = result.sc;
int c_sc = 0; int c_sc = 0;
char *mark = p; char *mark = p;

226
tests.c
View File

@@ -1,226 +0,0 @@
#include <cgreen/cgreen.h>
#include <bglibs/str.h>
#include "shortcodes.h"
Describe(parse);
BeforeEach(parse) {}
AfterEach(parse) {}
sc_result result;
str s;
/* Copies the bytes that chunk `c` refers to inside `buffer` into the
 * global string `s`, so a test can compare it with a C string. */
void chunk_s(char *buffer, chunk c)
{
    char *first = &buffer[c.start];
    str_copyb(&s, first, c.len);
}
// An empty input yields neither shortcodes nor errors.
Ensure(parse, empty_string)
{
char *input = "";
result = parse(input);
// This means no shortcodes in it
assert_that(result.sccount, is_equal_to(0));
assert_that(result.errcount, is_equal_to(0));
}
// A lone opening tag is one non-matching shortcode without arguments.
Ensure(parse, simple_shortcode)
{
char *input = "foobar {{% shortcode %}}blah";
result = parse(input);
// Only 1 shortcode
assert_that(result.sccount, is_equal_to(1));
// It's a simple one called shortcode, no args
chunk_s(input, result.sc[0].name);
assert_that(s.s, is_equal_to_string("shortcode"));
assert_that(result.sc[0].matching, is_equal_to(0));
assert_that(result.sc[0].argcount, is_equal_to(0));
// The whole shortcode is the whole thing
chunk_s(input, result.sc[0].whole);
assert_that(s.s, is_equal_to_string("{{% shortcode %}}"));
assert_that(result.errcount, is_equal_to(0));
}
// A closing tag naming a different shortcode is reported as an error
// positioned at that closing tag.
Ensure(parse, mismatched_tags)
{
char *input = "foobar {{% shortcode %}}blah{{% /foo %}}";
result = parse(input);
// One shortcode, one error
assert_that(result.sccount, is_equal_to(1));
assert_that(result.errcount, is_equal_to(1));
assert_that(result.errors[0].code, is_equal_to(ERR_MISMATCHED_CLOSING_TAG));
// The error position must point at the offending closing tag
str_copyb(&s, input + result.errors[0].position, 8);
assert_that(s.s, is_equal_to_string("{{% /foo"));
}
// Opening with "{{%" and closing with ">}}" is an error, not a shortcode.
Ensure(parse, mismatched_brackets)
{
char *input = "foobar {{% shortcode >}}blah";
result = parse(input);
// No shortcodes, 1 error
assert_that(result.sccount, is_equal_to(0));
assert_that(result.errcount, is_equal_to(1));
assert_that(result.errors[0].code, is_equal_to(ERR_MISMATCHED_BRACKET));
// The error position must point at the wrong closing bracket
str_copyb(&s, input + result.errors[0].position, 3);
assert_that(s.s, is_equal_to_string(">}}"));
}
// A stray ">}}" between opening and closing tag is plain data.
Ensure(parse, mismatched_brackets_inside_data_are_ok)
{
char *input = "foobar {{% sc %}} >}}blah {{% /sc %}} ";
result = parse(input);
// 1 shortcode
assert_that(result.sccount, is_equal_to(1));
chunk_s(input, result.sc[0].whole);
assert_that(s.s, is_equal_to_string("{{% sc %}} >}}blah {{% /sc %}}"));
chunk_s(input, result.sc[0].data);
assert_that(s.s, is_equal_to_string(" >}}blah "));
assert_that(result.errcount, is_equal_to(0));
}
// A ">}}" inside a quoted argument value belongs to the value.
Ensure(parse, mismatched_brackets_in_qval_are_ok)
{
char *input = "foobar {{% sc \">}}blah\" %}} {{% /sc %}}";
result = parse(input);
// 1 shortcode
assert_that(result.sccount, is_equal_to(1));
chunk_s(input, result.sc[0].whole);
assert_that(s.s, is_equal_to_string("{{% sc \">}}blah\" %}} {{% /sc %}}"));
// The value is captured without its surrounding quotes
chunk_s(input, result.sc[0].argvals[0]);
assert_that(s.s, is_equal_to_string(">}}blah"));
assert_that(result.errcount, is_equal_to(0));
}
// The blank between the name and the closing "%}}" may be omitted.
Ensure(parse, inner_spaces_optional)
{
char *input = "foobar {{% shortcode%}}blah";
result = parse(input);
// Only 1 shortcode
assert_that(result.sccount, is_equal_to(1));
// It's a simple one called shortcode, no args
chunk_s(input, result.sc[0].name);
assert_that(s.s, is_equal_to_string("shortcode"));
assert_that(result.sc[0].matching, is_equal_to(0));
assert_that(result.sc[0].argcount, is_equal_to(0));
// The whole shortcode is the whole thing
chunk_s(input, result.sc[0].whole);
assert_that(s.s, is_equal_to_string("{{% shortcode%}}"));
assert_that(result.errcount, is_equal_to(0));
}
// Shortcode names may contain slashes.
Ensure(parse, name_can_be_path)
{
char *input = "foobar {{% shortcode/foo/bar %}}blah";
result = parse(input);
// Only 1 shortcode
assert_that(result.sccount, is_equal_to(1));
// It's a simple one called shortcode/foo/bar, no args
chunk_s(input, result.sc[0].name);
assert_that(s.s, is_equal_to_string("shortcode/foo/bar"));
assert_that(result.sc[0].matching, is_equal_to(0));
assert_that(result.sc[0].argcount, is_equal_to(0));
assert_that(result.errcount, is_equal_to(0));
}
// Two standalone shortcodes are reported in input order.
Ensure(parse, multiple_shortcodes)
{
char *input = "foobar {{% shortcode %}}blah {{<sc2 >}}blahblah";
result = parse(input);
// 2 shortcodes
assert_that(result.sccount, is_equal_to(2));
// It's a simple one called shortcode, no args
chunk_s(input, result.sc[0].name);
assert_that(s.s, is_equal_to_string("shortcode"));
assert_that(result.sc[0].matching, is_equal_to(0));
assert_that(result.sc[0].argcount, is_equal_to(0));
// The whole shortcode is the whole thing
chunk_s(input, result.sc[0].whole);
assert_that(s.s, is_equal_to_string("{{% shortcode %}}"));
// It's a simple one called sc2, no args
chunk_s(input, result.sc[1].name);
assert_that(s.s, is_equal_to_string("sc2"));
assert_that(result.sc[1].matching, is_equal_to(0));
assert_that(result.sc[1].argcount, is_equal_to(0));
// The whole shortcode is the whole thing
chunk_s(input, result.sc[1].whole);
assert_that(s.s, is_equal_to_string("{{<sc2 >}}"));
assert_that(result.errcount, is_equal_to(0));
}
// An opening tag plus its closing tag form one matching shortcode whose
// data is the text in between.
Ensure(parse, matching_shortcode)
{
char *input = "blah {{% shortcode %}}foo bar{{% /shortcode %}} blah";
result = parse(input);
// Only 1 shortcode
assert_that(result.sccount, is_equal_to(1));
// It's a matching one called shortcode, no args
chunk_s(input, result.sc[0].name);
assert_that(s.s, is_equal_to_string("shortcode"));
assert_that(result.sc[0].matching, is_equal_to(1));
assert_that(result.sc[0].argcount, is_equal_to(0));
// data is the stuff between the shortcode tags
chunk_s(input, result.sc[0].data);
assert_that(s.s, is_equal_to_string("foo bar"));
// The whole shortcode is the whole thing
chunk_s(input, result.sc[0].whole);
assert_that(s.s, is_equal_to_string("{{% shortcode %}}foo bar{{% /shortcode %}}"));
assert_that(result.errcount, is_equal_to(0));
}
// Positional, quoted and named arguments are all captured; positional
// arguments have an empty name chunk and values lose their quotes.
Ensure(parse, shortcode_args)
{
char *input = "foobar {{% shortcode foo \"bar\" 42 bat=v1 baz=\"v2\" %}}blah";
result = parse(input);
// Only 1 shortcode
assert_that(result.sccount, is_equal_to(1));
// The whole shortcode is the whole thing
chunk_s(input, result.sc[0].whole);
assert_that(s.s, is_equal_to_string("{{% shortcode foo \"bar\" 42 bat=v1 baz=\"v2\" %}}"));
// Name is shortcode
chunk_s(input, result.sc[0].name);
assert_that(s.s, is_equal_to_string("shortcode"));
assert_that(result.sc[0].matching, is_equal_to(0));
// Has 5 args
assert_that(result.sc[0].argcount, is_equal_to(5));
// Arg1 is foo, no name
assert_that(result.sc[0].argnames[0].len, is_equal_to(0));
chunk_s(input, result.sc[0].argvals[0]);
assert_that(s.s, is_equal_to_string("foo"));
// Arg2 is bar, no name
assert_that(result.sc[0].argnames[1].len, is_equal_to(0));
chunk_s(input, result.sc[0].argvals[1]);
assert_that(s.s, is_equal_to_string("bar"));
// Arg3 is 42, no name
assert_that(result.sc[0].argnames[2].len, is_equal_to(0));
chunk_s(input, result.sc[0].argvals[2]);
assert_that(s.s, is_equal_to_string("42"));
// Arg4 is bat=v1
chunk_s(input, result.sc[0].argnames[3]);
assert_that(s.s, is_equal_to_string("bat"));
chunk_s(input, result.sc[0].argvals[3]);
assert_that(s.s, is_equal_to_string("v1"));
// Arg5 is baz=v2
chunk_s(input, result.sc[0].argnames[4]);
assert_that(s.s, is_equal_to_string("baz"));
chunk_s(input, result.sc[0].argvals[4]);
assert_that(s.s, is_equal_to_string("v2"));
assert_that(result.errcount, is_equal_to(0));
}
// BUG?
// Ensure(parse, escaped_shortcode)
// {
// char *input = "foobar \\{{% shortcode %}}";
// result = parse(input);
// // No shortcodes
// assert_that(result.sc[0].name.len, is_equal_to(0));
// }