diff --git a/README.md b/README.md index 425a66e..f2963eb 100644 --- a/README.md +++ b/README.md @@ -4,17 +4,17 @@ Tartrazine is a library to syntax-highlight code. It is a port of [Pygments](https://pygments.org/) to [Crystal](https://crystal-lang.org/). Kind of. -It's not currently usable because it's not finished, but: - -* The lexers work for the implemented languages -* The provided styles work -* There is a very very simple HTML formatter +The CLI tool can be used to highlight many things in many styles. # A port of what? Why "kind of"? -Because I did not read the Pygments code. And this is actually -based on [Chroma](https://github.com/alecthomas/chroma) ... -although I did not read that code either. +Pygments is a staple of the Python ecosystem, and it's great. +It lets you highlight code in many languages, and it has many +themes. Chroma is "Pygments for Go"; it's actually a port of +Pygments to Go, and it's great too. + +I wanted that in Crystal, so I started this project. But I did +not read much of the Pygments code. Or much of Chroma's. Chroma has taken most of the Pygments lexers and turned them into XML descriptions. 
What I did was take those XML files from Chroma diff --git a/spec/tartrazine_spec.cr b/spec/tartrazine_spec.cr index 36662bc..5125129 100644 --- a/spec/tartrazine_spec.cr +++ b/spec/tartrazine_spec.cr @@ -14,6 +14,7 @@ unicode_problems = { "#{__DIR__}/tests/java/test_string_literals.txt", "#{__DIR__}/tests/json/test_strings.txt", "#{__DIR__}/tests/systemd/example1.txt", + "#{__DIR__}/tests/c++/test_unicode_identifiers.txt", } # These testcases fail because of differences in the way chroma and tartrazine tokenize diff --git a/src/bytes_regex.cr b/src/bytes_regex.cr index 72b1151..656a299 100644 --- a/src/bytes_regex.cr +++ b/src/bytes_regex.cr @@ -3,7 +3,7 @@ module BytesRegex class Regex def initialize(pattern : String, multiline = false, dotall = false, ignorecase = false, anchored = false) - flags = LibPCRE2::UTF | LibPCRE2::DUPNAMES | LibPCRE2::UCP + flags = LibPCRE2::UTF | LibPCRE2::DUPNAMES | LibPCRE2::UCP | LibPCRE2::NO_UTF_CHECK flags |= LibPCRE2::MULTILINE if multiline flags |= LibPCRE2::DOTALL if dotall flags |= LibPCRE2::CASELESS if ignorecase @@ -36,7 +36,7 @@ module BytesRegex str, str.size, pos, - 0, + LibPCRE2::NO_UTF_CHECK, match_data, nil) if rc < 0