Copy the tokenizer input to avoid modifying the caller's copy.

Addresses #196. Several of the tokenizer's processing steps modify the input
buffer in place, and we don't want those changes to be observed by the caller,
which may use the source for other purposes afterward.

Signed-off-by: M. J. Fromberger <michael.j.fromberger@gmail.com>
This commit is contained in:
M. J. Fromberger 2019-01-29 10:12:33 -08:00
parent 169060e1cd
commit 7d277b11de

View File

@@ -13,6 +13,10 @@ func Tokenize(content []byte) []string {
content = content[:byteLimit] content = content[:byteLimit]
} }
// Copy the input so that changes wrought by the tokenization steps do not
// modify the caller's copy of the input. See #196.
content = append([]byte(nil), content...)
tokens := make([][]byte, 0, 50) tokens := make([][]byte, 0, 50)
for _, extract := range extractTokens { for _, extract := range extractTokens {
var extractedTokens [][]byte var extractedTokens [][]byte