mirror of
https://github.com/ralsina/tartrazine.git
synced 2024-09-20 15:31:21 +00:00
Merge pull request #197 from creachadair/muckthebits
Prevent tokenization from modifying its input.
This commit is contained in:
commit
260dcfe002
@@ -13,6 +13,10 @@ func Tokenize(content []byte) []string {
|
|||||||
content = content[:byteLimit]
|
content = content[:byteLimit]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Copy the input so that changes wrought by the tokenization steps do not
|
||||||
|
// modify the caller's copy of the input. See #196.
|
||||||
|
content = append([]byte(nil), content...)
|
||||||
|
|
||||||
tokens := make([][]byte, 0, 50)
|
tokens := make([][]byte, 0, 50)
|
||||||
for _, extract := range extractTokens {
|
for _, extract := range extractTokens {
|
||||||
var extractedTokens [][]byte
|
var extractedTokens [][]byte
|
||||||
|
@@ -102,7 +102,10 @@ func TestTokenize(t *testing.T) {
|
|||||||
|
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
t.Run(test.name, func(t *testing.T) {
|
t.Run(test.name, func(t *testing.T) {
|
||||||
|
before := string(test.content)
|
||||||
tokens := Tokenize(test.content)
|
tokens := Tokenize(test.content)
|
||||||
|
after := string(test.content)
|
||||||
|
assert.Equal(t, before, after, "the input slice was modified")
|
||||||
assert.Equal(t, len(test.expected), len(tokens), fmt.Sprintf("token' slice length = %v, want %v", len(test.expected), len(tokens)))
|
assert.Equal(t, len(test.expected), len(tokens), fmt.Sprintf("token' slice length = %v, want %v", len(test.expected), len(tokens)))
|
||||||
for i, expectedToken := range test.expected {
|
for i, expectedToken := range test.expected {
|
||||||
assert.Equal(t, expectedToken, tokens[i], fmt.Sprintf("token = %v, want %v", tokens[i], expectedToken))
|
assert.Equal(t, expectedToken, tokens[i], fmt.Sprintf("token = %v, want %v", tokens[i], expectedToken))
|
||||||
|
Loading…
Reference in New Issue
Block a user