mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-08-02 22:09:49 +00:00
token: new test case with Unicode replacement
Signed-off-by: Alexander Bezzubov <bzz@apache.org>
This commit is contained in:
@@ -115,6 +115,13 @@ func TestTokenize(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestTokenizerLatin1AsUtf8 runs Tokenize on a byte slice that contains the
// single byte 0xE5 ("å" in Latin-1) followed by a space, which is not a
// well-formed UTF-8 sequence, and requires that exactly three tokens come back.
func TestTokenizerLatin1AsUtf8(t *testing.T) {
|
||||||
|
content := []byte("th\xe5 filling") // Latin-1 bytes for `thå filling`; the lone 0xE5 is invalid as UTF-8
|
||||||
|
// Log both the raw byte values (%v) and the quoted string form (%q) to aid debugging.
t.Logf("%v - %q", content, string(content))
|
||||||
|
tokens := Tokenize(content)
|
||||||
|
// The only assertion is on the token count; token contents are not checked here.
// NOTE(review): `require` is presumably testify's require package — confirm against the file's imports.
require.Equal(t, 3, len(tokens))
|
||||||
|
}
|
||||||
|
|
||||||
func BenchmarkTokenizer_BaselineCopy(b *testing.B) {
|
func BenchmarkTokenizer_BaselineCopy(b *testing.B) {
|
||||||
b.ReportAllocs()
|
b.ReportAllocs()
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
|
Reference in New Issue
Block a user