mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-05-23 08:30:07 -03:00
token: new test case with Unicode replacement
Signed-off-by: Alexander Bezzubov <bzz@apache.org>
This commit is contained in:
parent
278eaf1c22
commit
8bdc830833
@@ -115,6 +115,13 @@ func TestTokenize(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestTokenizerLatin1AsUtf8 passes Tokenize a byte slice containing the lone
// byte 0xE5, which is not a valid UTF-8 sequence on its own, and requires
// that exactly three tokens are returned.
func TestTokenizerLatin1AsUtf8(t *testing.T) {
|
||||
content := []byte("th\xe5 filling") // 0xE5 is "å" in Latin-1; read as UTF-8 the byte is invalid
|
||||
// Log the raw bytes alongside the quoted string form of the input.
t.Logf("%v - %q", content, string(content))
|
||||
tokens := Tokenize(content)
|
||||
require.Equal(t, 3, len(tokens))
|
||||
}
|
||||
|
||||
func BenchmarkTokenizer_BaselineCopy(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
|
Loading…
x
Reference in New Issue
Block a user