mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-06-18 22:23:07 -03:00
tokenizer: hide flex-based impl, avoid build failures on win
TestPlan: - go test -run TestTokenize ./internal/tokenizer - go test -tags flex -run TestTokenize ./internal/tokenizer (should fail, as the default fixtures are from the regex-based tokenizer)
This commit is contained in:
@ -1,3 +1,5 @@
|
|||||||
|
// +build flex
|
||||||
|
|
||||||
package flex
|
package flex
|
||||||
|
|
||||||
// #include <stdlib.h>
|
// #include <stdlib.h>
|
||||||
|
@ -4,6 +4,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/go-enry/go-enry/v2/regex"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
@ -136,7 +137,7 @@ func TestRegexpOnInvalidUtf8(t *testing.T) {
|
|||||||
{"th\u0100 filling", []string{"th", "filling"}}, // `thĀ filling`
|
{"th\u0100 filling", []string{"th", "filling"}}, // `thĀ filling`
|
||||||
{"привет, как дела?", []string{}}, // empty, no ASCII tokens
|
{"привет, как дела?", []string{}}, // empty, no ASCII tokens
|
||||||
}
|
}
|
||||||
re := reRegularToken
|
re := regex.MustCompile(`[0-9A-Za-z_\.@#\/\*]+`) // a reRegularToken from tokenizer.go
|
||||||
|
|
||||||
for _, content := range origContent {
|
for _, content := range origContent {
|
||||||
t.Run("", func(t *testing.T) {
|
t.Run("", func(t *testing.T) {
|
||||||
|
Reference in New Issue
Block a user