mirror of
https://github.com/ralsina/tartrazine.git
synced 2024-11-12 22:42:23 +00:00
ada6f15c93
Signed-off-by: Alexander Bezzubov <bzz@apache.org>
18 lines
564 B
Go
18 lines
564 B
Go
// +build flex
|
|
|
|
package tokenizer
|
|
|
|
import "gopkg.in/src-d/enry.v1/internal/tokenizer/flex"
|
|
|
|
// Tokenize returns lexical tokens from content. The tokens returned match what
|
|
// the Linguist library returns. At most the first ByteLimit bytes of content are tokenized.
|
|
// Splitting at a byte offset means it might partition a last multibyte unicode character
|
|
// in the middle of a token (but it should not affect results).
|
|
func Tokenize(content []byte) []string {
|
|
if len(content) > ByteLimit {
|
|
content = content[:ByteLimit]
|
|
}
|
|
|
|
return flex.TokenizeFlex(content)
|
|
}
|