tartrazine/internal/tokenizer/tokenize_c.go
Alexander Bezzubov ada6f15c93
address review feedback
Signed-off-by: Alexander Bezzubov <bzz@apache.org>
2019-04-16 19:38:48 +02:00

18 lines
564 B
Go

// +build flex
package tokenizer
import "gopkg.in/src-d/enry.v1/internal/tokenizer/flex"
// Tokenize returns lexical tokens from content. The tokens returned match what
// the Linguist library returns. At most the first ByteLimit bytes of content are tokenized.
// Splitting at a byte offset means it might partition a last multibyte unicode character
// in the middle of a token (but it should not affect results).
func Tokenize(content []byte) []string {
	// Cap the input at ByteLimit bytes before handing it to the flex-based scanner.
	capped := content
	if len(capped) > ByteLimit {
		capped = capped[:ByteLimit]
	}
	return flex.TokenizeFlex(capped)
}