mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-06-19 14:43:05 -03:00
address review feedback
Signed-off-by: Alexander Bezzubov <bzz@apache.org>
This commit is contained in:
@ -4,12 +4,13 @@ package tokenizer
|
||||
|
||||
import "gopkg.in/src-d/enry.v1/internal/tokenizer/flex"
|
||||
|
||||
// Tokenize returns language-agnostic lexical tokens from content. The tokens
|
||||
// returned should match what the Linguist library returns. At most the first
|
||||
// 100KB of content are tokenized.
|
||||
// Tokenize returns lexical tokens from content. The tokens returned match what
|
||||
// the Linguist library returns. At most the first ByteLimit bytes of content are tokenized.
|
||||
// Splitting at a byte offset means it might partition the last multibyte Unicode character
|
||||
// in the middle of a token (but it should not affect results).
|
||||
func Tokenize(content []byte) []string {
|
||||
if len(content) > byteLimit {
|
||||
content = content[:byteLimit]
|
||||
if len(content) > ByteLimit {
|
||||
content = content[:ByteLimit]
|
||||
}
|
||||
|
||||
return flex.TokenizeFlex(content)
|
||||
|
Reference in New Issue
Block a user