Mirror of https://github.com/ralsina/tartrazine.git, synced 2025-05-23 08:30:07 -03:00
doc: improve API doc on review feedback
Signed-off-by: Alexander Bezzubov <bzz@apache.org>
This commit is contained in:
parent ada6f15c93, commit 6c7b91cb91
@@ -8,9 +8,11 @@ import (
 	"gopkg.in/src-d/enry.v1/regex"
 )
 
-// Tokenize returns lexical tokens from content. The tokens returned should match what
-// the Linguist library returns (but they are not, until https://github.com/src-d/enry/issues/193).
-// At most the first ByteLimit bytes of content are tokenized.
+// Tokenize returns lexical tokens from content. The tokens returned match what
+// the Linguist library returns. At most the first ByteLimit bytes of content are tokenized.
+//
+// BUG: Until https://github.com/src-d/enry/issues/193 is resolved, there are some
+// differences between this function and the Linguist output.
 func Tokenize(content []byte) []string {
 	if len(content) > ByteLimit {
 		content = content[:ByteLimit]
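For context on the truncation the new doc comment describes: a minimal sketch (not part of this commit) of an example test that could live inside the tokenizer package itself. It assumes only the ByteLimit constant and the Tokenize function visible in this diff; the test name is hypothetical.

package tokenizer

import (
	"fmt"
	"strings"
)

// Hypothetical example test: inputs longer than ByteLimit are truncated
// before tokenization, so tokenizing the full content and tokenizing
// just its first ByteLimit bytes yield the same tokens.
func ExampleTokenize_truncation() {
	long := []byte(strings.Repeat("word ", ByteLimit)) // well over ByteLimit bytes
	same := len(Tokenize(long)) == len(Tokenize(long[:ByteLimit]))
	fmt.Println(same)
	// Output: true
}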
@@ -6,8 +6,6 @@ import "gopkg.in/src-d/enry.v1/internal/tokenizer/flex"
 
 // Tokenize returns lexical tokens from content. The tokens returned match what
 // the Linguist library returns. At most the first ByteLimit bytes of content are tokenized.
-// Splitting at a byte offset means it might partition a last multibyte unicode character
-// in the middle of a token (but it should not affect results).
 func Tokenize(content []byte) []string {
 	if len(content) > ByteLimit {
 		content = content[:ByteLimit]
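The two removed lines describe a real property of slicing at a byte offset, and the following standalone sketch (illustrative only, not from the commit) shows it: a content[:ByteLimit]-style cut can land inside a multibyte UTF-8 character and partition it.

package main

import (
	"fmt"
	"unicode/utf8"
)

func main() {
	// "é" is two bytes in UTF-8; cutting one byte short of the end
	// splits that rune, which is what the removed comment warns about.
	content := []byte("caf\u00e9")
	cut := content[:len(content)-1] // byte-offset truncation, like content[:ByteLimit]

	fmt.Println(utf8.Valid(content)) // true
	fmt.Println(utf8.Valid(cut))     // false: the last rune was partitioned
}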