From 6c7b91cb91e382f6beb17c7fe4ee0e370e505f43 Mon Sep 17 00:00:00 2001
From: Alexander Bezzubov
Date: Tue, 16 Apr 2019 13:05:45 +0200
Subject: [PATCH] doc: improve API doc on review feedback

Signed-off-by: Alexander Bezzubov
---
 internal/tokenizer/tokenize.go   | 8 +++++---
 internal/tokenizer/tokenize_c.go | 2 --
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/internal/tokenizer/tokenize.go b/internal/tokenizer/tokenize.go
index f4d5575..e7303bc 100644
--- a/internal/tokenizer/tokenize.go
+++ b/internal/tokenizer/tokenize.go
@@ -8,9 +8,11 @@ import (
 	"gopkg.in/src-d/enry.v1/regex"
 )
 
-// Tokenize returns lexical tokens from content. The tokens returned should match what
-// the Linguist library returns (but they are not, until https://github.com/src-d/enry/issues/193).
-// At most the first ByteLimit bytes of content are tokenized.
+// Tokenize returns lexical tokens from content. The tokens returned match what
+// the Linguist library returns. At most the first ByteLimit bytes of content are tokenized.
+//
+// BUG: Until https://github.com/src-d/enry/issues/193 is resolved, there are some
+// differences between this function and the Linguist output.
 func Tokenize(content []byte) []string {
 	if len(content) > ByteLimit {
 		content = content[:ByteLimit]
diff --git a/internal/tokenizer/tokenize_c.go b/internal/tokenizer/tokenize_c.go
index 3ebf2a5..2d640ab 100644
--- a/internal/tokenizer/tokenize_c.go
+++ b/internal/tokenizer/tokenize_c.go
@@ -6,8 +6,6 @@ import "gopkg.in/src-d/enry.v1/internal/tokenizer/flex"
 
 // Tokenize returns lexical tokens from content. The tokens returned match what
 // the Linguist library returns. At most the first ByteLimit bytes of content are tokenized.
-// Splitting at a byte offset means it might partition a last multibyte unicode character
-// in the middle of a token (but it should not affect results).
 func Tokenize(content []byte) []string {
 	if len(content) > ByteLimit {
 		content = content[:ByteLimit]
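
To illustrate the contract the revised doc comment describes, below is a minimal in-package sketch of how Tokenize could be exercised; it is not part of the patch. Only Tokenize and ByteLimit come from the package itself; the example function, the sample input, and the expected output are assumptions about how the tokenizer behaves on a small Go snippet.

package tokenizer

import "fmt"

// ExampleTokenize is a hypothetical example (not part of this patch) showing the
// documented contract: Tokenize takes raw content as a []byte and returns a slice
// of lexical tokens, and inputs longer than ByteLimit are truncated rather than
// rejected.
func ExampleTokenize() {
	content := []byte("package main\n\nfunc main() {}\n")
	tokens := Tokenize(content)

	// A small Go snippet is assumed to yield at least one token.
	fmt.Println(len(tokens) > 0)

	// Oversized input is still accepted; only the first ByteLimit bytes are tokenized.
	oversized := make([]byte, ByteLimit+1)
	copy(oversized, content)
	fmt.Println(len(Tokenize(oversized)) > 0)

	// Output:
	// true
	// true
}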