From 6c7b91cb91e382f6beb17c7fe4ee0e370e505f43 Mon Sep 17 00:00:00 2001
From: Alexander Bezzubov
Date: Tue, 16 Apr 2019 13:05:45 +0200
Subject: [PATCH] doc: improve API doc on review feedback

Signed-off-by: Alexander Bezzubov
---
 internal/tokenizer/tokenize.go   | 8 +++++---
 internal/tokenizer/tokenize_c.go | 2 --
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/internal/tokenizer/tokenize.go b/internal/tokenizer/tokenize.go
index f4d5575..e7303bc 100644
--- a/internal/tokenizer/tokenize.go
+++ b/internal/tokenizer/tokenize.go
@@ -8,9 +8,11 @@ import (
 	"gopkg.in/src-d/enry.v1/regex"
 )
 
-// Tokenize returns lexical tokens from content. The tokens returned should match what
-// the Linguist library returns (but they are not, until https://github.com/src-d/enry/issues/193).
-// At most the first ByteLimit bytes of content are tokenized.
+// Tokenize returns lexical tokens from content. The tokens returned match what
+// the Linguist library returns. At most the first ByteLimit bytes of content are tokenized.
+//
+// BUG: Until https://github.com/src-d/enry/issues/193 is resolved, there are some
+// differences between this function and the Linguist output.
 func Tokenize(content []byte) []string {
 	if len(content) > ByteLimit {
 		content = content[:ByteLimit]
diff --git a/internal/tokenizer/tokenize_c.go b/internal/tokenizer/tokenize_c.go
index 3ebf2a5..2d640ab 100644
--- a/internal/tokenizer/tokenize_c.go
+++ b/internal/tokenizer/tokenize_c.go
@@ -6,8 +6,6 @@ import "gopkg.in/src-d/enry.v1/internal/tokenizer/flex"
 
 // Tokenize returns lexical tokens from content. The tokens returned match what
 // the Linguist library returns. At most the first ByteLimit bytes of content are tokenized.
-// Splitting at a byte offset means it might partition a last multibyte unicode character
-// in the middle of a token (but it should not affect results).
 func Tokenize(content []byte) []string {
 	if len(content) > ByteLimit {
 		content = content[:ByteLimit]
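
To illustrate the contract the revised doc comment describes, below is a minimal in-package sketch of how Tokenize could be exercised; it is not part of the patch. Only Tokenize and ByteLimit come from the package itself; the example function, the sample input, and the expected output are assumptions about how the tokenizer behaves on a small Go snippet.

package tokenizer

import "fmt"

// ExampleTokenize is a hypothetical example (not part of this patch) showing the
// documented contract: Tokenize takes raw content as a []byte and returns a slice
// of lexical tokens, and inputs longer than ByteLimit are truncated rather than
// rejected.
func ExampleTokenize() {
	content := []byte("package main\n\nfunc main() {}\n")
	tokens := Tokenize(content)

	// A small Go snippet is assumed to yield at least one token.
	fmt.Println(len(tokens) > 0)

	// Oversized input is still accepted; only the first ByteLimit bytes are tokenized.
	oversized := make([]byte, ByteLimit+1)
	copy(oversized, content)
	fmt.Println(len(Tokenize(oversized)) > 0)

	// Output:
	// true
	// true
}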