From 4027b494b3b969736d0dd12f9e3c09ff776b9384 Mon Sep 17 00:00:00 2001
From: "M. J. Fromberger"
Date: Tue, 29 Jan 2019 10:41:06 -0800
Subject: [PATCH] Add documentation comments to package tokenizer.

Although this package is internal, it still exports an API and deserves
some comments. Serves in partial satisfaction of #195.

Signed-off-by: M. J. Fromberger
---
 internal/tokenizer/tokenize.go | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/internal/tokenizer/tokenize.go b/internal/tokenizer/tokenize.go
index dadbccd..d7f1c43 100644
--- a/internal/tokenizer/tokenize.go
+++ b/internal/tokenizer/tokenize.go
@@ -1,3 +1,6 @@
+// Package tokenizer implements file tokenization used by the enry file
+// classifier. This package is an implementation detail of enry and should not
+// be imported by other packages.
 package tokenizer
 
 import (
@@ -8,6 +11,9 @@ import (
 
 const byteLimit = 100000
 
+// Tokenize returns classification tokens from content. The tokens returned
+// should match what the Linguist library returns. At most the first 100KB of
+// content are tokenized.
 func Tokenize(content []byte) []string {
 	if len(content) > byteLimit {
 		content = content[:byteLimit]