diff --git a/internal/tokenizer/tokenize.go b/internal/tokenizer/tokenize.go index dadbccd..d7f1c43 100644 --- a/internal/tokenizer/tokenize.go +++ b/internal/tokenizer/tokenize.go @@ -1,3 +1,6 @@ +// Package tokenizer implements file tokenization used by the enry file +// classifier. This package is an implementation detail of enry and should not +// be imported by other packages. package tokenizer import ( @@ -8,6 +11,9 @@ import ( const byteLimit = 100000 +// Tokenize returns classification tokens from content. The tokens returned +// should match what the Linguist library returns. At most the first 100KB of +// content are tokenized. func Tokenize(content []byte) []string { if len(content) > byteLimit { content = content[:byteLimit]