Merge pull request #198 from creachadair/indocumentado

Add documentation comments to package tokenizer.
Alexander 2019-01-29 21:18:56 +01:00 committed by GitHub
commit fe18dc0830


@@ -1,3 +1,6 @@
// Package tokenizer implements file tokenization used by the enry content
// classifier. This package is an implementation detail of enry and should not
// be imported by other packages.
package tokenizer
import (
@@ -8,6 +11,9 @@ import (
const byteLimit = 100000
// Tokenize returns language-agnostic lexical tokens from content. The tokens
// returned should match what the Linguist library returns. At most the first
// 100KB of content are tokenized.
func Tokenize(content []byte) []string {
if len(content) > byteLimit {
content = content[:byteLimit]
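
A minimal usage sketch of the documented API, not part of this commit. It assumes the import path used by the enry repository at the time (gopkg.in/src-d/enry.v1/internal/tokenizer); since the package is internal, code like this could only live inside enry itself, and the sample content is illustrative.

package main

import (
	"fmt"

	// Assumed import path; the tokenizer package is an internal
	// implementation detail of enry and not importable elsewhere.
	"gopkg.in/src-d/enry.v1/internal/tokenizer"
)

func main() {
	content := []byte("package main\n\nfunc main() { println(42) }\n")

	// Tokenize truncates input beyond byteLimit (100KB) and returns
	// language-agnostic lexical tokens intended to match Linguist's output.
	tokens := tokenizer.Tokenize(content)
	fmt.Println(tokens)
}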