mirror of
https://github.com/ralsina/tartrazine.git
synced 2024-11-10 13:32:24 +00:00
Merge pull request #198 from creachadair/indocumentado
Add documentation comments to package tokenizer.
This commit is contained in:
commit
fe18dc0830
@@ -1,3 +1,6 @@
|
||||
// Package tokenizer implements file tokenization used by the enry content
|
||||
// classifier. This package is an implementation detail of enry and should not
|
||||
// be imported by other packages.
|
||||
package tokenizer
|
||||
|
||||
import (
|
||||
@@ -8,6 +11,9 @@ import (
|
||||
|
||||
const byteLimit = 100000
|
||||
|
||||
// Tokenize returns language-agnostic lexical tokens from content. The tokens
|
||||
// returned should match what the Linguist library returns. At most the first
|
||||
// 100,000 bytes of content are tokenized.
|
||||
func Tokenize(content []byte) []string {
|
||||
if len(content) > byteLimit {
|
||||
content = content[:byteLimit]
|
||||
|
Loading…
Reference in New Issue
Block a user