Mirror of https://github.com/ralsina/tartrazine.git (synced 2025-05-23 16:40:08 -03:00)
Add documentation comments to package tokenizer.

Although this package is internal, it still exports an API and deserves some
comments. Serves in partial satisfaction of #195.

Signed-off-by: M. J. Fromberger <michael.j.fromberger@gmail.com>
parent 260dcfe002
commit 4027b494b3
@@ -1,3 +1,6 @@
+// Package tokenizer implements file tokenization used by the enry file
+// classifier. This package is an implementation detail of enry and should not
+// be imported by other packages.
 package tokenizer
 
 import (
@@ -8,6 +11,9 @@ import (
 
 const byteLimit = 100000
 
+// Tokenize returns classification tokens from content. The tokens returned
+// should match what the Linguist library returns. At most the first 100KB of
+// content are tokenized.
 func Tokenize(content []byte) []string {
 	if len(content) > byteLimit {
 		content = content[:byteLimit]
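For readers skimming the diff, a minimal sketch of the behaviour the new doc comments describe: the input is capped at byteLimit before tokenization, so at most the first 100KB is ever examined. This is not enry's implementation; the function name tokenizeSketch, the whitespace splitting via strings.Fields, and the sample input in main are illustrative assumptions. Only the byteLimit cap mirrors the hunk above; the real Tokenize produces tokens matching the Linguist library.

// sketch.go: illustrative only, not part of the enry codebase.
package main

import (
	"fmt"
	"strings"
)

// Same limit as in the diff above: tokenize at most the first 100KB.
const byteLimit = 100000

// tokenizeSketch truncates content to byteLimit and returns whitespace-split
// tokens. It is a stand-in for the real Tokenize, whose tokenization rules
// are more involved (they follow Linguist).
func tokenizeSketch(content []byte) []string {
	if len(content) > byteLimit {
		content = content[:byteLimit] // drop everything past the first 100KB
	}
	return strings.Fields(string(content))
}

func main() {
	src := []byte("package tokenizer\n\nconst byteLimit = 100000\n")
	fmt.Println(tokenizeSketch(src))
}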