We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
2 parents 260dcfe + 5245079 · commit fe18dc0 · Copy full SHA for fe18dc0
internal/tokenizer/tokenize.go
@@ -1,3 +1,6 @@
1
+// Package tokenizer implements file tokenization used by the enry content
2
+// classifier. This package is an implementation detail of enry and should not
3
+// be imported by other packages.
4
package tokenizer
5
6
import (
@@ -8,6 +11,9 @@ import (
8
11
9
12
const byteLimit = 100000
10
13
14
+// Tokenize returns language-agnostic lexical tokens from content. The tokens
15
+// returned should match what the Linguist library returns. At most the first
16
+// 100KB of content are tokenized.
17
func Tokenize(content []byte) []string {
18
if len(content) > byteLimit {
19
content = content[:byteLimit]
0 commit comments