Commit f3ceaa6 (parent 9a7b370)

token: refactor & simplify test fixtures

Signed-off-by: Alexander Bezzubov <[email protected]>

1 file changed: 8 additions, 8 deletions


internal/tokenizer/tokenize_test.go

@@ -127,23 +127,23 @@ func TestTokenizerLatin1AsUtf8(t *testing.T) {
 
 func TestRegexpOnInvalidUtf8(t *testing.T) {
     origContent := []struct {
-        bytes  []byte
+        text   string
         tokens []string
     }{
-        {[]byte("th\xe0 filling"), []string{"th", "filling"}},   // `th� filling`
-        {[]byte("th\u0100 filling"), []string{"th", "filling"}}, // `thĀ filling`
-        {[]byte("привет, как дела?"), []string{}},               // empty, no ASCII tokens
+        {"th\xe0 filling", []string{"th", "filling"}},   // `th� filling`
+        {"th\u0100 filling", []string{"th", "filling"}}, // `thĀ filling`
+        {"привет, как дела?", []string{}},               // empty, no ASCII tokens
     }
     re := reRegularToken
 
     for _, content := range origContent {
         t.Run("", func(t *testing.T) {
-            t.Logf("%v - %q", content, string(content.bytes))
-
-            tokens := re.FindAll(content.bytes, -1)
+            t.Logf("%v - %q", content, content.text)
+            input := []byte(content.text)
+            tokens := re.FindAll(input, -1)
             require.Equal(t, len(content.tokens), len(tokens))
 
-            newContent := re.ReplaceAll(content.bytes, []byte(` `))
+            newContent := re.ReplaceAll(input, []byte(` `))
            t.Logf("content:%q, tokens:[", newContent)
            for i, token := range tokens {
                t.Logf("\t%q,", string(token))

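The refactor is safe because a Go interpreted string literal such as "th\xe0 filling" stores the raw 0xE0 byte, so []byte(content.text) reproduces exactly the invalid-UTF-8 input the old []byte fixtures encoded. Below is a minimal standalone sketch of that equivalence; note that reRegularToken is internal to the tokenizer package, so the ASCII-word pattern used here is only a stand-in assumption to mirror the test's shape.

package main

import (
    "fmt"
    "regexp"
    "unicode/utf8"
)

func main() {
    // `\xe0` in an interpreted string literal is the raw byte 0xE0,
    // so the string fixture round-trips to the same bytes as before.
    text := "th\xe0 filling"
    input := []byte(text)
    fmt.Println(utf8.Valid(input)) // false: a lone 0xE0 is not valid UTF-8

    // Stand-in for the package-internal reRegularToken (assumption:
    // runs of ASCII letters), just to mirror the test's calls.
    re := regexp.MustCompile(`[A-Za-z]+`)
    for _, tok := range re.FindAll(input, -1) {
        fmt.Printf("%q\n", tok) // "th", "filling"
    }
    fmt.Printf("%q\n", re.ReplaceAll(input, []byte(` `))) // matches collapsed to single spaces
}

Either fixture shape drives the same byte-level regexp calls; the string form simply reads better in a table-driven test and confines the []byte(...) conversion to one place in the loop body.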