@@ -127,23 +127,23 @@ func TestTokenizerLatin1AsUtf8(t *testing.T) {
 
 func TestRegexpOnInvalidUtf8(t *testing.T) {
 	origContent := []struct {
-		bytes  []byte
+		text   string
 		tokens []string
 	}{
-		{[]byte("th\xe0 filling"), []string{"th", "filling"}}, // `th� filling`
-		{[]byte("th\u0100 filling"), []string{"th", "filling"}}, // `thĀ filling`
-		{[]byte("привет, как дела?"), []string{}}, // empty, no ASCII tokens
+		{"th\xe0 filling", []string{"th", "filling"}}, // `th� filling`
+		{"th\u0100 filling", []string{"th", "filling"}}, // `thĀ filling`
+		{"привет, как дела?", []string{}}, // empty, no ASCII tokens
 	}
 	re := reRegularToken
 
 	for _, content := range origContent {
 		t.Run("", func(t *testing.T) {
-			t.Logf("%v - %q", content, string(content.bytes))
-
-			tokens := re.FindAll(content.bytes, -1)
+			t.Logf("%v - %q", content, content.text)
+			input := []byte(content.text)
+			tokens := re.FindAll(input, -1)
 			require.Equal(t, len(content.tokens), len(tokens))
 
-			newContent := re.ReplaceAll(content.bytes, []byte(` `))
+			newContent := re.ReplaceAll(input, []byte(` `))
 			t.Logf("content:%q, tokens:[", newContent)
 			for i, token := range tokens {
 				t.Logf("\t%q,", string(token))
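
For context, here is a minimal standalone sketch of the behavior this test relies on. The pattern below is a hypothetical stand-in for `reRegularToken` (its real definition lives in the tokenizer package under test); the point is that Go's `regexp` scans raw bytes, so the invalid UTF-8 byte produced by the `\xe0` escape is simply skipped rather than causing an error:

```go
package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Hypothetical stand-in for reRegularToken: runs of ASCII word characters.
	re := regexp.MustCompile(`[0-9A-Za-z_]+`)

	// "\xe0" is a single raw byte here, so this slice is not valid UTF-8.
	// regexp operates on bytes and handles it without error.
	input := []byte("th\xe0 filling")

	for _, tok := range re.FindAll(input, -1) {
		fmt.Printf("%q\n", tok) // "th", then "filling"
	}

	// Matched tokens are replaced; the invalid byte passes through untouched.
	fmt.Printf("%q\n", re.ReplaceAll(input, []byte(` `))) // " \xe0  "
}
```

On all-non-ASCII input such as `"привет, как дела?"`, the same scan finds no tokens at all, which is what the third test case asserts.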