Skip to content

Commit c34b715

Browse files
junklib: Fix the regression caused by the previous commit / failing test.
Added a new UTF-8 string copy function, `u8_strnbcpy_size`, that can handle zero-separated multi-value strings.
1 parent 531323e commit c34b715

3 files changed

Lines changed: 43 additions & 6 deletions

File tree

src/junklib.c

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -552,12 +552,8 @@ ddb_iconv (const char *cs_out, const char *cs_in, char *out, int outlen, const c
552552
// to utf8 branch
553553
if (!strcasecmp (cs_out, UTF8_STR)) {
554554
if (!strcasecmp (cs_in, UTF8_STR)) {
555-
int result = u8_strnbcpy(out, in, outlen-1);
556-
out[result] = 0;
557-
int valid = u8_valid (out, result, NULL);
558-
if (valid) {
559-
len = result;
560-
}
555+
int result = u8_strnbcpy_size(out, in, inlen, outlen);
556+
len = result;
561557
}
562558
else if (!strcasecmp (cs_in, "cp1251")) {
563559
len = cp1251_to_utf8 (in, inlen, out, outlen);

src/utf8.c

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,40 @@ int u8_strnbcpy (char *dest, const char* src, int num_bytes) {
278278
return nb - num_bytes;
279279
}
280280

281+
/* Copies whole characters from src into dest without writing more than
   num_bytes bytes to dest. */
/* Unlike a NUL-terminated copy, the amount of input is bounded by input_size:
   a \0 byte in src is copied as a single byte and copying continues. */
/* Note: positions 0..input_size inclusive are examined, so a \0 located at
   src[input_size] is copied as well when room remains in dest. */
/* A character is copied only if it ends at or before input_size and fits
   entirely into the remaining space; otherwise copying stops. */
/* Returns the number of bytes written to dest (0 if dest is NULL or
   num_bytes is not positive). No terminator is appended. */
int u8_strnbcpy_size (char *dest, const char* src, int input_size, int num_bytes) {
    if (!dest) {
        return 0;
    }

    char *out = dest;
    int32_t remaining = num_bytes;
    int32_t start = 0;

    while (start <= input_size && remaining > 0) {
        if (src[start] == 0) {
            /* zero separator: copy it verbatim and move on to the next value */
            *out++ = 0;
            remaining--;
            start++;
            continue;
        }

        /* u8_inc (defined elsewhere) is expected to advance the index past
           one character */
        int32_t end = start;
        u8_inc (src, &end);
        int32_t charlen = end - start;

        /* stop on a character that overruns the input or the output space */
        if (end > input_size || charlen > remaining) {
            break;
        }

        memcpy (out, &src[start], charlen);
        out += charlen;
        remaining -= charlen;
        start = end;
    }

    return num_bytes - remaining;
}
312+
313+
314+
281315
/* copies a character from src to dest provided it does not exceed num_bytes */
282316
/* returns the number of bytes copied, not counting a null terminator, which is not written */
283317
int u8_charcpy (char *dest, const char *src, int num_bytes) {

src/utf8.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,13 @@ int u8_strncpy (char *dest, const char* src, int num_chars);
6666
*/
6767
int u8_strnbcpy (char *dest, const char* src, int num_bytes);
6868

69+
/* Copies as many characters from src as would not exceed num_bytes in the destination;
70+
input_size is the length of src in bytes; copying does not stop at the first \0.
71+
\0 characters are copied as-is.
72+
returns the number of bytes copied.
73+
*/
74+
int u8_strnbcpy_size (char *dest, const char* src, int input_size, int num_bytes);
75+
6976
/* copy single utf8 character of up to num_bytes bytes large, only if num_bytes is large enough;
7077
return number of bytes copied
7178
*/

0 commit comments

Comments
 (0)