Skip to content

Commit a54d13e

Browse files
committed
Fix unicode conversion truncation bug
The in_iconv_u16() function works around "alignment" issues where the length of the input string is not a multiple of 4. The workaround trims the extra bytes off the end of the input string. If the input string is less than 4 bytes long in total, those extra bytes are copied into a 4-byte array and converted. However, if the input string is longer, those extra bytes are lost. This fix saves the extra "unaligned" bytes in the 4-byte array and converts them afterwards, so we don't accidentally lose 1 to 2 characters.
1 parent 143d23c commit a54d13e

1 file changed

Lines changed: 47 additions & 8 deletions

File tree

libclamav/entconv.c

Lines changed: 47 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -688,17 +688,31 @@ static int in_iconv_u16(const m_area_t* in_m_area, iconv_t* iconv_struct, m_area
688688
return 0;
689689
}
690690
/* convert encoding conv->tmp_area. conv->out_area */
691-
alignfix = inleft % 4; /* iconv gives an error if we give him 3 bytes to convert,
692-
and we are using ucs4, ditto for utf16, and 1 byte*/
691+
692+
/*
693+
* iconv gives an error if we give it less than 4 bytes to convert,
694+
* and we are using ucs4, ditto for utf16, and 1 byte.
695+
*
696+
* If an alignfix is needed, we just trim the extra un-aligned
697+
* bytes from the buffer.
698+
* We hold on to the extra bytes, putting them into an aligned 4-byte
699+
* buffer (tmp4), and then convert them with a final call to iconv.
700+
*/
701+
alignfix = inleft % 4;
693702
inleft -= alignfix;
694703

695-
if (!inleft && alignfix) {
696-
/* EOF, and we have less than 4 bytes to convert */
704+
if (alignfix) {
705+
/* Number of bytes is not mod(4).
706+
* Copy the unaligned bytes from the end of the input.*/
697707
memset(tmp4, 0, 4);
698-
memcpy(tmp4, input, alignfix);
699-
input = tmp4;
700-
inleft = 4;
701-
alignfix = 0;
708+
memcpy(tmp4, input + inleft, alignfix);
709+
710+
if (inleft == 0) {
711+
/* Total number of bytes was < 4, so we only have the "unaligned" bytes to convert. */
712+
inleft = 4;
713+
input = tmp4;
714+
alignfix = 0;
715+
}
702716
}
703717

704718
while (inleft && (outleft >= 2)) { /* iconv doesn't like inleft to be 0 */
@@ -711,6 +725,18 @@ static int in_iconv_u16(const m_area_t* in_m_area, iconv_t* iconv_struct, m_area
711725
break;
712726
}
713727
/*cli_dbgmsg(MODULE_NAME "iconv error:%s\n", cli_strerror(errno, err, sizeof(err)));*/
728+
} else if (0 == inleft) {
729+
cli_dbgmsg(MODULE_NAME "iconv consumed all input\n");
730+
if (alignfix) {
731+
/* Convert the "unaligned" bytes. */
732+
inleft = 4;
733+
input = tmp4;
734+
alignfix = 0;
735+
continue;
736+
} else {
737+
/* no more data */
738+
break;
739+
}
714740
} else if (outleft == outleft_last) {
715741
cli_dbgmsg(MODULE_NAME "iconv stall (no output)\n");
716742
} else {
@@ -726,8 +752,21 @@ static int in_iconv_u16(const m_area_t* in_m_area, iconv_t* iconv_struct, m_area
726752
*out++ = 0;
727753
*out++ = *input++;
728754
inleft--;
755+
756+
if (0 == inleft && alignfix) {
757+
/* Convert the "unaligned" bytes. */
758+
inleft = 4;
759+
input = tmp4;
760+
alignfix = 0;
761+
continue;
762+
} else {
763+
/* no more data */
764+
break;
765+
}
729766
}
767+
730768
cli_dbgmsg("in_iconv_u16: unprocessed bytes: %lu\n", (unsigned long)inleft);
769+
731770
if (out_m_area->length >= 0 && out_m_area->length >= (off_t)outleft) {
732771
out_m_area->length -= (off_t)outleft;
733772
} else {

0 commit comments

Comments
 (0)