Skip to content

Commit bcf2e5e

Browse files
committed
Increase limit for finding PE files embedded in other PE files
I am seeing missed detections since we changed to prohibit embedded file type identification when inside an embedded file. In particular, I'm seeing this issue with PE files that contain multiple other MSEXE as well as a variety of false positives for PE file headers. For example, imagine a PE with four concatenated DLL's, like so: ``` [ EXE file | DLL #1 | DLL #2 | DLL #3 | DLL #4 ] ``` And note that false positives for embedded MSEXE files are fairly common. So there may be a few mixed in there. Before limiting embedded file identification we might interpret the file structure something like this: ``` MSEXE: { embedded MSEXE #1: false positive, embedded MSEXE #2: false positive, embedded MSEXE #3: false positive, embedded MSEXE #4: DLL #1: { embedded MSEXE #1: false positive, embedded MSEXE #2: DLL #2: { embedded MSEXE #1: DLL #3: { embedded MSEXE #1: false positive, embedded MSEXE #2: false positive, embedded MSEXE #3: false positive, embedded MSEXE #4: false positive, embedded MSEXE #5: DLL #4 } embedded MSEXE #2: false positive, embedded MSEXE #3: false positive, embedded MSEXE #4: false positive, embedded MSEXE #5: false positive, embedded MSEXE #6: DLL #4 } embedded MSEXE #3: DLL #3, embedded MSEXE #4: false positive, embedded MSEXE #5: false positive, embedded MSEXE #6: false positive, embedded MSEXE #7: false positive, embedded MSEXE #8: DLL #4 } } ``` This is obviously terrible, which is why why we don't allow detecting embedded files within other embedded files. So after we enforce that limit, the same file may be interpreted like this instead: ``` MSEXE: { embedded MSEXE #1: false positive, embedded MSEXE #2: false positive, embedded MSEXE #3: false positive, embedded MSEXE #4: DLL #1, embedded MSEXE #5: false positive, embedded MSEXE #6: DLL #2, embedded MSEXE #7: DLL #3, embedded MSEXE #8: false positive, embedded MSEXE #9: false positive, embedded MSEXE #10: false positive, embedded MSEXE #11: false positive, embedded MSEXE #12: DLL #4 } ``` That's great! Except that we now exceed the "MAX_EMBEDDED_OBJ" limit for embedded type matches (limit 10, but 12 found). That means we won't see or extract the 4th DLL anymore. My solution is to lift the limit when adding an matched MSEXE type. We already do this for matched ZIPSFX types. While doing this, I've significantly tidied up the limits checks to make it more readble, and removed duplicate checks from within the `ac_addtype()` function. CLAM-2897
1 parent 06bf061 commit bcf2e5e

File tree

1 file changed

+143
-30
lines changed

1 file changed

+143
-30
lines changed

libclamav/matcher-ac.c

Lines changed: 143 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -833,8 +833,9 @@ int cli_ac_chklsig(const char *expr, const char *end, uint32_t *lsigcnt, unsigne
833833
return -1;
834834
}
835835

836-
for (i += 2; i + 1 < len && (isdigit(expr[i + 1]) || expr[i + 1] == ','); i++)
837-
;
836+
for (i += 2; i + 1 < len && (isdigit(expr[i + 1]) || expr[i + 1] == ','); i++) {
837+
continue;
838+
}
838839
}
839840

840841
if (&expr[i + 1] == rend)
@@ -1625,36 +1626,49 @@ void cli_ac_freedata(struct cli_ac_data *data)
16251626
}
16261627
}
16271628

1628-
/* returns only CL_SUCCESS or CL_EMEM */
1629-
inline static int ac_addtype(struct cli_matched_type **list, cli_file_t type, off_t offset, const cli_ctx *ctx)
1629+
/**
1630+
* @brief Add a match for an object type to the list of matched types.
1631+
*
1632+
* Important: The caller is responsible for checking limits!
1633+
*
1634+
* @param list Pointer to the list of matched types. *list may be NULL if no types have been added yet.
1635+
* @param type The type of the embedded object.
1636+
* @param offset The offset of the embedded object.
1637+
* @param ctx The context information. May be NULL.
1638+
* @return cl_error_t CL_SUCCESS regardless if added, or CL_EMEM if memory allocation failed.
1639+
*/
1640+
inline static cl_error_t ac_addtype(struct cli_matched_type **list, cli_file_t type, off_t offset, const cli_ctx *ctx)
16301641
{
1631-
struct cli_matched_type *tnode, *tnode_last;
1642+
struct cli_matched_type *tnode;
16321643

1633-
if (type == CL_TYPE_ZIPSFX) {
1634-
if (*list && ctx && ctx->engine->maxfiles && (*list)->cnt > ctx->engine->maxfiles)
1635-
return CL_SUCCESS;
1636-
} else if (*list && (*list)->cnt >= MAX_EMBEDDED_OBJ) {
1637-
return CL_SUCCESS;
1638-
}
1639-
1640-
if (!(tnode = calloc(1, sizeof(struct cli_matched_type)))) {
1644+
tnode = calloc(1, sizeof(struct cli_matched_type));
1645+
if (NULL == tnode) {
16411646
cli_errmsg("cli_ac_addtype: Can't allocate memory for new type node\n");
16421647
return CL_EMEM;
16431648
}
16441649

16451650
tnode->type = type;
16461651
tnode->offset = offset;
16471652

1648-
tnode_last = *list;
1649-
while (tnode_last && tnode_last->next)
1650-
tnode_last = tnode_last->next;
1653+
if (*list) {
1654+
// Add to end of existing list.
1655+
struct cli_matched_type *tnode_last = *list;
16511656

1652-
if (tnode_last)
1657+
while (tnode_last && tnode_last->next) {
1658+
tnode_last = tnode_last->next;
1659+
}
16531660
tnode_last->next = tnode;
1654-
else
1661+
} else {
1662+
// First type in the list.
16551663
*list = tnode;
1664+
}
16561665

16571666
(*list)->cnt++;
1667+
1668+
if (UNLIKELY(cli_get_debug_flag())) {
1669+
cli_dbgmsg("ac_addtype: added %s embedded object at offset " STDi64 ". Embedded object count: %d\n", cli_ftname(type), (uint64_t)offset, (*list)->cnt);
1670+
}
1671+
16581672
return CL_SUCCESS;
16591673
}
16601674

@@ -1999,14 +2013,65 @@ cl_error_t cli_ac_scanbuff(
19992013

20002014
cli_dbgmsg("Matched signature for file type %s\n", pt->virname);
20012015
type = pt->type;
2002-
if ((ftoffset != NULL) &&
2003-
((*ftoffset == NULL) || (*ftoffset)->cnt < MAX_EMBEDDED_OBJ || type == CL_TYPE_ZIPSFX) && (type >= CL_TYPE_SFX || ((ftype == CL_TYPE_MSEXE || ftype == CL_TYPE_ZIP || ftype == CL_TYPE_MSOLE2) && type == CL_TYPE_MSEXE))) {
2004-
/* FIXME: the first offset in the array is most likely the correct one but
2005-
* it may happen it is not
2006-
*/
2007-
for (j = 1; j <= CLI_DEFAULT_AC_TRACKLEN + 1 && offmatrix[0][j] != (uint32_t)-1; j++)
2008-
if (ac_addtype(ftoffset, type, offmatrix[pt->parts - 1][j], ctx))
2009-
return CL_EMEM;
2016+
2017+
if (ftoffset != NULL) {
2018+
// Caller provided a pointer to record matched types.
2019+
bool too_many_types = false;
2020+
bool supported_type = false;
2021+
2022+
if (*ftoffset != NULL) {
2023+
// Have some type matches already. Check limits.
2024+
2025+
if (ctx && ((type == CL_TYPE_ZIPSFX) ||
2026+
(type == CL_TYPE_MSEXE && ftype == CL_TYPE_MSEXE))) {
2027+
// When ctx present, limit the number of type matches using ctx->engine->maxfiles for specific types.
2028+
// Reasoning:
2029+
// ZIP local file header entries likely to be numerous if a single ZIP appended to the scanned file.
2030+
// MSEXE can contain many embedded MSEXE entries and MSEXE type false positives matches.
2031+
2032+
if (ctx->engine->maxfiles == 0) {
2033+
// Max-files limit is disabled.
2034+
} else if ((*ftoffset)->cnt >= ctx->engine->maxfiles) {
2035+
if (UNLIKELY(cli_get_debug_flag())) {
2036+
cli_dbgmsg("ac_addtype: Can't add %s type at offset " STDu64 " to list of embedded type matches. Reached maxfiles limit of %u\n", cli_ftname(type), (*ftoffset)->offset, ctx->engine->maxfiles);
2037+
}
2038+
too_many_types = true;
2039+
}
2040+
} else {
2041+
// Limit the number of type matches using MAX_EMBEDDED_OBJ.
2042+
if ((*ftoffset)->cnt >= MAX_EMBEDDED_OBJ) {
2043+
if (UNLIKELY(cli_get_debug_flag())) {
2044+
cli_dbgmsg("ac_addtype: Can't add %s type at offset " STDu64 " to list of embedded type matches. Reached MAX_EMBEDDED_OBJ limit of %u\n", cli_ftname(type), (*ftoffset)->offset, MAX_EMBEDDED_OBJ);
2045+
}
2046+
too_many_types = true;
2047+
}
2048+
}
2049+
}
2050+
2051+
// Filter to supported types.
2052+
if (
2053+
// Found type is MBR.
2054+
type == CL_TYPE_MBR ||
2055+
// Found type is any SFX type (i.e., ZIPSFX, RARSFX, 7ZSSFX, etc.).
2056+
type >= CL_TYPE_SFX ||
2057+
// Found type is an MSEXE, but only if host file type is one of MSEXE, ZIP, or MSOLE2.
2058+
(type == CL_TYPE_MSEXE && (ftype == CL_TYPE_MSEXE || ftype == CL_TYPE_ZIP || ftype == CL_TYPE_MSOLE2))) {
2059+
2060+
supported_type = true;
2061+
}
2062+
2063+
if (supported_type && !too_many_types) {
2064+
/* FIXME: the first offset in the array is most likely the correct one but
2065+
* it may happen it is not
2066+
* Until we're certain and can fix this, we add all offsets in the list.
2067+
*/
2068+
for (j = 1; j <= CLI_DEFAULT_AC_TRACKLEN + 1 && offmatrix[0][j] != (uint32_t)-1; j++) {
2069+
ret = ac_addtype(ftoffset, type, offmatrix[pt->parts - 1][j], ctx);
2070+
if (CL_SUCCESS != ret) {
2071+
return ret;
2072+
}
2073+
}
2074+
}
20102075
}
20112076

20122077
memset(offmatrix[0], (uint32_t)-1, pt->parts * (CLI_DEFAULT_AC_TRACKLEN + 2) * sizeof(uint32_t));
@@ -2066,11 +2131,59 @@ cl_error_t cli_ac_scanbuff(
20662131

20672132
cli_dbgmsg("Matched signature for file type %s at %u\n", pt->virname, realoff);
20682133
type = pt->type;
2069-
if ((ftoffset != NULL) &&
2070-
((*ftoffset == NULL) || (*ftoffset)->cnt < MAX_EMBEDDED_OBJ || type == CL_TYPE_ZIPSFX) && (type == CL_TYPE_MBR || type >= CL_TYPE_SFX || ((ftype == CL_TYPE_MSEXE || ftype == CL_TYPE_ZIP || ftype == CL_TYPE_MSOLE2) && type == CL_TYPE_MSEXE))) {
20712134

2072-
if (ac_addtype(ftoffset, type, realoff, ctx))
2073-
return CL_EMEM;
2135+
if (ftoffset != NULL) {
2136+
// Caller provided a pointer to record matched types.
2137+
bool too_many_types = false;
2138+
bool supported_type = false;
2139+
2140+
if (*ftoffset != NULL) {
2141+
// Have some type matches already. Check limits.
2142+
2143+
if (ctx && ((type == CL_TYPE_ZIPSFX) ||
2144+
(type == CL_TYPE_MSEXE && ftype == CL_TYPE_MSEXE))) {
2145+
// When ctx present, limit the number of type matches using ctx->engine->maxfiles for specific types.
2146+
// Reasoning:
2147+
// ZIP local file header entries likely to be numerous if a single ZIP appended to the scanned file.
2148+
// MSEXE can contain many embedded MSEXE entries and MSEXE type false positives matches.
2149+
2150+
if (ctx->engine->maxfiles == 0) {
2151+
// Max-files limit is disabled.
2152+
} else if ((*ftoffset)->cnt >= ctx->engine->maxfiles) {
2153+
if (UNLIKELY(cli_get_debug_flag())) {
2154+
cli_dbgmsg("ac_addtype: Can't add %s type at offset " STDu64 " to list of embedded type matches. Reached maxfiles limit of %u\n", cli_ftname(type), (*ftoffset)->offset, ctx->engine->maxfiles);
2155+
}
2156+
too_many_types = true;
2157+
}
2158+
} else {
2159+
// Limit the number of type matches using MAX_EMBEDDED_OBJ.
2160+
if ((*ftoffset)->cnt >= MAX_EMBEDDED_OBJ) {
2161+
if (UNLIKELY(cli_get_debug_flag())) {
2162+
cli_dbgmsg("ac_addtype: Can't add %s type at offset " STDu64 " to list of embedded type matches. Reached MAX_EMBEDDED_OBJ limit of %u\n", cli_ftname(type), (*ftoffset)->offset, MAX_EMBEDDED_OBJ);
2163+
}
2164+
too_many_types = true;
2165+
}
2166+
}
2167+
}
2168+
2169+
// Filter to supported types.
2170+
if (
2171+
// Found type is MBR.
2172+
type == CL_TYPE_MBR ||
2173+
// Found type is any SFX type (i.e., ZIPSFX, RARSFX, 7ZSSFX, etc.).
2174+
type >= CL_TYPE_SFX ||
2175+
// Found type is an MSEXE, but only if host file type is one of MSEXE, ZIP, or MSOLE2.
2176+
(type == CL_TYPE_MSEXE && (ftype == CL_TYPE_MSEXE || ftype == CL_TYPE_ZIP || ftype == CL_TYPE_MSOLE2))) {
2177+
2178+
supported_type = true;
2179+
}
2180+
2181+
if (supported_type && !too_many_types) {
2182+
ret = ac_addtype(ftoffset, type, realoff, ctx);
2183+
if (CL_SUCCESS != ret) {
2184+
return ret;
2185+
}
2186+
}
20742187
}
20752188
}
20762189
} else {

0 commit comments

Comments
 (0)