diff --git a/libclamav/unzip.c b/libclamav/unzip.c index 29bf42afa0..ec9046fbc6 100644 --- a/libclamav/unzip.c +++ b/libclamav/unzip.c @@ -119,7 +119,7 @@ static cl_error_t unz( { char obuf[BUFSIZ] = {0}; char *tempfile = NULL; - int out_file, ret = CL_CLEAN; + int out_file, ret = CL_SUCCESS; int res = 1; size_t written = 0; @@ -146,8 +146,8 @@ static cl_error_t unz( if (csize < usize) { unsigned int fake = *num_files_unzipped + 1; cli_dbgmsg("cli_unzip: attempting to inflate stored file with inconsistent size\n"); - if (CL_CLEAN == (ret = unz(src, csize, usize, ALG_DEFLATE, 0, &fake, ctx, - tmpd, zcb, original_filename, decrypted))) { + if (CL_SUCCESS == (ret = unz(src, csize, usize, ALG_DEFLATE, 0, &fake, ctx, + tmpd, zcb, original_filename, decrypted))) { (*num_files_unzipped)++; res = fake - (*num_files_unzipped); } else @@ -575,7 +575,11 @@ static inline cl_error_t zdecrypt( pass_any = pass_any->next; } - cli_dbgmsg("cli_unzip: decrypt - skipping encrypted file, no valid passwords\n"); + cli_dbgmsg("cli_unzip: decrypt failed - will attempt to unzip as if it were not encrypted\n"); + + ret = unz(src, csize, usize, LOCAL_HEADER_method, LOCAL_HEADER_flags, + num_files_unzipped, ctx, tmpd, zcb, original_filename, false); + return CL_SUCCESS; } @@ -957,7 +961,7 @@ static int sort_by_file_offset(const void *first, const void *second) * @param coff The central directory offset * @param[out] catalogue A catalogue of zip_records found in the central directory. * @param[out] num_records The number of records in the catalogue. - * @return cl_error_t CL_CLEAN if no overlapping files + * @return cl_error_t CL_SUCCESS if no overlapping files * @return cl_error_t CL_VIRUS if overlapping files and heuristic alerts are enabled * @return cl_error_t CL_EFORMAT if overlapping files and heuristic alerts are disabled * @return cl_error_t CL_ETIMEOUT if the scan time limit is exceeded. 
@@ -971,8 +975,8 @@ cl_error_t index_the_central_directory( struct zip_record **catalogue, size_t *num_records) { - cl_error_t status = CL_CLEAN; - cl_error_t ret = CL_CLEAN; + cl_error_t status = CL_ERROR; + cl_error_t ret; size_t num_record_blocks = 0; size_t index = 0; @@ -992,13 +996,13 @@ cl_error_t index_the_central_directory( *catalogue = NULL; *num_records = 0; - zip_catalogue = (struct zip_record *)malloc(sizeof(struct zip_record) * ZIP_RECORDS_CHECK_BLOCKSIZE); - if (NULL == zip_catalogue) { - status = CL_EMEM; - goto done; - } + CLI_CALLOC_OR_GOTO_DONE( + zip_catalogue, + 1, + sizeof(struct zip_record) * ZIP_RECORDS_CHECK_BLOCKSIZE, + status = CL_EMEM); + num_record_blocks = 1; - memset(zip_catalogue, 0, sizeof(struct zip_record) * ZIP_RECORDS_CHECK_BLOCKSIZE); cli_dbgmsg("cli_unzip: checking for non-recursive zip bombs...\n"); @@ -1047,8 +1051,6 @@ cl_error_t index_the_central_directory( } if (records_count % ZIP_RECORDS_CHECK_BLOCKSIZE == 0) { - struct zip_record *zip_catalogue_new = NULL; - cli_dbgmsg(" cli_unzip: Exceeded zip record block size, allocating more space...\n"); /* allocate more space for zip records */ @@ -1059,17 +1061,15 @@ cl_error_t index_the_central_directory( goto done; } - zip_catalogue_new = cli_max_realloc(zip_catalogue, sizeof(struct zip_record) * ZIP_RECORDS_CHECK_BLOCKSIZE * (num_record_blocks + 1)); - if (NULL == zip_catalogue_new) { - status = CL_EMEM; - goto done; - } - zip_catalogue = zip_catalogue_new; - zip_catalogue_new = NULL; + CLI_MAX_REALLOC_OR_GOTO_DONE( + zip_catalogue, + sizeof(struct zip_record) * ZIP_RECORDS_CHECK_BLOCKSIZE * (num_record_blocks + 1), + status = CL_EMEM); num_record_blocks++; /* zero out the memory for the new records */ - memset(&(zip_catalogue[records_count]), 0, sizeof(struct zip_record) * (ZIP_RECORDS_CHECK_BLOCKSIZE * num_record_blocks - records_count)); + memset(&(zip_catalogue[records_count]), 0, + sizeof(struct zip_record) * (ZIP_RECORDS_CHECK_BLOCKSIZE * num_record_blocks - 
records_count)); } } while (1); @@ -1167,11 +1167,460 @@ cl_error_t index_the_central_directory( return status; } +/** + * @brief Index local file headers between two file offsets + * + * This function indexes every file within certain file offsets in a zip file. + * It places the indexed local file headers into a catalogue. If there are + * already elements in the catalogue, it appends the found files to the + * catalogue. + * + * The caller is responsible for freeing the catalogue. + * The catalogue may contain duplicate items, which should be skipped. + * + * @param ctx The scanning context + * @param map The file map + * @param fsize The file size + * @param start_offset The start file offset + * @param end_offset The end file offset + * @param file_count The number of files extracted from the zip file thus far + * @param[out] temp_catalogue A catalogue of zip_records. Found files between the two offset bounds will be appended to this list. + * @param[out] num_records The number of records in the catalogue. + * @return cl_error_t CL_SUCCESS if no overlapping files + * @return cl_error_t CL_VIRUS if overlapping files and heuristic alerts are enabled + * @return cl_error_t CL_EFORMAT if overlapping files and heuristic alerts are disabled + * @return cl_error_t CL_ETIMEOUT if the scan time limit is exceeded. + * @return cl_error_t CL_EMEM for memory allocation errors. 
+ */ +cl_error_t index_local_file_headers_within_bounds( + cli_ctx *ctx, + fmap_t *map, + uint32_t fsize, + uint32_t start_offset, + uint32_t end_offset, + uint32_t file_count, + struct zip_record **temp_catalogue, + size_t *num_records) +{ + cl_error_t status = CL_ERROR; + cl_error_t ret; + + size_t num_record_blocks = 0; + size_t index = 0; + + uint32_t coff = 0; + uint32_t total_file_count = file_count; + const char *ptr = NULL; + struct zip_record *zip_catalogue = NULL; + bool exceeded_max_files = false; + + if (NULL == temp_catalogue || NULL == num_records) { + cli_errmsg("index_local_file_headers_within_bounds: Invalid NULL arguments\n"); + goto done; + } + + zip_catalogue = *temp_catalogue; + + /* + * Allocate zip_record if it is empty. If not empty, we will append file headers to the list + */ + if (NULL == zip_catalogue) { + CLI_CALLOC_OR_GOTO_DONE( + zip_catalogue, + 1, + sizeof(struct zip_record) * ZIP_RECORDS_CHECK_BLOCKSIZE, + status = CL_EMEM); + + *num_records = 0; + } + + num_record_blocks = (*num_records / ZIP_RECORDS_CHECK_BLOCKSIZE) + 1; + index = *num_records; + + if (start_offset > fsize || end_offset > fsize || start_offset > end_offset) { + cli_errmsg("index_local_file_headers_within_bounds: Invalid offset arguments\n"); + goto done; + } + + /* + * Search for local file headers between the start and end offsets. Append found file headers to zip_catalogue + */ + for (coff = start_offset; coff < end_offset; coff++) { + if (!(ptr = fmap_need_off_once(map, coff, 4))) + continue; + if (cli_readint32(ptr) == ZIP_MAGIC_LOCAL_FILE_HEADER) { + // increment coff by the size of the found local file header + file data + coff += parse_local_file_header( + map, + coff, + fsize - coff, + NULL, + total_file_count + 1, + NULL, + &ret, + ctx, + NULL, + 1, + NULL, + &(zip_catalogue[index])); + // decrement coff by 1 to account for the increment at the end of the loop + coff -= 1; + + if (CL_EPARSE != ret) { + // Found a record. 
+ index++; + total_file_count++; + } + + if (ret == CL_VIRUS) { + status = CL_VIRUS; + goto done; + } + + if (cli_checktimelimit(ctx) != CL_SUCCESS) { + cli_dbgmsg("cli_unzip: Time limit reached (max: %u)\n", ctx->engine->maxscantime); + status = CL_ETIMEOUT; + goto done; + } + + /* stop checking file entries if we'll exceed maxfiles */ + if (ctx->engine->maxfiles && file_count >= ctx->engine->maxfiles) { + cli_dbgmsg("cli_unzip: Files limit reached (max: %u)\n", ctx->engine->maxfiles); + cli_append_potentially_unwanted_if_heur_exceedsmax(ctx, "Heuristics.Limits.Exceeded.MaxFiles"); + exceeded_max_files = true; // Set a bool so we can return the correct status code later. + // We still need to scan the files we found while reviewing the file records up to this limit. + break; + } + + if (index % ZIP_RECORDS_CHECK_BLOCKSIZE == 0) { + struct zip_record *zip_catalogue_new = NULL; + + cli_dbgmsg(" cli_unzip: Exceeded zip record block size, allocating more space...\n"); + + /* allocate more space for zip records */ + if (sizeof(struct zip_record) * ZIP_RECORDS_CHECK_BLOCKSIZE * (num_record_blocks + 1) < + sizeof(struct zip_record) * ZIP_RECORDS_CHECK_BLOCKSIZE * (num_record_blocks)) { + cli_errmsg("cli_unzip: Number of file records in zip will exceed the max for current architecture (integer overflow)\n"); + status = CL_EFORMAT; + goto done; + } + + zip_catalogue_new = cli_max_realloc(zip_catalogue, sizeof(struct zip_record) * ZIP_RECORDS_CHECK_BLOCKSIZE * (num_record_blocks + 1)); + if (NULL == zip_catalogue_new) { + status = CL_EMEM; + goto done; + } + zip_catalogue = zip_catalogue_new; + zip_catalogue_new = NULL; + + num_record_blocks++; + /* zero out the memory for the new records */ + memset(&(zip_catalogue[index]), 0, sizeof(struct zip_record) * (ZIP_RECORDS_CHECK_BLOCKSIZE * num_record_blocks - index)); + } + } + } + + *temp_catalogue = zip_catalogue; + *num_records = index; + status = CL_SUCCESS; + +done: + if (CL_SUCCESS != status) { + if (NULL != 
zip_catalogue) { + size_t i; + for (i = 0; i < index; i++) { + if (NULL != zip_catalogue[i].original_filename) { + free(zip_catalogue[i].original_filename); + zip_catalogue[i].original_filename = NULL; + } + } + free(zip_catalogue); + zip_catalogue = NULL; + *temp_catalogue = NULL; // zip_catalogue and *temp_catalogue have the same value. Set temp_catalogue to NULL to ensure no use after free + } + + if (exceeded_max_files) { + status = CL_EMAXFILES; + } + } + + return status; +} + +/** + * @brief Add files not present in the central directory to the catalogue + * + * This function indexes every file not present in the central directory. + * It searches through all the local file headers in the zip file and + * adds any that are found that were not already in the catalogue. + * + * The caller is responsible for freeing the catalogue. + * The catalogue may contain duplicate items, which should be skipped. + * + * @param ctx The scanning context + * @param map The file map + * @param fsize The file size + * @param[in, out] catalogue A catalogue of zip_records found in the central directory. + * @param[in, out] num_records The number of records in the catalogue. + * @return cl_error_t CL_SUCCESS if no overlapping files + * @return cl_error_t CL_VIRUS if overlapping files and heuristic alerts are enabled + * @return cl_error_t CL_EFORMAT if overlapping files and heuristic alerts are disabled + * @return cl_error_t CL_ETIMEOUT if the scan time limit is exceeded. + * @return cl_error_t CL_EMEM for memory allocation errors. 
+ */ +cl_error_t index_local_file_headers( + cli_ctx *ctx, + fmap_t *map, + uint32_t fsize, + struct zip_record **catalogue, + size_t *num_records) +{ + cl_error_t status = CL_ERROR; + cl_error_t ret; + + uint32_t i = 0; + uint32_t start_offset = 0; + uint32_t end_offset = 0; + size_t total_files_found = 0; + + struct zip_record *temp_catalogue = NULL; + struct zip_record *combined_catalogue = NULL; + struct zip_record *curr_record = NULL; + struct zip_record *next_record = NULL; + struct zip_record *prev_record = NULL; + size_t local_file_headers_count = 0; + uint32_t num_overlapping_files = 0; + bool exceeded_max_files = false; + + if (NULL == catalogue || NULL == num_records || NULL == *catalogue) { + cli_dbgmsg("index_local_file_headers: Invalid NULL arguments\n"); + goto done; + } + + total_files_found = *num_records; + + /* + * Generate a list of zip records found before, between, and after the zip records already in catalogue + * First, scan between the start of the file and the first zip_record offset (or the end of the file if no zip_records have been found) + */ + if (*num_records == 0) { + end_offset = fsize; + } else { + end_offset = (*catalogue)[0].local_header_offset; + } + + ret = index_local_file_headers_within_bounds( + ctx, + map, + fsize, + start_offset, + end_offset, + total_files_found, + &temp_catalogue, + &local_file_headers_count); + if (CL_SUCCESS != ret) { + goto done; + } + + total_files_found += local_file_headers_count; + + /* + * Search for zip records between the zip records already in the catalogue + */ + for (i = 0; i < *num_records; i++) { + // Before searching for more files, check if number of found files exceeds maxfiles + if (ctx->engine->maxfiles && total_files_found >= ctx->engine->maxfiles) { + cli_dbgmsg("cli_unzip: Files limit reached (max: %u)\n", ctx->engine->maxfiles); + cli_append_potentially_unwanted_if_heur_exceedsmax(ctx, "Heuristics.Limits.Exceeded.MaxFiles"); + exceeded_max_files = true; // Set a bool so we can 
return the correct status code later. + // We still need to scan the files we found while reviewing the file records up to this limit. + break; + } + + curr_record = &((*catalogue)[i]); + start_offset = curr_record->local_header_offset + curr_record->local_header_size + curr_record->compressed_size; + if (i + 1 == *num_records) { + end_offset = fsize; + } else { + next_record = &((*catalogue)[i + 1]); + end_offset = next_record->local_header_offset; + } + + ret = index_local_file_headers_within_bounds( + ctx, + map, + fsize, + start_offset, + end_offset, + total_files_found, + &temp_catalogue, + &local_file_headers_count); + if (CL_SUCCESS != ret) { + status = ret; + goto done; + } + + total_files_found = *num_records + local_file_headers_count; + + if (cli_checktimelimit(ctx) != CL_SUCCESS) { + cli_dbgmsg("cli_unzip: Time limit reached (max: %u)\n", ctx->engine->maxscantime); + status = CL_ETIMEOUT; + goto done; + } + } + + /* + * Combine the zip records already in the catalogue with the recently found zip records + * Only do this if new zip records were found + */ + if (local_file_headers_count > 0) { + CLI_CALLOC_OR_GOTO_DONE( + combined_catalogue, + 1, + sizeof(struct zip_record) * ZIP_RECORDS_CHECK_BLOCKSIZE * (total_files_found + 1), + status = CL_EMEM); + + // *num_records is the number of already found files + // local_file_headers_count is the number of new files found + // total_files_found is the sum of both of the above + uint32_t temp_catalogue_offset = 0; + uint32_t catalogue_offset = 0; + + for (i = 0; i < total_files_found; i++) { + // Conditions in which we add from temp_catalogue: it is the only one left OR + if (catalogue_offset >= *num_records || + (temp_catalogue_offset < local_file_headers_count && + temp_catalogue[temp_catalogue_offset].local_header_offset < (*catalogue)[catalogue_offset].local_header_offset)) { + // add entry from temp_catalogue into the list + combined_catalogue[i] = temp_catalogue[temp_catalogue_offset]; + 
temp_catalogue_offset++; + } else { + // add entry from the catalogue into the list + combined_catalogue[i] = (*catalogue)[catalogue_offset]; + catalogue_offset++; + } + + /* + * Detect overlapping files. + */ + if (i > 0) { + prev_record = &(combined_catalogue[i - 1]); + curr_record = &(combined_catalogue[i]); + + uint32_t prev_record_size = prev_record->local_header_size + prev_record->compressed_size; + uint32_t curr_record_size = curr_record->local_header_size + curr_record->compressed_size; + uint32_t prev_record_end; + uint32_t curr_record_end; + + /* Check for integer overflow in 32bit size & offset values */ + if ((UINT32_MAX - prev_record_size < prev_record->local_header_offset) || + (UINT32_MAX - curr_record_size < curr_record->local_header_offset)) { + cli_dbgmsg("cli_unzip: Integer overflow detected; invalid data sizes in zip file headers.\n"); + status = CL_EFORMAT; + goto done; + } + + prev_record_end = prev_record->local_header_offset + prev_record_size; + curr_record_end = curr_record->local_header_offset + curr_record_size; + + if (((curr_record->local_header_offset >= prev_record->local_header_offset) && (curr_record->local_header_offset + ZIP_RECORD_OVERLAP_FUDGE_FACTOR < prev_record_end)) || + ((prev_record->local_header_offset >= curr_record->local_header_offset) && (prev_record->local_header_offset + ZIP_RECORD_OVERLAP_FUDGE_FACTOR < curr_record_end))) { + /* Overlapping file detected */ + num_overlapping_files++; + + if ((curr_record->local_header_offset == prev_record->local_header_offset) && + (curr_record->local_header_size == prev_record->local_header_size) && + (curr_record->compressed_size == prev_record->compressed_size)) { + cli_dbgmsg("cli_unzip: Ignoring duplicate file entry @ 0x%x.\n", curr_record->local_header_offset); + } else { + cli_dbgmsg("cli_unzip: Overlapping files detected.\n"); + cli_dbgmsg(" previous file end: %u\n", prev_record_end); + cli_dbgmsg(" current file start: %u\n", curr_record->local_header_offset); + + if 
(ZIP_MAX_NUM_OVERLAPPING_FILES < num_overlapping_files) { + if (SCAN_HEURISTICS) { + status = cli_append_potentially_unwanted(ctx, "Heuristics.Zip.OverlappingFiles"); + } else { + status = CL_EFORMAT; + } + goto done; + } + } + } + } + + if (cli_checktimelimit(ctx) != CL_SUCCESS) { + cli_dbgmsg("cli_unzip: Time limit reached (max: %u)\n", ctx->engine->maxscantime); + status = CL_ETIMEOUT; + goto done; + } + } + + free(temp_catalogue); + temp_catalogue = NULL; + free(*catalogue); + *catalogue = combined_catalogue; + *num_records = total_files_found; + } else { + free(temp_catalogue); + temp_catalogue = NULL; + } + + status = CL_SUCCESS; + +done: + if (CL_SUCCESS != status) { + if (NULL != *catalogue) { + size_t i; + for (i = 0; i < (total_files_found - local_file_headers_count); i++) { + if (NULL != (*catalogue)[i].original_filename) { + free((*catalogue)[i].original_filename); + (*catalogue)[i].original_filename = NULL; + } + } + free(*catalogue); + *catalogue = NULL; + } + + if (NULL != temp_catalogue) { + size_t i; + for (i = 0; i < local_file_headers_count; i++) { + if (NULL != temp_catalogue[i].original_filename) { + free(temp_catalogue[i].original_filename); + temp_catalogue[i].original_filename = NULL; + } + } + free(temp_catalogue); + temp_catalogue = NULL; + } + + if (NULL != combined_catalogue) { + size_t i; + for (i = 0; i < total_files_found; i++) { + if (NULL != combined_catalogue[i].original_filename) { + free(combined_catalogue[i].original_filename); + combined_catalogue[i].original_filename = NULL; + } + } + free(combined_catalogue); + combined_catalogue = NULL; + } + + if (exceeded_max_files) { + status = CL_EMAXFILES; + } + } + + return status; +} + cl_error_t cli_unzip(cli_ctx *ctx) { + cl_error_t status = CL_ERROR; + cl_error_t ret; + unsigned int file_count = 0, num_files_unzipped = 0; - cl_error_t ret = CL_CLEAN; - uint32_t fsize, lhoff = 0, coff = 0; + uint32_t fsize, coff = 0; fmap_t *map = ctx->fmap; char *tmpd = NULL; const char *ptr; @@ 
-1184,12 +1633,12 @@ cl_error_t cli_unzip(cli_ctx *ctx) fsize = (uint32_t)map->len; if (sizeof(off_t) != sizeof(uint32_t) && (size_t)fsize != map->len) { cli_dbgmsg("cli_unzip: file too big\n"); - ret = CL_CLEAN; + status = CL_SUCCESS; goto done; } if (fsize < SIZEOF_CENTRAL_HEADER) { cli_dbgmsg("cli_unzip: file too short\n"); - ret = CL_CLEAN; + status = CL_SUCCESS; goto done; } @@ -1218,126 +1667,120 @@ cl_error_t cli_unzip(cli_ctx *ctx) &zip_catalogue, &records_count); if (CL_SUCCESS != ret) { - goto done; - } + cli_dbgmsg("index_central_dir_failed, must rely purely on local file headers\n"); - /* - * Then decrypt/unzip & scan each unique file entry. - */ - for (i = 0; i < records_count; i++) { - const uint8_t *compressed_data = NULL; + CLI_CALLOC_OR_GOTO_DONE( + zip_catalogue, + 1, + sizeof(struct zip_record) * ZIP_RECORDS_CHECK_BLOCKSIZE, + status = CL_EMEM); - if ((i > 0) && - (zip_catalogue[i].local_header_offset == zip_catalogue[i - 1].local_header_offset) && - (zip_catalogue[i].local_header_size == zip_catalogue[i - 1].local_header_size) && - (zip_catalogue[i].compressed_size == zip_catalogue[i - 1].compressed_size)) { + records_count = 0; + } + } else { + cli_dbgmsg("cli_unzip: central directory header not found, must rely purely on local file headers\n"); - /* Duplicate file entry, skip. 
*/ - cli_dbgmsg("cli_unzip: Skipping unzipping of duplicate file entry: @ 0x%x\n", zip_catalogue[i].local_header_offset); - continue; - } + CLI_CALLOC_OR_GOTO_DONE( + zip_catalogue, + 1, + sizeof(struct zip_record) * ZIP_RECORDS_CHECK_BLOCKSIZE, + status = CL_EMEM); - compressed_data = fmap_need_off(map, zip_catalogue[i].local_header_offset + zip_catalogue[i].local_header_size, SIZEOF_LOCAL_HEADER); - - if (zip_catalogue[i].encrypted) { - if (fmap_need_ptr_once(map, compressed_data, zip_catalogue[i].compressed_size)) - ret = zdecrypt( - compressed_data, - zip_catalogue[i].compressed_size, - zip_catalogue[i].uncompressed_size, - fmap_need_off(map, zip_catalogue[i].local_header_offset, SIZEOF_LOCAL_HEADER), - &num_files_unzipped, - ctx, - tmpd, - zip_scan_cb, - zip_catalogue[i].original_filename); - } else { - if (fmap_need_ptr_once(map, compressed_data, zip_catalogue[i].compressed_size)) - ret = unz( - compressed_data, - zip_catalogue[i].compressed_size, - zip_catalogue[i].uncompressed_size, - zip_catalogue[i].method, - zip_catalogue[i].flags, - &num_files_unzipped, - ctx, - tmpd, - zip_scan_cb, - zip_catalogue[i].original_filename, - false); - } + records_count = 0; + } - file_count++; + /* + * Add local file headers not referenced by the central directory + */ + ret = index_local_file_headers( + ctx, + map, + fsize, + &zip_catalogue, + &records_count); + if (CL_SUCCESS != ret) { + cli_dbgmsg("index_local_file_headers_failed\n"); + status = ret; + goto done; + } - if (ctx->engine->maxfiles && num_files_unzipped >= ctx->engine->maxfiles) { - // Note: this check piggybacks on the MaxFiles setting, but is not actually - // scanning these files or incrementing the ctx->scannedfiles count - // This check is also redundant. 
zip_scan_cb == cli_magic_scan_desc, - // so we will also check and update the limits for the actual number of scanned - // files inside cli_magic_scan() - cli_dbgmsg("cli_unzip: Files limit reached (max: %u)\n", ctx->engine->maxfiles); - cli_append_potentially_unwanted_if_heur_exceedsmax(ctx, "Heuristics.Limits.Exceeded.MaxFiles"); - ret = CL_EMAXFILES; - } + /* + * Then decrypt/unzip & scan each unique file entry. + */ + for (i = 0; i < records_count; i++) { + const uint8_t *compressed_data = NULL; - if (cli_checktimelimit(ctx) != CL_SUCCESS) { - cli_dbgmsg("cli_unzip: Time limit reached (max: %u)\n", ctx->engine->maxscantime); - ret = CL_ETIMEOUT; - goto done; - } + if ((i > 0) && + (zip_catalogue[i].local_header_offset == zip_catalogue[i - 1].local_header_offset) && + (zip_catalogue[i].local_header_size == zip_catalogue[i - 1].local_header_size) && + (zip_catalogue[i].compressed_size == zip_catalogue[i - 1].compressed_size)) { - if (cli_json_timeout_cycle_check(ctx, &toval) != CL_SUCCESS) { - ret = CL_ETIMEOUT; - } + /* Duplicate file entry, skip. */ + cli_dbgmsg("cli_unzip: Skipping unzipping of duplicate file entry: @ 0x%x\n", zip_catalogue[i].local_header_offset); + continue; + } - if (ret != CL_SUCCESS) { - break; + // Get a pointer to the compressed data, is just after the local header. 
+ compressed_data = fmap_need_off( + map, + zip_catalogue[i].local_header_offset + zip_catalogue[i].local_header_size, + zip_catalogue[i].compressed_size); + + if (zip_catalogue[i].encrypted) { + if (fmap_need_ptr_once(map, compressed_data, zip_catalogue[i].compressed_size)) { + status = zdecrypt( + compressed_data, + zip_catalogue[i].compressed_size, + zip_catalogue[i].uncompressed_size, + fmap_need_off(map, zip_catalogue[i].local_header_offset, SIZEOF_LOCAL_HEADER), + &num_files_unzipped, + ctx, + tmpd, + zip_scan_cb, + zip_catalogue[i].original_filename); + } + } else { + if (fmap_need_ptr_once(map, compressed_data, zip_catalogue[i].compressed_size)) { + status = unz( + compressed_data, + zip_catalogue[i].compressed_size, + zip_catalogue[i].uncompressed_size, + zip_catalogue[i].method, + zip_catalogue[i].flags, + &num_files_unzipped, + ctx, + tmpd, + zip_scan_cb, + zip_catalogue[i].original_filename, + false); } } - } else { - cli_dbgmsg("cli_unzip: central not found, using localhdrs\n"); - } - if (CL_SUCCESS != ret) { - // goto done right away if there was a timeout, an alert, etc. - // This is slightly redundant since the while loop will only happen - // if ret == CL_SUCCESS but it's more explicit. - goto done; - } + file_count++; - if (0 < num_files_unzipped && num_files_unzipped <= (file_count / 4)) { /* FIXME: make up a sane ratio or remove the whole logic */ - file_count = 0; - while ((ret == CL_CLEAN) && - (lhoff < fsize) && - (0 != (coff = parse_local_file_header(map, - lhoff, - fsize - lhoff, - &num_files_unzipped, - file_count + 1, - NULL, - &ret, - ctx, - tmpd, - 1, - zip_scan_cb, - NULL)))) { - file_count++; - lhoff += coff; + if (ctx->engine->maxfiles && num_files_unzipped >= ctx->engine->maxfiles) { + // Note: this check piggybacks on the MaxFiles setting, but is not actually + // scanning these files or incrementing the ctx->scannedfiles count + // This check is also redundant. 
zip_scan_cb == cli_magic_scan_desc, + // so we will also check and update the limits for the actual number of scanned + // files inside cli_magic_scan() + cli_dbgmsg("cli_unzip: Files limit reached (max: %u)\n", ctx->engine->maxfiles); + cli_append_potentially_unwanted_if_heur_exceedsmax(ctx, "Heuristics.Limits.Exceeded.MaxFiles"); + status = CL_EMAXFILES; + } - if (ctx->engine->maxfiles && num_files_unzipped >= ctx->engine->maxfiles) { - // Note: this check piggybacks on the MaxFiles setting, but is not actually - // scanning these files or incrementing the ctx->scannedfiles count - // This check is also redundant. zip_scan_cb == cli_magic_scan_desc, - // so we will also check and update the limits for the actual number of scanned - // files inside cli_magic_scan() - cli_dbgmsg("cli_unzip: Files limit reached (max: %u)\n", ctx->engine->maxfiles); - cli_append_potentially_unwanted_if_heur_exceedsmax(ctx, "Heuristics.Limits.Exceeded.MaxFiles"); - ret = CL_EMAXFILES; - } + if (cli_checktimelimit(ctx) != CL_SUCCESS) { + cli_dbgmsg("cli_unzip: Time limit reached (max: %u)\n", ctx->engine->maxscantime); + status = CL_ETIMEOUT; + goto done; + } - if (cli_json_timeout_cycle_check(ctx, &toval) != CL_SUCCESS) { - ret = CL_ETIMEOUT; - } + if (cli_json_timeout_cycle_check(ctx, &toval) != CL_SUCCESS) { + status = CL_ETIMEOUT; + } + + if (status != CL_SUCCESS) { + break; } } @@ -1362,12 +1805,12 @@ cl_error_t cli_unzip(cli_ctx *ctx) free(tmpd); } - return ret; + return status; } cl_error_t unzip_single_internal(cli_ctx *ctx, off_t local_header_offset, zip_cb zcb) { - cl_error_t ret = CL_CLEAN; + cl_error_t ret = CL_SUCCESS; unsigned int num_files_unzipped = 0; uint32_t fsize; @@ -1380,25 +1823,26 @@ cl_error_t unzip_single_internal(cli_ctx *ctx, off_t local_header_offset, zip_cb ((sizeof(off_t) != sizeof(uint32_t)) && ((size_t)fsize != map->len - local_header_offset))) { cli_dbgmsg("cli_unzip: bad offset\n"); - return CL_CLEAN; + return CL_SUCCESS; } if (fsize < 
SIZEOF_LOCAL_HEADER) { cli_dbgmsg("cli_unzip: file too short\n"); - return CL_CLEAN; + return CL_SUCCESS; } - parse_local_file_header(map, - local_header_offset, - fsize, - &num_files_unzipped, - 0, - NULL, - &ret, - ctx, - NULL, - 0, - zcb, - NULL); + parse_local_file_header( + map, + local_header_offset, + fsize, + &num_files_unzipped, + 0, + NULL, + &ret, + ctx, + NULL, + 0, + zcb, + NULL); return ret; } @@ -1433,7 +1877,7 @@ cl_error_t unzip_search(cli_ctx *ctx, fmap_t *map, struct zip_requests *requests size_t fsize; uint32_t coff = 0; const char *ptr; - cl_error_t ret = CL_CLEAN; + cl_error_t ret = CL_SUCCESS; uint32_t toval = 0; cli_dbgmsg("in unzip_search\n"); @@ -1448,11 +1892,11 @@ cl_error_t unzip_search(cli_ctx *ctx, fmap_t *map, struct zip_requests *requests fsize = zmap->len; if (sizeof(off_t) != sizeof(uint32_t) && fsize != zmap->len) { cli_dbgmsg("unzip_search: file too big\n"); - return CL_CLEAN; + return CL_SUCCESS; } if (fsize < SIZEOF_CENTRAL_HEADER) { cli_dbgmsg("unzip_search: file too short\n"); - return CL_CLEAN; + return CL_SUCCESS; } for (coff = fsize - 22; coff > 0; coff--) { /* sizeof(EOC)==22 */ @@ -1468,16 +1912,18 @@ cl_error_t unzip_search(cli_ctx *ctx, fmap_t *map, struct zip_requests *requests if (coff) { cli_dbgmsg("unzip_search: central directory header offset: @%x\n", coff); - while (ret == CL_CLEAN && (coff = parse_central_directory_file_header(zmap, - coff, - fsize, - NULL, - file_count + 1, - &ret, - ctx, - NULL, - requests, - NULL))) { + while ((ret == CL_SUCCESS) && + (coff = parse_central_directory_file_header( + zmap, + coff, + fsize, + NULL, + file_count + 1, + &ret, + ctx, + NULL, + requests, + NULL))) { if (requests->match) { ret = CL_VIRUS; } diff --git a/unit_tests/clamscan/allmatch_test.py b/unit_tests/clamscan/allmatch_test.py index d4166ea9c7..41b2af0c2e 100644 --- a/unit_tests/clamscan/allmatch_test.py +++ b/unit_tests/clamscan/allmatch_test.py @@ -7,6 +7,7 @@ import os from zipfile import ZIP_DEFLATED, ZipFile 
import sys +import hashlib sys.path.append('../unit_tests') import testcase @@ -274,6 +275,192 @@ def test_exe_pattern_plus_zipsfx(self): ] self.verify_output(output.out, expected=expected_results) + def test_zip_plus_zip(self): + self.step_name('Test that clam will the clam.zip and also another zip concatenated to the end.') + + # Build a file that is the clam.zip archive with a zip concatenated on that contains the not_eicar test string file. + clam_zip = TC.path_build / 'unit_tests' / 'input' / 'clamav_hdb_scanfiles' / 'clam.zip' + + not_eicar_zip = TC.path_tmp / 'not-eicar.zip' + with ZipFile(str(not_eicar_zip), 'w', ZIP_DEFLATED) as zf: + zf.writestr('not-eicar.txt', b"CLAMAV-TEST-STRING-NOT-EICAR") + + testfile = TC.path_tmp / 'clam.zip.not_eicar.zipsfx' + testfile.write_bytes(clam_zip.read_bytes() + not_eicar_zip.read_bytes()) + + command = '{valgrind} {valgrind_args} {clamscan} -d {clam_exe_db} -d {not_eicar_db} --allmatch {testfiles}'.format( + valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan, + # We can't use the hash sig for this clam.exe program because the hash goes out the window when we concatenate on the zip. 
+ clam_exe_db=TC.path_db / 'clam.ndb', + not_eicar_db=TC.path_source / 'unit_tests' / 'input' / 'other_sigs' / 'Clamav-Unit-Test-Signature.ndb', + testfiles=testfile, + ) + output = self.execute_command(command) + + assert output.ec == 1 # virus + + expected_results = [ + 'Test.NDB.UNOFFICIAL FOUND', + 'NDB.Clamav-Unit-Test-Signature.UNOFFICIAL FOUND', + ] + self.verify_output(output.out, expected=expected_results) + + def test_zip_all_files(self): + self.step_name('Test that clam will extract all files from a zip.') + + testfile = TC.path_tmp / 'multi-file.zip' + with ZipFile(str(testfile), 'w', ZIP_DEFLATED) as zf: + zf.writestr('file-0.txt', b"Test file 0") + zf.writestr('file-1.txt', b"Test file 1") + zf.writestr('file-2.txt', b"Test file 2") + zf.writestr('file-3.txt', b"Test file 3") + + # Calculate sha256 and len for all files + sha256s = {} + with ZipFile(str(testfile), 'r') as zf: + for name in zf.namelist(): + data = zf.read(name) + sha256s[name] = ( hashlib.sha256(data).hexdigest(), len(data) ) + + # Make sha256 signatures for all files + with open(TC.path_db / 'missing_entries.hsb', 'w') as f: + for name, data in sha256s.items(): + f.write(f"{data[0]}:{data[1]}:{name}.NDB:73\n") + + command = '{valgrind} {valgrind_args} {clamscan} -d {missing_entries_db} --allmatch {testfiles}'.format( + valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan, + # We can't use the hash sig for this clam.exe program because the hash goes out the window when we concatenate on the zip. 
+ missing_entries_db=TC.path_db / 'missing_entries.hsb', + testfiles=testfile, + ) + output = self.execute_command(command) + + assert output.ec == 1 # virus + + expected_results = [ + 'file-0.txt.NDB.UNOFFICIAL FOUND', + 'file-1.txt.NDB.UNOFFICIAL FOUND', + 'file-2.txt.NDB.UNOFFICIAL FOUND', + 'file-3.txt.NDB.UNOFFICIAL FOUND', + ] + self.verify_output(output.out, expected=expected_results) + + def test_zip_no_central_directory(self): + self.step_name('Test that clam will extract files from a zip with no central directory.') + + testfile = TC.path_tmp / 'multi-file-no-central.zip' + with ZipFile(str(testfile), 'w', ZIP_DEFLATED) as zf: + zf.writestr('file-0.txt', b"Test file 0") + zf.writestr('file-1.txt', b"Test file 1") + zf.writestr('file-2.txt', b"Test file 2") + zf.writestr('file-3.txt', b"Test file 3") + + # Calculate sha256 and len for all files + sha256s = {} + with ZipFile(str(testfile), 'r') as zf: + for name in zf.namelist(): + data = zf.read(name) + sha256s[name] = ( hashlib.sha256(data).hexdigest(), len(data) ) + + # Make sha256 signatures for all files + with open(TC.path_db / 'missing_entries.hsb', 'w') as f: + for name, data in sha256s.items(): + f.write(f"{data[0]}:{data[1]}:{name}.NDB:73\n") + + # Remove the central directory + with open(str(testfile), 'r+b') as f: + # find the start of the central directory, i.e. the first central directory record, which has a 4-byte signature 'PK\x01\x02' + while f.read(4) != b'PK\x01\x02': + pass + # rewind 4 bytes + f.seek(-4, os.SEEK_CUR) + # truncate the central directory + f.truncate() + + command = '{valgrind} {valgrind_args} {clamscan} -d {missing_entries_db} --allmatch {testfiles}'.format( + valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan, + # We can't use the hash sig for this clam.exe program because the hash goes out the window when we concatenate on the zip.
+ missing_entries_db=TC.path_db / 'missing_entries.hsb', + testfiles=testfile, + ) + output = self.execute_command(command) + + assert output.ec == 1 # virus + + expected_results = [ + 'file-0.txt.NDB.UNOFFICIAL FOUND', + 'file-1.txt.NDB.UNOFFICIAL FOUND', + 'file-2.txt.NDB.UNOFFICIAL FOUND', + 'file-3.txt.NDB.UNOFFICIAL FOUND', + ] + self.verify_output(output.out, expected=expected_results) + + def test_zip_missing_centrals(self): + self.step_name('Test that clam will detect files omitted from zip central directory.') + + testfile = TC.path_tmp / 'multi-file-missing-centrals.zip' + with ZipFile(str(testfile), 'w', ZIP_DEFLATED) as zf: + zf.writestr('file-0.txt', b"Test file 0") + zf.writestr('file-1.txt', b"Test file 1") + zf.writestr('file-2.txt', b"Test file 2") + zf.writestr('file-3.txt', b"Test file 3") + + # Calculate sha256 and len for all files + sha256s = {} + with ZipFile(str(testfile), 'r') as zf: + for name in zf.namelist(): + data = zf.read(name) + sha256s[name] = ( hashlib.sha256(data).hexdigest(), len(data) ) + + # Make sha256 signatures for all files + with open(TC.path_db / 'missing_entries.hsb', 'w') as f: + for name, data in sha256s.items(): + f.write(f"{data[0]}:{data[1]}:{name}.NDB:73\n") + + # Remove the central directory entries for file-0.txt and file-2.txt + with open(str(testfile), 'r+b') as f: + # find the first central directory record.
Each will have a 4-byte signature 'PK\x01\x02' + while f.read(4) != b'PK\x01\x02': + # rewind 3 bytes, because it might not be aligned + f.seek(-3, os.SEEK_CUR) + + # get the offset + central_dir_offset = f.tell() + + # read the central directory + central_dir = f.read() + + # truncate the central directory + f.truncate(central_dir_offset) + + # seek to the end of the file + f.seek(0, os.SEEK_END) + + # write just the central directory entries for file-1.txt and file-3.txt + split_central_dir = central_dir.split(b'PK\x01\x02') + #f.write(split_central_dir[0]) + f.write(split_central_dir[1]) + #f.write(split_central_dir[2]) + f.write(split_central_dir[3]) # note the last one also has the end of central directory record. That's fine. + + command = '{valgrind} {valgrind_args} {clamscan} -d {missing_entries_db} --allmatch {testfiles}'.format( + valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan, + # We can't use the hash sig for this clam.exe program because the hash goes out the window when we concatenate on the zip. + missing_entries_db=TC.path_db / 'missing_entries.hsb', + testfiles=testfile, + ) + output = self.execute_command(command) + + assert output.ec == 1 # virus + + expected_results = [ + 'file-0.txt.NDB.UNOFFICIAL FOUND', + 'file-1.txt.NDB.UNOFFICIAL FOUND', + 'file-2.txt.NDB.UNOFFICIAL FOUND', + 'file-3.txt.NDB.UNOFFICIAL FOUND', + ] + self.verify_output(output.out, expected=expected_results) + def test_pe_allmatch(self): self.step_name('Test that clam will detect a string in test.exe with a wide variety of signatures written or generated for the file.')