diff --git a/libclamav/readdb.c b/libclamav/readdb.c
index 3051489fc6..a7d0e996d2 100644
--- a/libclamav/readdb.c
+++ b/libclamav/readdb.c
@@ -335,23 +335,37 @@ static cl_error_t readdb_load_regex_subsignature(struct cli_matcher *root, const
     char *end = NULL;
 
     const char *trigger, *pattern, *cflags;
-    int subtokens_count;
 
+    // The maximum number of `:` delimited fields in a regex subsignature.
 #define MAX_REGEX_SUB_TOKENS 4
     char *subtokens[MAX_REGEX_SUB_TOKENS + 1];
     const char *sig;
 
-    subtokens_count = cli_ldbtokenize(hexsig, ':', MAX_REGEX_SUB_TOKENS + 1, (const char **)subtokens, 0);
-    if (!subtokens_count) {
-        cli_errmsg("Invalid or unsupported ldb subsignature format\n");
-        status = CL_EMALFDB;
-        goto done;
-    }
+    if (0 == strncmp(virname, "YARA", 4)) {
+        // Do not tokenize for ':' in yara regex strings. ':' do not have special meaning in yara regex strings.
+        // Also, Yara regex strings may use '/' without escape characters, which confuses the "within_pcre" feature of `cli_ldbtokenize()`.
+        sig = hexsig;
 
-    if ((subtokens_count % 2) == 0)
-        offset = subtokens[0];
+    } else {
+        // LDB PCRE subsignatures have this structure:
+        //     [Offset:]Trigger/PCRE/[Flags]
+        // We need to split on the ':' character in case the offset was specified.
 
-    sig = (subtokens_count % 2) ? subtokens[0] : subtokens[1];
+        size_t subtokens_count = cli_ldbtokenize(hexsig, ':', MAX_REGEX_SUB_TOKENS + 1, (const char **)subtokens, 0);
+        if (!subtokens_count) {
+            cli_errmsg("Invalid or unsupported ldb subsignature format\n");
+            status = CL_EMALFDB;
+            goto done;
+        }
+
+        if (subtokens_count == 2) {
+            // Offset was specified
+            offset = subtokens[0];
+            sig = subtokens[1];
+        } else {
+            sig = subtokens[0];
+        }
+    }
 
     /* get copied */
     hexcpy = cli_strdup(sig);
diff --git a/libclamav/str.c b/libclamav/str.c
index 2c0266fa65..7fd015794a 100644
--- a/libclamav/str.c
+++ b/libclamav/str.c
@@ -857,32 +857,40 @@ cl_error_t cli_strntoul_wrap(const char *buf, size_t buf_size,
 
 size_t cli_ldbtokenize(char *buffer, const char delim, const size_t token_count, const char **tokens, size_t token_skip)
 {
-    size_t tokens_found, i;
-    int within_pcre = 0;
-    char *start = buffer;
+    size_t tokens_found = 0;
+    size_t token_index = 0;
+    size_t buffer_index = 0;
+    bool within_pcre = false;
 
-    for (tokens_found = 0; tokens_found < token_count;) {
-        tokens[tokens_found++] = buffer;
+    while (tokens_found < token_count) {
+        tokens[tokens_found++] = &buffer[buffer_index];
 
-        while (*buffer != '\0') {
-            if (!within_pcre && (*buffer == delim))
+        while (buffer[buffer_index] != '\0') {
+            if (!within_pcre && (buffer[buffer_index] == delim)) {
                 break;
-            else if ((tokens_found > token_skip) &&
-                     ((buffer > start) && (*(buffer - 1) != '\\')) &&
-                     (*buffer == '/'))
+            } else if ((tokens_found > token_skip) &&
+                       // LDB PCRE rules must escape the '/' character with a '\'.
+                       // If the character sequence is "\/", then we are still within the PCRE string.
+                       ((buffer_index > 0) && (buffer[buffer_index - 1] != '\\')) && (buffer[buffer_index] == '/')) {
                 within_pcre = !within_pcre;
-            buffer++;
+            }
+            buffer_index++;
         }
 
-        if (*buffer != '\0') {
-            *buffer++ = '\0';
+        if (buffer[buffer_index] != '\0') {
+            buffer[buffer_index] = '\0';
+            buffer_index++;
         } else {
-            i = tokens_found;
-            while (i < token_count)
-                tokens[i++] = NULL;
+            token_index = tokens_found;
+            while (token_index < token_count) {
+                tokens[token_index] = NULL;
+                token_index++;
+            }
+
             return tokens_found;
         }
     }
+
     return tokens_found;
 }
 
diff --git a/libclamav_rust/src/sys.rs b/libclamav_rust/src/sys.rs
index d4c4e2e5f0..575e5dd801 100644
--- a/libclamav_rust/src/sys.rs
+++ b/libclamav_rust/src/sys.rs
@@ -743,6 +743,7 @@ pub struct recursion_level_tag {
     pub recursion_level_buffer_fmap: u32,
     pub is_normalized_layer: bool,
     pub image_fuzzy_hash: image_fuzzy_hash_t,
+    pub calculated_image_fuzzy_hash: bool,
 }
 pub type recursion_level_t = recursion_level_tag;
 #[repr(C)]
diff --git a/unit_tests/clamscan_test.py b/unit_tests/clamscan_test.py
index 2164a5e201..b77aabd9f4 100644
--- a/unit_tests/clamscan_test.py
+++ b/unit_tests/clamscan_test.py
@@ -558,3 +558,232 @@ def test_clamscan_16_intermediates(self):
             'v1rusv1rus.7z.zip: 7z_zip_intermediates_bad.UNOFFICIAL FOUND',
         ]
         self.verify_output(output.out, expected=expected_stdout, unexpected=unexpected_stdout)
+
+    def test_clamscan_17_pcre_slash_colon(self):
+        self.step_name('Test LDB and Yara regex rules with / and : in the string work')
+        # This is a regression test for a bug where :'s in a PCRE regex would act
+        # as delimiters if there was also a / in the regex before the :
+
+        testfile = TC.path_tmp / 'regex-slash-colon.sample'
+        testfile.write_text('hello blee/blah: bleh')
+
+        # First test with LDB PCRE rule
+        #
+        yara_db = TC.path_tmp / 'regex-slash-colon.ldb'
+        yara_db.write_text(
+            r'regex;Engine:81-255,Target:0;1;68656c6c6f20;0/hello blee\/blah: bleh/'
+        )
+        command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfiles}'.format(
+            valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan, path_db=yara_db, testfiles=testfile,
+        )
+        output = self.execute_command(command)
+
+        assert output.ec == 1  # virus found
+
+        expected_results = [
+            'regex-slash-colon.sample: regex.UNOFFICIAL FOUND',
+            'Infected files: 1',
+        ]
+        self.verify_output(output.out, expected=expected_results)
+
+        # Second test with YARA regex rule
+        #
+        yara_db = TC.path_tmp / 'regex-slash-colon.yara'
+        yara_db.write_text(
+            r'''
+rule regex
+{
+    meta:
+        author      = "Micah"
+        date        = "2022/07/25"
+        description = "Just a test"
+    strings:
+        $b = /hello blee\/blah: bleh/
+    condition:
+        all of them
+}
+            '''
+        )
+        command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfiles}'.format(
+            valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan, path_db=yara_db, testfiles=testfile,
+        )
+        output = self.execute_command(command)
+
+        assert output.ec == 1  # virus found
+
+        expected_results = [
+            'regex-slash-colon.sample: YARA.regex.UNOFFICIAL FOUND',
+            'Infected files: 1',
+        ]
+        self.verify_output(output.out, expected=expected_results)
+
+    def test_clamscan_18_ldb_offset_pcre(self):
+        self.step_name('Test LDB regex rules with an offset')
+        # The offset feature starts the match some # of bytes after start of the pattern match
+        # The offset is EXACT, meaning it's no longer wildcard.
+        # The match must occur exactly that number of bytes from the start of the file.
+
+        # using 'MZ' prefix so it is detected as MSEXE and not TEXT. This is to avoid normalization.
+        testfile = TC.path_tmp / 'ldb_offset_pcre'
+        testfile.write_text('MZ hello blee')
+
+        # First without the offset, make sure it matches
+        yara_db = TC.path_tmp / 'ldb_pcre_no_offset.ldb'
+        yara_db.write_text(
+            r'ldb_pcre_no_offset;Engine:81-255,Target:0;0&1;68656c6c6f20;0/hello blee/'
+        )
+        command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfiles}'.format(
+            valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan, path_db=yara_db, testfiles=testfile,
+        )
+        output = self.execute_command(command)
+
+        assert output.ec == 1  # virus found
+
+        expected_results = [
+            'ldb_offset_pcre: ldb_pcre_no_offset.UNOFFICIAL FOUND',
+            'Infected files: 1',
+        ]
+        self.verify_output(output.out, expected=expected_results)
+
+        # Next, with the offset, but it won't match, because the regex pattern is "hello blee"
+        # and with the offset of 5 (from start of file) means it should start the pcre matching at "llo blee"
+        yara_db = TC.path_tmp / 'ldb_pcre_offset_no_match.ldb'
+        yara_db.write_text(
+            r'ldb_pcre_offset_no_match;Engine:81-255,Target:0;0&1;68656c6c6f20;5:0/hello blee/'
+        )
+        command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfiles}'.format(
+            valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan, path_db=yara_db, testfiles=testfile,
+        )
+        output = self.execute_command(command)
+
+        assert output.ec == 0  # virus NOT found
+
+        expected_results = [
+            'ldb_offset_pcre: OK',
+        ]
+        self.verify_output(output.out, expected=expected_results)
+
+        # Next, with the offset, and it SHOULD match, because the regex pattern is "llo blee"
+        # and with the offset of 5 (from start of file) means it should start the pcre matching at "llo blee"
+        yara_db = TC.path_tmp / 'ldb_pcre_offset_match.ldb'
+        yara_db.write_text(
+            r'ldb_pcre_offset_match;Engine:81-255,Target:0;0&1;68656c6c6f20;5:0/llo blee/'
+        )
+        command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfiles}'.format(
+            valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan, path_db=yara_db, testfiles=testfile,
+        )
+        output = self.execute_command(command)
+
+        assert output.ec == 1  # virus found
+
+        expected_results = [
+            'ldb_offset_pcre: ldb_pcre_offset_match.UNOFFICIAL FOUND',
+            'Infected files: 1',
+        ]
+        self.verify_output(output.out, expected=expected_results)
+
+    def test_clamscan_18_ldb_pcre_flag(self):
+        self.step_name('Test LDB regex rules with case insensitive flag')
+        # This test validates that the flags field, and more specifically the case-insensitive flag, is working.
+
+        # using 'MZ' prefix so it is detected as MSEXE and not TEXT. This is to avoid normalization.
+        testfile = TC.path_tmp / 'ldb_pcre_flag'
+        testfile.write_text('MZ hello blee / BlAh')
+
+        # First test withOUT the case-insensitive flag. It should NOT match.
+        yara_db = TC.path_tmp / 'ldb_pcre_case.ldb'
+        yara_db.write_text(
+            r'ldb_pcre_case;Engine:81-255,Target:0;0&1;68656c6c6f20;0/blah/'
+        )
+        command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfiles}'.format(
+            valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan, path_db=yara_db, testfiles=testfile,
+        )
+        output = self.execute_command(command)
+
+        assert output.ec == 0  # virus NOT found
+
+        expected_results = [
+            'ldb_pcre_flag: OK',
+        ]
+        self.verify_output(output.out, expected=expected_results)
+
+        # Second test WITH the case-insensitive flag. It SHOULD match.
+        yara_db = TC.path_tmp / 'ldb_pcre_nocase.ldb'
+        yara_db.write_text(
+            r'ldb_pcre_nocase;Engine:81-255,Target:0;0&1;68656c6c6f20;0/blah/i'
+        )
+        command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfiles}'.format(
+            valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan, path_db=yara_db, testfiles=testfile,
+        )
+        output = self.execute_command(command)
+
+        assert output.ec == 1  # virus found
+
+        expected_results = [
+            'ldb_pcre_flag: ldb_pcre_nocase.UNOFFICIAL FOUND',
+            'Infected files: 1',
+        ]
+        self.verify_output(output.out, expected=expected_results)
+
+    def test_clamscan_18_ldb_multi_pcre(self):
+        self.step_name('Test LDB regex rules with multiple PCRE subsignatures')
+        # This test validates that a logical signature with more than one PCRE subsignature
+        # only alerts when every subsignature required by the logical expression matches.
+
+        # using 'MZ' prefix so it is detected as MSEXE and not TEXT. This is to avoid normalization.
+        testfile = TC.path_tmp / 'ldb_multi_pcre'
+        testfile.write_text('MZ hello blee / BlAh')
+
+        # Verify first, with two PCRE subsigs that should both match, that the alert is found.
+        yara_db = TC.path_tmp / 'ldb_multi_pcre.ldb'
+        yara_db.write_text(
+            r'ldb_multi_pcre;Engine:81-255,Target:0;0&1&2;68656c6c6f20;0/hello blee/;0/blah/i'
+        )
+        command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfiles}'.format(
+            valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan, path_db=yara_db, testfiles=testfile,
+        )
+        output = self.execute_command(command)
+
+        assert output.ec == 1  # virus found
+
+        expected_results = [
+            'ldb_multi_pcre: ldb_multi_pcre.UNOFFICIAL FOUND',
+            'Infected files: 1',
+        ]
+        self.verify_output(output.out, expected=expected_results)
+
+        # Verify next that if one of the two subsigs does not match, the whole thing does not match.
+        yara_db = TC.path_tmp / 'ldb_multi_pcre.ldb'
+        yara_db.write_text(
+            r'ldb_multi_pcre;Engine:81-255,Target:0;0&1&2;68656c6c6f20;0/hello blee/;0/bloh/i'
+        )
+        command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfiles}'.format(
+            valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan, path_db=yara_db, testfiles=testfile,
+        )
+        output = self.execute_command(command)
+
+        assert output.ec == 0  # virus NOT found
+
+        expected_results = [
+            'ldb_multi_pcre: OK',
+            'Infected files: 0',
+        ]
+        self.verify_output(output.out, expected=expected_results)
+
+        # Verify next that if the other of the two subsigs does not match, the whole thing does not match.
+        yara_db = TC.path_tmp / 'ldb_multi_pcre.ldb'
+        yara_db.write_text(
+            r'ldb_multi_pcre;Engine:81-255,Target:0;0&1&2;68656c6c6f20;0/hella blee/;0/blah/i'
+        )
+        command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfiles}'.format(
+            valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan, path_db=yara_db, testfiles=testfile,
+        )
+        output = self.execute_command(command)
+
+        assert output.ec == 0  # virus NOT found
+
+        expected_results = [
+            'ldb_multi_pcre: OK',
+            'Infected files: 0',
+        ]
+        self.verify_output(output.out, expected=expected_results)