Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 24 additions & 10 deletions libclamav/readdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -340,23 +340,37 @@ static cl_error_t readdb_load_regex_subsignature(struct cli_matcher *root, const
char *end = NULL;

const char *trigger, *pattern, *cflags;
int subtokens_count;

// The maximum number of `:` delimited fields in a regex subsignature.
#define MAX_REGEX_SUB_TOKENS 4
char *subtokens[MAX_REGEX_SUB_TOKENS + 1];
const char *sig;

subtokens_count = cli_ldbtokenize(hexsig, ':', MAX_REGEX_SUB_TOKENS + 1, (const char **)subtokens, 0);
if (!subtokens_count) {
cli_errmsg("Invalid or unsupported ldb subsignature format\n");
status = CL_EMALFDB;
goto done;
}
if (0 == strncmp(virname, "YARA", 4)) {
// Do not tokenize for ':' in yara regex strings. ':' do not have special meaning in yara regex strings.
// Also, Yara regex strings may use '/' without escape characters, which confuses the "within_pcre" feature of `cli_ldbtokenize()`.
sig = hexsig;

if ((subtokens_count % 2) == 0)
offset = subtokens[0];
} else {
// LDB PCRE subsignatures have this structure:
// [Offset:]Trigger/PCRE/[Flags]
// We need to split on the ':' character in case the offset was specified.

sig = (subtokens_count % 2) ? subtokens[0] : subtokens[1];
size_t subtokens_count = cli_ldbtokenize(hexsig, ':', MAX_REGEX_SUB_TOKENS + 1, (const char **)subtokens, 0);
if (!subtokens_count) {
cli_errmsg("Invalid or unsupported ldb subsignature format\n");
status = CL_EMALFDB;
goto done;
}

if (subtokens_count == 2) {
// Offset was specified
offset = subtokens[0];
sig = subtokens[1];
} else {
sig = subtokens[0];
}
}

/* get copied */
hexcpy = cli_strdup(sig);
Expand Down
37 changes: 22 additions & 15 deletions libclamav/str.c
Original file line number Diff line number Diff line change
Expand Up @@ -857,33 +857,40 @@ cl_error_t cli_strntoul_wrap(const char *buf, size_t buf_size,
/**
 * Split an LDB subsignature string into delimiter-separated tokens, in place.
 *
 * Each delimiter that ends a token is overwritten with '\0' and a pointer to
 * the start of every token is stored in `tokens`. A delimiter that appears
 * between a pair of unescaped '/' characters (i.e. inside a PCRE pattern) is
 * NOT treated as a token boundary.
 *
 * @param buffer       NUL-terminated string to tokenize; it is modified.
 * @param delim        The delimiter character.
 * @param token_count  Capacity of `tokens`; at most this many tokens are stored.
 * @param tokens       [out] Receives pointers into `buffer`. If the end of
 *                     `buffer` is reached before `token_count` tokens were
 *                     found, the unused slots are set to NULL.
 * @param token_skip   '/' characters are only interpreted as PCRE boundaries
 *                     once more than `token_skip` tokens have been started.
 * @return             The number of tokens stored in `tokens`.
 *
 * Note: when `token_count` tokens are found before the end of `buffer`, the
 * last token is still terminated at the next delimiter, so any text after that
 * delimiter is not reachable through `tokens`.
 */
size_t cli_ldbtokenize(char *buffer, const char delim, const size_t token_count,
                       const char **tokens, size_t token_skip)
{
    size_t tokens_found = 0;
    size_t token_index  = 0;
    size_t buffer_index = 0;
    bool within_pcre    = false;

    while (tokens_found < token_count) {
        tokens[tokens_found++] = &buffer[buffer_index];

        while (buffer[buffer_index] != '\0') {
            if (!within_pcre && (buffer[buffer_index] == delim)) {
                break;
            } else if ((tokens_found > token_skip) &&
                       ((buffer_index > 0) && (buffer[buffer_index - 1] != '\\')) &&
                       (buffer[buffer_index] == '/')) {
                // LDB PCRE rules must escape the '/' character with a '\'.
                // If the character sequence is "\/", then we are still within the PCRE string.
                // An unescaped '/' opens or closes the PCRE pattern. A '/' at index 0
                // is never treated as a boundary because of the index check above.
                within_pcre = !within_pcre;
            }
            buffer_index++;
        }

        if (buffer[buffer_index] != '\0') {
            // Stopped on a delimiter: terminate this token and step past it.
            buffer[buffer_index] = '\0';
            buffer_index++;
        } else {
            // Reached the end of the buffer: NULL out the unused token slots.
            token_index = tokens_found;
            while (token_index < token_count) {
                tokens[token_index] = NULL;
                token_index++;
            }

            return tokens_found;
        }
    }

    return tokens_found;
}

Expand Down
165 changes: 164 additions & 1 deletion unit_tests/clamscan_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,7 @@ def test_clamscan_17_pcre_slash_colon(self):
#
yara_db = TC.path_tmp / 'regex-slash-colon.ldb'
yara_db.write_text(
r'regex;Engine:81-255,Target:0;1;68656c6c6f20;0/hello blee/blah: bleh/'
r'regex;Engine:81-255,Target:0;1;68656c6c6f20;0/hello blee\/blah: bleh/'
)
command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfiles}'.format(
valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan, path_db=yara_db, testfiles=testfile,
Expand Down Expand Up @@ -616,3 +616,166 @@ def test_clamscan_17_pcre_slash_colon(self):
'Infected files: 1',
]
self.verify_output(output.out, expected=expected_results)

def test_clamscan_18_ldb_offset_pcre(self):
    self.step_name('Test LDB regex rules with an offset')
    # An LDB PCRE subsignature may be prefixed with "Offset:".
    # The offset is EXACT, not a wildcard: the regex match must begin exactly
    # that number of bytes from the start of the file.
    #
    # Each scenario below checks both the exit code and the scan output.

    # using 'MZ' prefix so it is detected as MSEXE and not TEXT. This is to avoid normalization.
    testfile = TC.path_tmp / 'ldb_offset_pcre'
    testfile.write_text('MZ hello blee')

    # First without the offset, make sure it matches
    yara_db = TC.path_tmp / 'ldb_pcre_no_offset.ldb'
    yara_db.write_text(
        r'ldb_pcre_no_offset;Engine:81-255,Target:0;0&1;68656c6c6f20;0/hello blee/'
    )
    command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfiles}'.format(
        valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan, path_db=yara_db, testfiles=testfile,
    )
    output = self.execute_command(command)

    assert output.ec == 1  # virus found

    expected_results = [
        'ldb_offset_pcre: ldb_pcre_no_offset.UNOFFICIAL FOUND',
        'Infected files: 1',
    ]
    self.verify_output(output.out, expected=expected_results)

    # Next, with the offset, but it won't match, because the regex pattern is "hello blee"
    # and the offset of 5 (from start of file) means the pcre match must start at "llo blee"
    yara_db = TC.path_tmp / 'ldb_pcre_offset_no_match.ldb'
    yara_db.write_text(
        r'ldb_pcre_offset_no_match;Engine:81-255,Target:0;0&1;68656c6c6f20;5:0/hello blee/'
    )
    command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfiles}'.format(
        valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan, path_db=yara_db, testfiles=testfile,
    )
    output = self.execute_command(command)

    assert output.ec == 0  # virus NOT found

    expected_results = [
        'ldb_offset_pcre: OK',
    ]
    self.verify_output(output.out, expected=expected_results)

    # Next, with the offset, and it SHOULD match, because the regex pattern is "llo blee"
    # and the offset of 5 (from start of file) means the pcre match must start at "llo blee"
    yara_db = TC.path_tmp / 'ldb_pcre_offset_match.ldb'
    yara_db.write_text(
        r'ldb_pcre_offset_match;Engine:81-255,Target:0;0&1;68656c6c6f20;5:0/llo blee/'
    )
    command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfiles}'.format(
        valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan, path_db=yara_db, testfiles=testfile,
    )
    output = self.execute_command(command)

    assert output.ec == 1  # virus found

    expected_results = [
        'ldb_offset_pcre: ldb_pcre_offset_match.UNOFFICIAL FOUND',
        'Infected files: 1',
    ]
    self.verify_output(output.out, expected=expected_results)

def test_clamscan_18_ldb_pcre_flag(self):
    self.step_name('Test LDB regex rules with case insensitive flag')
    # This test validates that the flags field of an LDB PCRE subsignature is
    # honored, specifically that the case-insensitive flag ('i') works.
    #
    # Each scenario below checks both the exit code and the scan output.

    # using 'MZ' prefix so it is detected as MSEXE and not TEXT. This is to avoid normalization.
    testfile = TC.path_tmp / 'ldb_pcre_flag'
    testfile.write_text('MZ hello blee / BlAh')

    # First test withOUT the case-insensitive flag. It should NOT match.
    yara_db = TC.path_tmp / 'ldb_pcre_case.ldb'
    yara_db.write_text(
        r'ldb_pcre_case;Engine:81-255,Target:0;0&1;68656c6c6f20;0/blah/'
    )
    command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfiles}'.format(
        valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan, path_db=yara_db, testfiles=testfile,
    )
    output = self.execute_command(command)

    assert output.ec == 0  # virus NOT found

    expected_results = [
        'ldb_pcre_flag: OK',
    ]
    self.verify_output(output.out, expected=expected_results)

    # Next test WITH the case-insensitive flag. It SHOULD match.
    yara_db = TC.path_tmp / 'ldb_pcre_nocase.ldb'
    yara_db.write_text(
        r'ldb_pcre_nocase;Engine:81-255,Target:0;0&1;68656c6c6f20;0/blah/i'
    )
    command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfiles}'.format(
        valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan, path_db=yara_db, testfiles=testfile,
    )
    output = self.execute_command(command)

    assert output.ec == 1  # virus found

    expected_results = [
        'ldb_pcre_flag: ldb_pcre_nocase.UNOFFICIAL FOUND',
        'Infected files: 1',
    ]
    self.verify_output(output.out, expected=expected_results)

def test_clamscan_18_ldb_multi_pcre(self):
    self.step_name('Test LDB rules with multiple regex subsignatures')
    # This test validates a logical signature that contains two PCRE
    # subsignatures (one of them using the case-insensitive flag) joined with
    # '&': the rule must alert only when BOTH regexes match.
    #
    # Each scenario below checks both the exit code and the scan output.

    # using 'MZ' prefix so it is detected as MSEXE and not TEXT. This is to avoid normalization.
    testfile = TC.path_tmp / 'ldb_multi_pcre'
    testfile.write_text('MZ hello blee / BlAh')

    # Verify first, with two subsigs that should both match, that the alert is found.
    yara_db = TC.path_tmp / 'ldb_multi_pcre.ldb'
    yara_db.write_text(
        r'ldb_multi_pcre;Engine:81-255,Target:0;0&1&2;68656c6c6f20;0/hello blee/;0/blah/i'
    )
    command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfiles}'.format(
        valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan, path_db=yara_db, testfiles=testfile,
    )
    output = self.execute_command(command)

    assert output.ec == 1  # virus found

    expected_results = [
        'ldb_multi_pcre: ldb_multi_pcre.UNOFFICIAL FOUND',
        'Infected files: 1',
    ]
    self.verify_output(output.out, expected=expected_results)

    # Verify next that if one of the two subsigs does not match, the whole thing does not match.
    yara_db = TC.path_tmp / 'ldb_multi_pcre.ldb'
    yara_db.write_text(
        r'ldb_multi_pcre;Engine:81-255,Target:0;0&1&2;68656c6c6f20;0/hello blee/;0/bloh/i'
    )
    command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfiles}'.format(
        valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan, path_db=yara_db, testfiles=testfile,
    )
    output = self.execute_command(command)

    assert output.ec == 0  # virus NOT found

    expected_results = [
        'ldb_multi_pcre: OK',
        'Infected files: 0',
    ]
    self.verify_output(output.out, expected=expected_results)

    # Verify next that if the other of the two subsigs does not match, the whole thing does not match.
    yara_db = TC.path_tmp / 'ldb_multi_pcre.ldb'
    yara_db.write_text(
        r'ldb_multi_pcre;Engine:81-255,Target:0;0&1&2;68656c6c6f20;0/hella blee/;0/blah/i'
    )
    command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfiles}'.format(
        valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan, path_db=yara_db, testfiles=testfile,
    )
    output = self.execute_command(command)

    assert output.ec == 0  # virus NOT found

    expected_results = [
        'ldb_multi_pcre: OK',
        'Infected files: 0',
    ]
    self.verify_output(output.out, expected=expected_results)