Skip to content

Commit 63066b7

Browse files
Alexei Starovoitov authored and anakryiko committed
selftests/bpf: Convert glob_match() to bpf arena
Increase arena test coverage.

Convert glob_match() to bpf arena in two steps:
1. Copy-paste lib/glob.c into bpf_arena_strsearch.h and lib/globtests.c into progs/arena_strsearch.c.
2. Add __arena to pointers, add __arg_arena to global functions that accept arena pointers, and add cond_break to loops.

The test also serves as a good example of what's possible with bpf arena and how existing algorithms can be converted.

Signed-off-by: Alexei Starovoitov <[email protected]>
Signed-off-by: Andrii Nakryiko <[email protected]>
Link: https://lore.kernel.org/bpf/[email protected]
1 parent fea3f5e commit 63066b7

File tree

3 files changed

+304
-0
lines changed

3 files changed

+304
-0
lines changed
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
2+
/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
3+
#pragma once
4+
#include "bpf_arena_common.h"
5+
6+
/*
 * bpf_arena_strlen - count the bytes in @s that precede its nul terminator.
 * @s: nul-terminated string referenced through an arena pointer.
 *
 * Returns the distance between @s and the position where the scan stopped.
 * The scan stops at the first '\0'. cond_break is evaluated on every
 * iteration; NOTE(review): it is defined outside this file and presumably
 * can end the loop early, in which case the value returned is smaller than
 * the real string length.
 */
__noinline int bpf_arena_strlen(const char __arena *s __arg_arena)
{
	const char __arena *end = s;

	while (*end != '\0') {
		cond_break;
		end++;
	}
	return end - s;
}
14+
15+
/**
16+
* glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)
17+
* @pat: Shell-style pattern to match, e.g. "*.[ch]".
18+
* @str: String to match. The pattern must match the entire string.
19+
*
20+
* Perform shell-style glob matching, returning true (1) if the match
21+
* succeeds, or false (0) if it fails. Equivalent to !fnmatch(@pat, @str, 0).
22+
*
23+
* Pattern metacharacters are ?, *, [ and \.
24+
* (And, inside character classes, !, - and ].)
25+
*
26+
* This is small and simple implementation intended for device blacklists
27+
* where a string is matched against a number of patterns. Thus, it
28+
* does not preprocess the patterns. It is non-recursive, and run-time
29+
* is at most quadratic: strlen(@str)*strlen(@pat).
30+
*
31+
* An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");
32+
* it takes 6 passes over the pattern before matching the string.
33+
*
34+
* Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT
35+
* treat / or leading . specially; it isn't actually used for pathnames.
36+
*
37+
* Note that according to glob(7) (and unlike bash), character classes
38+
* are complemented by a leading !; this does not support the regex-style
39+
* [^a-z] syntax.
40+
*
41+
* An opening bracket without a matching close is matched literally.
42+
*/
43+
__noinline bool glob_match(char const __arena *pat __arg_arena, char const __arena *str __arg_arena)
44+
{
45+
/*
46+
* Backtrack to previous * on mismatch and retry starting one
47+
* character later in the string. Because * matches all characters
48+
* (no exception for /), it can be easily proved that there's
49+
* never a need to backtrack multiple levels.
50+
*/
51+
char const __arena *back_pat = NULL, *back_str;
52+
53+
/*
54+
* Loop over each token (character or class) in pat, matching
55+
* it against the remaining unmatched tail of str. Return false
56+
* on mismatch, or true after matching the trailing nul bytes.
57+
*/
58+
for (;;) {
59+
unsigned char c = *str++;
60+
unsigned char d = *pat++;
61+
62+
switch (d) {
63+
case '?': /* Wildcard: anything but nul */
64+
if (c == '\0')
65+
return false;
66+
break;
67+
case '*': /* Any-length wildcard */
68+
if (*pat == '\0') /* Optimize trailing * case */
69+
return true;
70+
back_pat = pat;
71+
back_str = --str; /* Allow zero-length match */
72+
break;
73+
case '[': { /* Character class */
74+
bool match = false, inverted = (*pat == '!');
75+
char const __arena *class = pat + inverted;
76+
unsigned char a = *class++;
77+
78+
/*
79+
* Iterate over each span in the character class.
80+
* A span is either a single character a, or a
81+
* range a-b. The first span may begin with ']'.
82+
*/
83+
do {
84+
unsigned char b = a;
85+
86+
if (a == '\0') /* Malformed */
87+
goto literal;
88+
89+
if (class[0] == '-' && class[1] != ']') {
90+
b = class[1];
91+
92+
if (b == '\0')
93+
goto literal;
94+
95+
class += 2;
96+
/* Any special action if a > b? */
97+
}
98+
match |= (a <= c && c <= b);
99+
cond_break;
100+
} while ((a = *class++) != ']');
101+
102+
if (match == inverted)
103+
goto backtrack;
104+
pat = class;
105+
}
106+
break;
107+
case '\\':
108+
d = *pat++;
109+
__attribute__((__fallthrough__));
110+
default: /* Literal character */
111+
literal:
112+
if (c == d) {
113+
if (d == '\0')
114+
return true;
115+
break;
116+
}
117+
backtrack:
118+
if (c == '\0' || !back_pat)
119+
return false; /* No point continuing */
120+
/* Try again from last *, one character later in str. */
121+
pat = back_pat;
122+
str = ++back_str;
123+
break;
124+
}
125+
cond_break;
126+
}
127+
return false;
128+
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
3+
#include <test_progs.h>
4+
#include "arena_strsearch.skel.h"
5+
6+
static void test_arena_str(void)
7+
{
8+
LIBBPF_OPTS(bpf_test_run_opts, opts);
9+
struct arena_strsearch *skel;
10+
int ret;
11+
12+
skel = arena_strsearch__open_and_load();
13+
if (!ASSERT_OK_PTR(skel, "arena_strsearch__open_and_load"))
14+
return;
15+
16+
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_strsearch), &opts);
17+
ASSERT_OK(ret, "ret_add");
18+
ASSERT_OK(opts.retval, "retval");
19+
if (skel->bss->skip) {
20+
printf("%s:SKIP:compiler doesn't support arena_cast\n", __func__);
21+
test__skip();
22+
}
23+
arena_strsearch__destroy(skel);
24+
}
25+
26+
/*
 * Test entry point: runs test_arena_str() when the "arena_strsearch"
 * subtest is started.
 */
void test_arena_strsearch(void)
{
	if (!test__start_subtest("arena_strsearch"))
		return;

	test_arena_str();
}
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
3+
#include <vmlinux.h>
4+
#include "bpf_experimental.h"
5+
6+
struct {
7+
__uint(type, BPF_MAP_TYPE_ARENA);
8+
__uint(map_flags, BPF_F_MMAPABLE);
9+
__uint(max_entries, 100); /* number of pages */
10+
} arena SEC(".maps");
11+
12+
#include "bpf_arena_strsearch.h"
13+
14+
struct glob_test {
15+
char const __arena *pat, *str;
16+
bool expected;
17+
};
18+
19+
/*
 * Run glob_match() on one (@pat, @str) pair.
 * Returns true when the result equals @expected, false otherwise.
 */
static bool test(char const __arena *pat, char const __arena *str, bool expected)
{
	return glob_match(pat, str) == expected;
}
27+
28+
/*
29+
* The tests are all jammed together in one array to make it simpler
30+
* to place that array in the .init.rodata section. The obvious
31+
* "array of structures containing char *" has no way to force the
32+
* pointed-to strings to be in a particular section.
33+
*
34+
* Anyway, a test consists of:
35+
* 1. Expected glob_match result: '1' or '0'.
36+
* 2. Pattern to match: null-terminated string
37+
* 3. String to match against: null-terminated string
38+
*
39+
* The list of tests is terminated with a final '\0' instead of
40+
* a glob_match result character.
41+
*/
42+
static const char __arena glob_tests[] =
43+
/* Some basic tests */
44+
"1" "a\0" "a\0"
45+
"0" "a\0" "b\0"
46+
"0" "a\0" "aa\0"
47+
"0" "a\0" "\0"
48+
"1" "\0" "\0"
49+
"0" "\0" "a\0"
50+
/* Simple character class tests */
51+
"1" "[a]\0" "a\0"
52+
"0" "[a]\0" "b\0"
53+
"0" "[!a]\0" "a\0"
54+
"1" "[!a]\0" "b\0"
55+
"1" "[ab]\0" "a\0"
56+
"1" "[ab]\0" "b\0"
57+
"0" "[ab]\0" "c\0"
58+
"1" "[!ab]\0" "c\0"
59+
"1" "[a-c]\0" "b\0"
60+
"0" "[a-c]\0" "d\0"
61+
/* Corner cases in character class parsing */
62+
"1" "[a-c-e-g]\0" "-\0"
63+
"0" "[a-c-e-g]\0" "d\0"
64+
"1" "[a-c-e-g]\0" "f\0"
65+
"1" "[]a-ceg-ik[]\0" "a\0"
66+
"1" "[]a-ceg-ik[]\0" "]\0"
67+
"1" "[]a-ceg-ik[]\0" "[\0"
68+
"1" "[]a-ceg-ik[]\0" "h\0"
69+
"0" "[]a-ceg-ik[]\0" "f\0"
70+
"0" "[!]a-ceg-ik[]\0" "h\0"
71+
"0" "[!]a-ceg-ik[]\0" "]\0"
72+
"1" "[!]a-ceg-ik[]\0" "f\0"
73+
/* Simple wild cards */
74+
"1" "?\0" "a\0"
75+
"0" "?\0" "aa\0"
76+
"0" "??\0" "a\0"
77+
"1" "?x?\0" "axb\0"
78+
"0" "?x?\0" "abx\0"
79+
"0" "?x?\0" "xab\0"
80+
/* Asterisk wild cards (backtracking) */
81+
"0" "*??\0" "a\0"
82+
"1" "*??\0" "ab\0"
83+
"1" "*??\0" "abc\0"
84+
"1" "*??\0" "abcd\0"
85+
"0" "??*\0" "a\0"
86+
"1" "??*\0" "ab\0"
87+
"1" "??*\0" "abc\0"
88+
"1" "??*\0" "abcd\0"
89+
"0" "?*?\0" "a\0"
90+
"1" "?*?\0" "ab\0"
91+
"1" "?*?\0" "abc\0"
92+
"1" "?*?\0" "abcd\0"
93+
"1" "*b\0" "b\0"
94+
"1" "*b\0" "ab\0"
95+
"0" "*b\0" "ba\0"
96+
"1" "*b\0" "bb\0"
97+
"1" "*b\0" "abb\0"
98+
"1" "*b\0" "bab\0"
99+
"1" "*bc\0" "abbc\0"
100+
"1" "*bc\0" "bc\0"
101+
"1" "*bc\0" "bbc\0"
102+
"1" "*bc\0" "bcbc\0"
103+
/* Multiple asterisks (complex backtracking) */
104+
"1" "*ac*\0" "abacadaeafag\0"
105+
"1" "*ac*ae*ag*\0" "abacadaeafag\0"
106+
"1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0"
107+
"0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0"
108+
"1" "*abcd*\0" "abcabcabcabcdefg\0"
109+
"1" "*ab*cd*\0" "abcabcabcabcdefg\0"
110+
"1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0"
111+
"0" "*abcd*\0" "abcabcabcabcefg\0"
112+
"0" "*ab*cd*\0" "abcabcabcabcefg\0";
113+
114+
bool skip = false;
115+
116+
SEC("syscall")
117+
int arena_strsearch(void *ctx)
118+
{
119+
unsigned successes = 0;
120+
unsigned n = 0;
121+
char const __arena *p = glob_tests;
122+
123+
/*
124+
* Tests are jammed together in a string. The first byte is '1'
125+
* or '0' to indicate the expected outcome, or '\0' to indicate the
126+
* end of the tests. Then come two null-terminated strings: the
127+
* pattern and the string to match it against.
128+
*/
129+
while (*p) {
130+
bool expected = *p++ & 1;
131+
char const __arena *pat = p;
132+
133+
cond_break;
134+
p += bpf_arena_strlen(p) + 1;
135+
successes += test(pat, p, expected);
136+
p += bpf_arena_strlen(p) + 1;
137+
n++;
138+
}
139+
140+
n -= successes;
141+
/* bpf_printk("glob: %u self-tests passed, %u failed\n", successes, n); */
142+
143+
return n ? -1 : 0;
144+
}
145+
146+
/* License string, emitted into the "license" section of the object. */
char _license[] SEC("license") = "GPL";

0 commit comments

Comments
 (0)