Skip to content

Commit a08acd1

Browse files
committed
Merge branch 'bpf-send-sig'
Yonghong Song says: ==================== This patch set tries to solve the following specific use case. Currently, a bpf program can already collect stack traces through the kernel function get_perf_callchain() when certain events happen (e.g., cache miss counter or cpu clock counter overflows). But such stack traces are not enough for jitted programs, e.g., hhvm (jitted php). To get the real stack trace, jit engine internal data structures need to be traversed in order to get the real user functions. The bpf program itself may not be the best place to traverse the jit engine, as the traversing logic could be complex and it is not a stable interface either. Instead, hhvm implements a signal handler, e.g. for SIGALRM, and a set of program locations at which it can dump stack traces. When it receives a signal, it will dump the stack at the next such program location. This patch set implements the bpf_send_signal() helper to send a signal to hhvm in real time, resulting in the intended stack traces. Patch #1 implemented the bpf_send_signal() helper in the kernel. Patch #2 synced the uapi header bpf.h to the tools directory. Patch #3 added a self test which covers tracepoint and perf_event bpf programs. Changelogs: v4 => v5: . pass the "current" task struct to irq_work as well, since the current task struct may change between nmi and the subsequent irq_work_interrupt. Discovered by Daniel. v3 => v4: . fix one typo and declare "const char *id_path = ..." to avoid directly using the long string in the func body in Patch #3. v2 => v3: . change the standalone test to be part of prog_tests. RFC v1 => v2: . the previous version allowed sending a signal to an arbitrary pid. This version just sends the signal to the current task, to avoid unstable pids and potential races between sending signals and task state changes for the pid. ==================== Signed-off-by: Daniel Borkmann <[email protected]>
2 parents 5420f32 + 16f0efc commit a08acd1

File tree

6 files changed

+354
-2
lines changed

6 files changed

+354
-2
lines changed

include/uapi/linux/bpf.h

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2672,6 +2672,20 @@ union bpf_attr {
26722672
* 0 on success.
26732673
*
26742674
* **-ENOENT** if the bpf-local-storage cannot be found.
2675+
*
2676+
* int bpf_send_signal(u32 sig)
2677+
* Description
2678+
* Send signal *sig* to the current task.
2679+
* Return
2680+
* 0 on success or successfully queued.
2681+
*
2682+
* **-EBUSY** if work queue under nmi is full.
2683+
*
2684+
* **-EINVAL** if *sig* is invalid.
2685+
*
2686+
* **-EPERM** if no permission to send the *sig*.
2687+
*
2688+
* **-EAGAIN** if bpf program can try again.
26752689
*/
26762690
#define __BPF_FUNC_MAPPER(FN) \
26772691
FN(unspec), \
@@ -2782,7 +2796,8 @@ union bpf_attr {
27822796
FN(strtol), \
27832797
FN(strtoul), \
27842798
FN(sk_storage_get), \
2785-
FN(sk_storage_delete),
2799+
FN(sk_storage_delete), \
2800+
FN(send_signal),
27862801

27872802
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
27882803
* function eBPF program intends to call

kernel/trace/bpf_trace.c

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,63 @@ static const struct bpf_func_proto bpf_probe_read_str_proto = {
567567
.arg3_type = ARG_ANYTHING,
568568
};
569569

570+
struct send_signal_irq_work {
571+
struct irq_work irq_work;
572+
struct task_struct *task;
573+
u32 sig;
574+
};
575+
576+
/* One pending send-signal request slot per CPU, used by bpf_send_signal() in NMI context. */
static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work);
577+
578+
static void do_bpf_send_signal(struct irq_work *entry)
579+
{
580+
struct send_signal_irq_work *work;
581+
582+
work = container_of(entry, struct send_signal_irq_work, irq_work);
583+
group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, PIDTYPE_TGID);
584+
}
585+
586+
BPF_CALL_1(bpf_send_signal, u32, sig)
587+
{
588+
struct send_signal_irq_work *work = NULL;
589+
590+
/* Similar to bpf_probe_write_user, task needs to be
591+
* in a sound condition and kernel memory access be
592+
* permitted in order to send signal to the current
593+
* task.
594+
*/
595+
if (unlikely(current->flags & (PF_KTHREAD | PF_EXITING)))
596+
return -EPERM;
597+
if (unlikely(uaccess_kernel()))
598+
return -EPERM;
599+
if (unlikely(!nmi_uaccess_okay()))
600+
return -EPERM;
601+
602+
if (in_nmi()) {
603+
work = this_cpu_ptr(&send_signal_work);
604+
if (work->irq_work.flags & IRQ_WORK_BUSY)
605+
return -EBUSY;
606+
607+
/* Add the current task, which is the target of sending signal,
608+
* to the irq_work. The current task may change when queued
609+
* irq works get executed.
610+
*/
611+
work->task = current;
612+
work->sig = sig;
613+
irq_work_queue(&work->irq_work);
614+
return 0;
615+
}
616+
617+
return group_send_sig_info(sig, SEND_SIG_PRIV, current, PIDTYPE_TGID);
618+
}
619+
620+
static const struct bpf_func_proto bpf_send_signal_proto = {
621+
.func = bpf_send_signal,
622+
.gpl_only = false,
623+
.ret_type = RET_INTEGER,
624+
.arg1_type = ARG_ANYTHING,
625+
};
626+
570627
static const struct bpf_func_proto *
571628
tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
572629
{
@@ -617,6 +674,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
617674
case BPF_FUNC_get_current_cgroup_id:
618675
return &bpf_get_current_cgroup_id_proto;
619676
#endif
677+
case BPF_FUNC_send_signal:
678+
return &bpf_send_signal_proto;
620679
default:
621680
return NULL;
622681
}
@@ -1343,5 +1402,18 @@ static int __init bpf_event_init(void)
13431402
return 0;
13441403
}
13451404

1405+
/*
 * Bind do_bpf_send_signal() as the irq_work callback of every possible
 * CPU's send_signal_work slot. Always returns 0.
 */
static int __init send_signal_irq_work_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		init_irq_work(&per_cpu_ptr(&send_signal_work, cpu)->irq_work,
			      do_bpf_send_signal);

	return 0;
}
1416+
13461417
fs_initcall(bpf_event_init);
1418+
/* Register send_signal_irq_work_init() as a subsys-level initcall. */
subsys_initcall(send_signal_irq_work_init);
13471419
#endif /* CONFIG_MODULES */

tools/include/uapi/linux/bpf.h

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2672,6 +2672,20 @@ union bpf_attr {
26722672
* 0 on success.
26732673
*
26742674
* **-ENOENT** if the bpf-local-storage cannot be found.
2675+
*
2676+
* int bpf_send_signal(u32 sig)
2677+
* Description
2678+
* Send signal *sig* to the current task.
2679+
* Return
2680+
* 0 on success or successfully queued.
2681+
*
2682+
* **-EBUSY** if work queue under nmi is full.
2683+
*
2684+
* **-EINVAL** if *sig* is invalid.
2685+
*
2686+
* **-EPERM** if no permission to send the *sig*.
2687+
*
2688+
* **-EAGAIN** if bpf program can try again.
26752689
*/
26762690
#define __BPF_FUNC_MAPPER(FN) \
26772691
FN(unspec), \
@@ -2782,7 +2796,8 @@ union bpf_attr {
27822796
FN(strtol), \
27832797
FN(strtoul), \
27842798
FN(sk_storage_get), \
2785-
FN(sk_storage_delete),
2799+
FN(sk_storage_delete), \
2800+
FN(send_signal),
27862801

27872802
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
27882803
* function eBPF program intends to call

tools/testing/selftests/bpf/bpf_helpers.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ static void *(*bpf_sk_storage_get)(void *map, struct bpf_sock *sk,
224224
(void *) BPF_FUNC_sk_storage_get;
225225
static int (*bpf_sk_storage_delete)(void *map, struct bpf_sock *sk) =
226226
(void *)BPF_FUNC_sk_storage_delete;
227+
/* Program-side stub for the send_signal helper, bound to helper id BPF_FUNC_send_signal. */
static int (*bpf_send_signal)(unsigned sig) = (void *)BPF_FUNC_send_signal;
227228

228229
/* llvm builtin functions that eBPF C program may use to
229230
* emit BPF_LD_ABS and BPF_LD_IND instructions
Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
#include <test_progs.h>
3+
4+
/*
 * Count of SIGUSR1 deliveries, bumped by sigusr1_handler(). Declared as
 * volatile sig_atomic_t because it is modified from a signal handler.
 */
static volatile sig_atomic_t sigusr1_received = 0;
5+
6+
static void sigusr1_handler(int signum)
7+
{
8+
sigusr1_received++;
9+
}
10+
11+
static int test_send_signal_common(struct perf_event_attr *attr,
12+
int prog_type,
13+
const char *test_name)
14+
{
15+
int err = -1, pmu_fd, prog_fd, info_map_fd, status_map_fd;
16+
const char *file = "./test_send_signal_kern.o";
17+
struct bpf_object *obj = NULL;
18+
int pipe_c2p[2], pipe_p2c[2];
19+
__u32 key = 0, duration = 0;
20+
char buf[256];
21+
pid_t pid;
22+
__u64 val;
23+
24+
if (CHECK(pipe(pipe_c2p), test_name,
25+
"pipe pipe_c2p error: %s\n", strerror(errno)))
26+
goto no_fork_done;
27+
28+
if (CHECK(pipe(pipe_p2c), test_name,
29+
"pipe pipe_p2c error: %s\n", strerror(errno))) {
30+
close(pipe_c2p[0]);
31+
close(pipe_c2p[1]);
32+
goto no_fork_done;
33+
}
34+
35+
pid = fork();
36+
if (CHECK(pid < 0, test_name, "fork error: %s\n", strerror(errno))) {
37+
close(pipe_c2p[0]);
38+
close(pipe_c2p[1]);
39+
close(pipe_p2c[0]);
40+
close(pipe_p2c[1]);
41+
goto no_fork_done;
42+
}
43+
44+
if (pid == 0) {
45+
/* install signal handler and notify parent */
46+
signal(SIGUSR1, sigusr1_handler);
47+
48+
close(pipe_c2p[0]); /* close read */
49+
close(pipe_p2c[1]); /* close write */
50+
51+
/* notify parent signal handler is installed */
52+
write(pipe_c2p[1], buf, 1);
53+
54+
/* make sure parent enabled bpf program to send_signal */
55+
read(pipe_p2c[0], buf, 1);
56+
57+
/* wait a little for signal handler */
58+
sleep(1);
59+
60+
if (sigusr1_received)
61+
write(pipe_c2p[1], "2", 1);
62+
else
63+
write(pipe_c2p[1], "0", 1);
64+
65+
/* wait for parent notification and exit */
66+
read(pipe_p2c[0], buf, 1);
67+
68+
close(pipe_c2p[1]);
69+
close(pipe_p2c[0]);
70+
exit(0);
71+
}
72+
73+
close(pipe_c2p[1]); /* close write */
74+
close(pipe_p2c[0]); /* close read */
75+
76+
err = bpf_prog_load(file, prog_type, &obj, &prog_fd);
77+
if (CHECK(err < 0, test_name, "bpf_prog_load error: %s\n",
78+
strerror(errno)))
79+
goto prog_load_failure;
80+
81+
pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1,
82+
-1 /* group id */, 0 /* flags */);
83+
if (CHECK(pmu_fd < 0, test_name, "perf_event_open error: %s\n",
84+
strerror(errno))) {
85+
err = -1;
86+
goto close_prog;
87+
}
88+
89+
err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
90+
if (CHECK(err < 0, test_name, "ioctl perf_event_ioc_enable error: %s\n",
91+
strerror(errno)))
92+
goto disable_pmu;
93+
94+
err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
95+
if (CHECK(err < 0, test_name, "ioctl perf_event_ioc_set_bpf error: %s\n",
96+
strerror(errno)))
97+
goto disable_pmu;
98+
99+
err = -1;
100+
info_map_fd = bpf_object__find_map_fd_by_name(obj, "info_map");
101+
if (CHECK(info_map_fd < 0, test_name, "find map %s error\n", "info_map"))
102+
goto disable_pmu;
103+
104+
status_map_fd = bpf_object__find_map_fd_by_name(obj, "status_map");
105+
if (CHECK(status_map_fd < 0, test_name, "find map %s error\n", "status_map"))
106+
goto disable_pmu;
107+
108+
/* wait until child signal handler installed */
109+
read(pipe_c2p[0], buf, 1);
110+
111+
/* trigger the bpf send_signal */
112+
key = 0;
113+
val = (((__u64)(SIGUSR1)) << 32) | pid;
114+
bpf_map_update_elem(info_map_fd, &key, &val, 0);
115+
116+
/* notify child that bpf program can send_signal now */
117+
write(pipe_p2c[1], buf, 1);
118+
119+
/* wait for result */
120+
err = read(pipe_c2p[0], buf, 1);
121+
if (CHECK(err < 0, test_name, "reading pipe error: %s\n", strerror(errno)))
122+
goto disable_pmu;
123+
if (CHECK(err == 0, test_name, "reading pipe error: size 0\n")) {
124+
err = -1;
125+
goto disable_pmu;
126+
}
127+
128+
err = CHECK(buf[0] != '2', test_name, "incorrect result\n");
129+
130+
/* notify child safe to exit */
131+
write(pipe_p2c[1], buf, 1);
132+
133+
disable_pmu:
134+
close(pmu_fd);
135+
close_prog:
136+
bpf_object__close(obj);
137+
prog_load_failure:
138+
close(pipe_c2p[0]);
139+
close(pipe_p2c[1]);
140+
wait(NULL);
141+
no_fork_done:
142+
return err;
143+
}
144+
145+
static int test_send_signal_tracepoint(void)
146+
{
147+
const char *id_path = "/sys/kernel/debug/tracing/events/syscalls/sys_enter_nanosleep/id";
148+
struct perf_event_attr attr = {
149+
.type = PERF_TYPE_TRACEPOINT,
150+
.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN,
151+
.sample_period = 1,
152+
.wakeup_events = 1,
153+
};
154+
__u32 duration = 0;
155+
int bytes, efd;
156+
char buf[256];
157+
158+
efd = open(id_path, O_RDONLY, 0);
159+
if (CHECK(efd < 0, "tracepoint",
160+
"open syscalls/sys_enter_nanosleep/id failure: %s\n",
161+
strerror(errno)))
162+
return -1;
163+
164+
bytes = read(efd, buf, sizeof(buf));
165+
close(efd);
166+
if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "tracepoint",
167+
"read syscalls/sys_enter_nanosleep/id failure: %s\n",
168+
strerror(errno)))
169+
return -1;
170+
171+
attr.config = strtol(buf, NULL, 0);
172+
173+
return test_send_signal_common(&attr, BPF_PROG_TYPE_TRACEPOINT, "tracepoint");
174+
}
175+
176+
static int test_send_signal_nmi(void)
177+
{
178+
struct perf_event_attr attr = {
179+
.sample_freq = 50,
180+
.freq = 1,
181+
.type = PERF_TYPE_HARDWARE,
182+
.config = PERF_COUNT_HW_CPU_CYCLES,
183+
};
184+
185+
return test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT, "perf_event");
186+
}
187+
188+
/*
 * Entry point: run both send_signal sub-tests (each one always runs,
 * regardless of the other's outcome) and print a single OK/FAIL line.
 */
void test_send_signal(void)
{
	int failed;

	failed = test_send_signal_tracepoint();
	failed |= test_send_signal_nmi();

	if (failed) {
		printf("test_send_signal:FAIL\n");
		return;
	}

	printf("test_send_signal:OK\n");
}

0 commit comments

Comments
 (0)