Skip to content

Commit 56667da

Browse files
edumazet authored and davem330 committed
net: implement lockless setsockopt(SO_PEEK_OFF)
syzbot reported a lockdep violation [1] involving af_unix
support of SO_PEEK_OFF.

Since SO_PEEK_OFF is inherently not thread safe (it uses a per-socket
sk_peek_off field), there is really no point to enforce a pointless
thread safety in the kernel.

After this patch:

- setsockopt(SO_PEEK_OFF) no longer acquires the socket lock.

- skb_consume_udp() no longer has to acquire the socket lock.

- af_unix no longer needs a special version of sk_set_peek_off(),
  because it does not lock u->iolock anymore.

As a followup, we could replace prot->set_peek_off to be a boolean
and avoid an indirect call, since we always use sk_set_peek_off().

[1]

WARNING: possible circular locking dependency detected
6.8.0-rc4-syzkaller-00267-g0f1dd5e91e2b #0 Not tainted

syz-executor.2/30025 is trying to acquire lock:
  ffff8880765e7d80 (&u->iolock){+.+.}-{3:3}, at: unix_set_peek_off+0x26/0xa0 net/unix/af_unix.c:789

but task is already holding lock:
  ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1691 [inline]
  ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: sockopt_lock_sock net/core/sock.c:1060 [inline]
  ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: sk_setsockopt+0xe52/0x3360 net/core/sock.c:1193

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (sk_lock-AF_UNIX){+.+.}-{0:0}:
        lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754
        lock_sock_nested+0x48/0x100 net/core/sock.c:3524
        lock_sock include/net/sock.h:1691 [inline]
        __unix_dgram_recvmsg+0x1275/0x12c0 net/unix/af_unix.c:2415
        sock_recvmsg_nosec+0x18e/0x1d0 net/socket.c:1046
        ____sys_recvmsg+0x3c0/0x470 net/socket.c:2801
        ___sys_recvmsg net/socket.c:2845 [inline]
        do_recvmmsg+0x474/0xae0 net/socket.c:2939
        __sys_recvmmsg net/socket.c:3018 [inline]
        __do_sys_recvmmsg net/socket.c:3041 [inline]
        __se_sys_recvmmsg net/socket.c:3034 [inline]
        __x64_sys_recvmmsg+0x199/0x250 net/socket.c:3034
        do_syscall_64+0xf9/0x240
        entry_SYSCALL_64_after_hwframe+0x6f/0x77

-> #0 (&u->iolock){+.+.}-{3:3}:
        check_prev_add kernel/locking/lockdep.c:3134 [inline]
        check_prevs_add kernel/locking/lockdep.c:3253 [inline]
        validate_chain+0x18ca/0x58e0 kernel/locking/lockdep.c:3869
        __lock_acquire+0x1345/0x1fd0 kernel/locking/lockdep.c:5137
        lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754
        __mutex_lock_common kernel/locking/mutex.c:608 [inline]
        __mutex_lock+0x136/0xd70 kernel/locking/mutex.c:752
        unix_set_peek_off+0x26/0xa0 net/unix/af_unix.c:789
        sk_setsockopt+0x207e/0x3360
        do_sock_setsockopt+0x2fb/0x720 net/socket.c:2307
        __sys_setsockopt+0x1ad/0x250 net/socket.c:2334
        __do_sys_setsockopt net/socket.c:2343 [inline]
        __se_sys_setsockopt net/socket.c:2340 [inline]
        __x64_sys_setsockopt+0xb5/0xd0 net/socket.c:2340
        do_syscall_64+0xf9/0x240
        entry_SYSCALL_64_after_hwframe+0x6f/0x77

other info that might help us debug this:

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(sk_lock-AF_UNIX);
                               lock(&u->iolock);
                               lock(sk_lock-AF_UNIX);
  lock(&u->iolock);

 *** DEADLOCK ***

1 lock held by syz-executor.2/30025:
  #0: ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1691 [inline]
  #0: ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: sockopt_lock_sock net/core/sock.c:1060 [inline]
  #0: ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: sk_setsockopt+0xe52/0x3360 net/core/sock.c:1193

stack backtrace:
CPU: 0 PID: 30025 Comm: syz-executor.2 Not tainted 6.8.0-rc4-syzkaller-00267-g0f1dd5e91e2b #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024
Call Trace:
 <TASK>
  __dump_stack lib/dump_stack.c:88 [inline]
  dump_stack_lvl+0x1e7/0x2e0 lib/dump_stack.c:106
  check_noncircular+0x36a/0x4a0 kernel/locking/lockdep.c:2187
  check_prev_add kernel/locking/lockdep.c:3134 [inline]
  check_prevs_add kernel/locking/lockdep.c:3253 [inline]
  validate_chain+0x18ca/0x58e0 kernel/locking/lockdep.c:3869
  __lock_acquire+0x1345/0x1fd0 kernel/locking/lockdep.c:5137
  lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754
  __mutex_lock_common kernel/locking/mutex.c:608 [inline]
  __mutex_lock+0x136/0xd70 kernel/locking/mutex.c:752
  unix_set_peek_off+0x26/0xa0 net/unix/af_unix.c:789
  sk_setsockopt+0x207e/0x3360
  do_sock_setsockopt+0x2fb/0x720 net/socket.c:2307
  __sys_setsockopt+0x1ad/0x250 net/socket.c:2334
  __do_sys_setsockopt net/socket.c:2343 [inline]
  __se_sys_setsockopt net/socket.c:2340 [inline]
  __x64_sys_setsockopt+0xb5/0xd0 net/socket.c:2340
  do_syscall_64+0xf9/0x240
  entry_SYSCALL_64_after_hwframe+0x6f/0x77
RIP: 0033:0x7f78a1c7dda9
Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007f78a0fde0c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000036
RAX: ffffffffffffffda RBX: 00007f78a1dac050 RCX: 00007f78a1c7dda9
RDX: 000000000000002a RSI: 0000000000000001 RDI: 0000000000000006
RBP: 00007f78a1cca47a R08: 0000000000000004 R09: 0000000000000000
R10: 0000000020000180 R11: 0000000000000246 R12: 0000000000000000
R13: 000000000000006e R14: 00007f78a1dac050 R15: 00007ffe5cd81ae8

Fixes: 859051d ("bpf: Implement cgroup sockaddr hooks for unix sockets")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
Cc: Daan De Meyer <daan.j.demeyer@gmail.com>
Cc: Kuniyuki Iwashima <kuniyu@amazon.com>
Cc: Martin KaFai Lau <martin.lau@kernel.org>
Cc: David Ahern <dsahern@kernel.org>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 3b1ae9b commit 56667da

3 files changed

Lines changed: 15 additions & 34 deletions

File tree

net/core/sock.c

Lines changed: 11 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1188,6 +1188,17 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
11881188
*/
11891189
WRITE_ONCE(sk->sk_txrehash, (u8)val);
11901190
return 0;
1191+
case SO_PEEK_OFF:
1192+
{
1193+
int (*set_peek_off)(struct sock *sk, int val);
1194+
1195+
set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
1196+
if (set_peek_off)
1197+
ret = set_peek_off(sk, val);
1198+
else
1199+
ret = -EOPNOTSUPP;
1200+
return ret;
1201+
}
11911202
}
11921203

11931204
sockopt_lock_sock(sk);
@@ -1430,18 +1441,6 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
14301441
sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
14311442
break;
14321443

1433-
case SO_PEEK_OFF:
1434-
{
1435-
int (*set_peek_off)(struct sock *sk, int val);
1436-
1437-
set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
1438-
if (set_peek_off)
1439-
ret = set_peek_off(sk, val);
1440-
else
1441-
ret = -EOPNOTSUPP;
1442-
break;
1443-
}
1444-
14451444
case SO_NOFCS:
14461445
sock_valbool_flag(sk, SOCK_NOFCS, valbool);
14471446
break;

net/ipv4/udp.c

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1589,12 +1589,7 @@ int udp_init_sock(struct sock *sk)
15891589

15901590
void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
15911591
{
1592-
if (unlikely(READ_ONCE(sk->sk_peek_off) >= 0)) {
1593-
bool slow = lock_sock_fast(sk);
1594-
1595-
sk_peek_offset_bwd(sk, len);
1596-
unlock_sock_fast(sk, slow);
1597-
}
1592+
sk_peek_offset_bwd(sk, len);
15981593

15991594
if (!skb_unref(skb))
16001595
return;

net/unix/af_unix.c

Lines changed: 3 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -782,19 +782,6 @@ static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
782782
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
783783
int);
784784

785-
static int unix_set_peek_off(struct sock *sk, int val)
786-
{
787-
struct unix_sock *u = unix_sk(sk);
788-
789-
if (mutex_lock_interruptible(&u->iolock))
790-
return -EINTR;
791-
792-
WRITE_ONCE(sk->sk_peek_off, val);
793-
mutex_unlock(&u->iolock);
794-
795-
return 0;
796-
}
797-
798785
#ifdef CONFIG_PROC_FS
799786
static int unix_count_nr_fds(struct sock *sk)
800787
{
@@ -862,7 +849,7 @@ static const struct proto_ops unix_stream_ops = {
862849
.read_skb = unix_stream_read_skb,
863850
.mmap = sock_no_mmap,
864851
.splice_read = unix_stream_splice_read,
865-
.set_peek_off = unix_set_peek_off,
852+
.set_peek_off = sk_set_peek_off,
866853
.show_fdinfo = unix_show_fdinfo,
867854
};
868855

@@ -886,7 +873,7 @@ static const struct proto_ops unix_dgram_ops = {
886873
.read_skb = unix_read_skb,
887874
.recvmsg = unix_dgram_recvmsg,
888875
.mmap = sock_no_mmap,
889-
.set_peek_off = unix_set_peek_off,
876+
.set_peek_off = sk_set_peek_off,
890877
.show_fdinfo = unix_show_fdinfo,
891878
};
892879

@@ -909,7 +896,7 @@ static const struct proto_ops unix_seqpacket_ops = {
909896
.sendmsg = unix_seqpacket_sendmsg,
910897
.recvmsg = unix_seqpacket_recvmsg,
911898
.mmap = sock_no_mmap,
912-
.set_peek_off = unix_set_peek_off,
899+
.set_peek_off = sk_set_peek_off,
913900
.show_fdinfo = unix_show_fdinfo,
914901
};
915902

0 commit comments

Comments
 (0)