forked from Minki/linux
Merge branch 'sockmap fixes picked up by stress tests'
John Fastabend says:
====================
Running stress tests with recent patch to remove an extra lock in sockmap
resulted in a couple new issues popping up. It seems only one of them
is actually related to the patch:
799aa7f98d
("skmsg: Avoid lock_sock() in sk_psock_backlog()")
The other two issues had existed long before, but I guess the timing
with the serialization we had before was too tight to get any of
our tests or deployments to hit it.
With attached series stress testing sockmap+TCP with workloads that
create lots of short-lived connections no more splats like below were
seen on upstream bpf branch.
[224913.935822] WARNING: CPU: 3 PID: 32100 at net/core/stream.c:208 sk_stream_kill_queues+0x212/0x220
[224913.935841] Modules linked in: fuse overlay bpf_preload x86_pkg_temp_thermal intel_uncore wmi_bmof squashfs sch_fq_codel efivarfs ip_tables x_tables uas xhci_pci ixgbe mdio xfrm_algo xhci_hcd wmi
[224913.935897] CPU: 3 PID: 32100 Comm: fgs-bench Tainted: G I 5.14.0-rc1alu+ #181
[224913.935908] Hardware name: Dell Inc. Precision 5820 Tower/002KVM, BIOS 1.9.2 01/24/2019
[224913.935914] RIP: 0010:sk_stream_kill_queues+0x212/0x220
[224913.935923] Code: 8b 83 20 02 00 00 85 c0 75 20 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 89 df e8 2b 11 fe ff eb c3 0f 0b e9 7c ff ff ff 0f 0b eb ce <0f> 0b 5b 5d 41 5c 41 5d 41 5e 41 5f c3 90 0f 1f 44 00 00 41 57 41
[224913.935932] RSP: 0018:ffff88816271fd38 EFLAGS: 00010206
[224913.935941] RAX: 0000000000000ae8 RBX: ffff88815acd5240 RCX: dffffc0000000000
[224913.935948] RDX: 0000000000000003 RSI: 0000000000000ae8 RDI: ffff88815acd5460
[224913.935954] RBP: ffff88815acd5460 R08: ffffffff955c0ae8 R09: fffffbfff2e6f543
[224913.935961] R10: ffffffff9737aa17 R11: fffffbfff2e6f542 R12: ffff88815acd5390
[224913.935967] R13: ffff88815acd5480 R14: ffffffff98d0c080 R15: ffffffff96267500
[224913.935974] FS: 00007f86e6bd1700(0000) GS:ffff888451cc0000(0000) knlGS:0000000000000000
[224913.935981] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[224913.935988] CR2: 000000c0008eb000 CR3: 00000001020e0005 CR4: 00000000003706e0
[224913.935994] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[224913.936000] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[224913.936007] Call Trace:
[224913.936016] inet_csk_destroy_sock+0xba/0x1f0
[224913.936033] __tcp_close+0x620/0x790
[224913.936047] tcp_close+0x20/0x80
[224913.936056] inet_release+0x8f/0xf0
[224913.936070] __sock_release+0x72/0x120
v3: make sock_drop inline in skmsg.h
v2: init skb to null and fix a space/tab issue. Added Jakub's acks.
====================
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
This commit is contained in:
commit
f1fdee33f5
@ -285,11 +285,45 @@ static inline struct sk_psock *sk_psock(const struct sock *sk)
|
||||
return rcu_dereference_sk_user_data(sk);
|
||||
}
|
||||
|
||||
static inline void sk_psock_set_state(struct sk_psock *psock,
|
||||
enum sk_psock_state_bits bit)
|
||||
{
|
||||
set_bit(bit, &psock->state);
|
||||
}
|
||||
|
||||
static inline void sk_psock_clear_state(struct sk_psock *psock,
|
||||
enum sk_psock_state_bits bit)
|
||||
{
|
||||
clear_bit(bit, &psock->state);
|
||||
}
|
||||
|
||||
static inline bool sk_psock_test_state(const struct sk_psock *psock,
|
||||
enum sk_psock_state_bits bit)
|
||||
{
|
||||
return test_bit(bit, &psock->state);
|
||||
}
|
||||
|
||||
static inline void sock_drop(struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
sk_drops_add(sk, skb);
|
||||
kfree_skb(skb);
|
||||
}
|
||||
|
||||
static inline void drop_sk_msg(struct sk_psock *psock, struct sk_msg *msg)
|
||||
{
|
||||
if (msg->skb)
|
||||
sock_drop(psock->sk, msg->skb);
|
||||
kfree(msg);
|
||||
}
|
||||
|
||||
static inline void sk_psock_queue_msg(struct sk_psock *psock,
|
||||
struct sk_msg *msg)
|
||||
{
|
||||
spin_lock_bh(&psock->ingress_lock);
|
||||
list_add_tail(&msg->list, &psock->ingress_msg);
|
||||
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
|
||||
list_add_tail(&msg->list, &psock->ingress_msg);
|
||||
else
|
||||
drop_sk_msg(psock, msg);
|
||||
spin_unlock_bh(&psock->ingress_lock);
|
||||
}
|
||||
|
||||
@ -406,24 +440,6 @@ static inline void sk_psock_restore_proto(struct sock *sk,
|
||||
psock->psock_update_sk_prot(sk, psock, true);
|
||||
}
|
||||
|
||||
static inline void sk_psock_set_state(struct sk_psock *psock,
|
||||
enum sk_psock_state_bits bit)
|
||||
{
|
||||
set_bit(bit, &psock->state);
|
||||
}
|
||||
|
||||
static inline void sk_psock_clear_state(struct sk_psock *psock,
|
||||
enum sk_psock_state_bits bit)
|
||||
{
|
||||
clear_bit(bit, &psock->state);
|
||||
}
|
||||
|
||||
static inline bool sk_psock_test_state(const struct sk_psock *psock,
|
||||
enum sk_psock_state_bits bit)
|
||||
{
|
||||
return test_bit(bit, &psock->state);
|
||||
}
|
||||
|
||||
static inline struct sk_psock *sk_psock_get(struct sock *sk)
|
||||
{
|
||||
struct sk_psock *psock;
|
||||
|
@ -584,29 +584,42 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
|
||||
return sk_psock_skb_ingress(psock, skb);
|
||||
}
|
||||
|
||||
static void sock_drop(struct sock *sk, struct sk_buff *skb)
|
||||
static void sk_psock_skb_state(struct sk_psock *psock,
|
||||
struct sk_psock_work_state *state,
|
||||
struct sk_buff *skb,
|
||||
int len, int off)
|
||||
{
|
||||
sk_drops_add(sk, skb);
|
||||
kfree_skb(skb);
|
||||
spin_lock_bh(&psock->ingress_lock);
|
||||
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
|
||||
state->skb = skb;
|
||||
state->len = len;
|
||||
state->off = off;
|
||||
} else {
|
||||
sock_drop(psock->sk, skb);
|
||||
}
|
||||
spin_unlock_bh(&psock->ingress_lock);
|
||||
}
|
||||
|
||||
static void sk_psock_backlog(struct work_struct *work)
|
||||
{
|
||||
struct sk_psock *psock = container_of(work, struct sk_psock, work);
|
||||
struct sk_psock_work_state *state = &psock->work_state;
|
||||
struct sk_buff *skb;
|
||||
struct sk_buff *skb = NULL;
|
||||
bool ingress;
|
||||
u32 len, off;
|
||||
int ret;
|
||||
|
||||
mutex_lock(&psock->work_mutex);
|
||||
if (state->skb) {
|
||||
if (unlikely(state->skb)) {
|
||||
spin_lock_bh(&psock->ingress_lock);
|
||||
skb = state->skb;
|
||||
len = state->len;
|
||||
off = state->off;
|
||||
state->skb = NULL;
|
||||
goto start;
|
||||
spin_unlock_bh(&psock->ingress_lock);
|
||||
}
|
||||
if (skb)
|
||||
goto start;
|
||||
|
||||
while ((skb = skb_dequeue(&psock->ingress_skb))) {
|
||||
len = skb->len;
|
||||
@ -621,9 +634,8 @@ start:
|
||||
len, ingress);
|
||||
if (ret <= 0) {
|
||||
if (ret == -EAGAIN) {
|
||||
state->skb = skb;
|
||||
state->len = len;
|
||||
state->off = off;
|
||||
sk_psock_skb_state(psock, state, skb,
|
||||
len, off);
|
||||
goto end;
|
||||
}
|
||||
/* Hard errors break pipe and stop xmit. */
|
||||
@ -722,6 +734,11 @@ static void __sk_psock_zap_ingress(struct sk_psock *psock)
|
||||
skb_bpf_redirect_clear(skb);
|
||||
sock_drop(psock->sk, skb);
|
||||
}
|
||||
kfree_skb(psock->work_state.skb);
|
||||
/* We null the skb here to ensure that calls to sk_psock_backlog
|
||||
* do not pick up the free'd skb.
|
||||
*/
|
||||
psock->work_state.skb = NULL;
|
||||
__sk_psock_purge_ingress_msg(psock);
|
||||
}
|
||||
|
||||
@ -773,8 +790,6 @@ static void sk_psock_destroy(struct work_struct *work)
|
||||
|
||||
void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
|
||||
{
|
||||
sk_psock_stop(psock, false);
|
||||
|
||||
write_lock_bh(&sk->sk_callback_lock);
|
||||
sk_psock_restore_proto(sk, psock);
|
||||
rcu_assign_sk_user_data(sk, NULL);
|
||||
@ -784,6 +799,8 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
|
||||
sk_psock_stop_verdict(sk, psock);
|
||||
write_unlock_bh(&sk->sk_callback_lock);
|
||||
|
||||
sk_psock_stop(psock, false);
|
||||
|
||||
INIT_RCU_WORK(&psock->rwork, sk_psock_destroy);
|
||||
queue_rcu_work(system_wq, &psock->rwork);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user