From 8b06a24bb625728ac709f2c69405eb01025687e1 Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Wed, 10 Apr 2024 19:27:12 +0200 Subject: [PATCH 01/47] xfrm: fix possible derferencing in error path Fix derferencing pointer when xfrm_policy_lookup_bytype returns an error. Fixes: 63b21caba17e ("xfrm: introduce forwarding of ICMP Error messages") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/kernel-janitors/f6ef0d0d-96de-4e01-9dc3-c1b3a6338653@moroto.mountain/ Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 6affe5cd85d8..53d8fabfa685 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3593,6 +3593,8 @@ xfrm_policy *xfrm_in_fwd_icmp(struct sk_buff *skb, return pol; pol = xfrm_policy_lookup(net, &fl1, family, XFRM_POLICY_FWD, if_id); + if (IS_ERR(pol)) + pol = NULL; } return pol; From 58fbfecab965014b6e3cc956a76b4a96265a1add Mon Sep 17 00:00:00 2001 From: Paul Davey Date: Tue, 23 Apr 2024 18:00:24 +1200 Subject: [PATCH 02/47] xfrm: Preserve vlan tags for transport mode software GRO The software GRO path for esp transport mode uses skb_mac_header_rebuild prior to re-injecting the packet via the xfrm_napi_dev. This only copies skb->mac_len bytes of header which may not be sufficient if the packet contains 802.1Q tags or other VLAN tags. Worse copying only the initial header will leave a packet marked as being VLAN tagged but without the corresponding tag leading to mangling when it is later untagged. The VLAN tags are important when receiving the decrypted esp transport mode packet after GRO processing to ensure it is received on the correct interface. Therefore record the full mac header length in xfrm*_transport_input for later use in corresponding xfrm*_transport_finish to copy the entire mac header when rebuilding the mac header for GRO. The skb->data pointer is left pointing skb->mac_header bytes after the start of the mac header as is expected by the network stack and network and transport header offsets reset to this location. Fixes: 7785bba299a8 ("esp: Add a software GRO codepath") Signed-off-by: Paul Davey Signed-off-by: Steffen Klassert --- include/linux/skbuff.h | 15 +++++++++++++++ include/net/xfrm.h | 3 +++ net/ipv4/xfrm4_input.c | 6 +++++- net/ipv6/xfrm6_input.c | 6 +++++- net/xfrm/xfrm_input.c | 8 ++++++++ 5 files changed, 36 insertions(+), 2 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9d24aec064e8..4ff48eda3f64 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3031,6 +3031,21 @@ static inline void skb_mac_header_rebuild(struct sk_buff *skb) } } +/* Move the full mac header up to current network_header. + * Leaves skb->data pointing at offset skb->mac_len into the mac_header. + * Must be provided the complete mac header length. + */ +static inline void skb_mac_header_rebuild_full(struct sk_buff *skb, u32 full_mac_len) +{ + if (skb_mac_header_was_set(skb)) { + const unsigned char *old_mac = skb_mac_header(skb); + + skb_set_mac_header(skb, -full_mac_len); + memmove(skb_mac_header(skb), old_mac, full_mac_len); + __skb_push(skb, full_mac_len - skb->mac_len); + } +} + static inline int skb_checksum_start_offset(const struct sk_buff *skb) { return skb->csum_start - skb_headroom(skb); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 57c743b7e4fe..cb4841a9fffd 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1049,6 +1049,9 @@ struct xfrm_offload { #define CRYPTO_INVALID_PACKET_SYNTAX 64 #define CRYPTO_INVALID_PROTOCOL 128 + /* Used to keep whole l2 header for transport mode GRO */ + __u32 orig_mac_len; + __u8 proto; __u8 inner_ipproto; }; diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index dae35101d189..86382e08140e 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -63,7 +63,11 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) ip_send_check(iph); if (xo && (xo->flags & XFRM_GRO)) { - skb_mac_header_rebuild(skb); + /* The full l2 header needs to be preserved so that re-injecting the packet at l2 + * works correctly in the presence of vlan tags. + */ + skb_mac_header_rebuild_full(skb, xo->orig_mac_len); + skb_reset_network_header(skb); skb_reset_transport_header(skb); return 0; } diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index a17d783dc7c0..c6b8e132e10a 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -58,7 +58,11 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) skb_postpush_rcsum(skb, skb_network_header(skb), nhlen); if (xo && (xo->flags & XFRM_GRO)) { - skb_mac_header_rebuild(skb); + /* The full l2 header needs to be preserved so that re-injecting the packet at l2 + * works correctly in the presence of vlan tags. + */ + skb_mac_header_rebuild_full(skb, xo->orig_mac_len); + skb_reset_network_header(skb); skb_reset_transport_header(skb); return 0; } diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 161f535c8b94..3a2982a72a6b 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -389,11 +389,15 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) */ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) { + struct xfrm_offload *xo = xfrm_offload(skb); int ihl = skb->data - skb_transport_header(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), skb_network_header(skb), ihl); + if (xo) + xo->orig_mac_len = + skb_mac_header_was_set(skb) ? skb_mac_header_len(skb) : 0; skb->network_header = skb->transport_header; } ip_hdr(skb)->tot_len = htons(skb->len + ihl); @@ -404,11 +408,15 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) { #if IS_ENABLED(CONFIG_IPV6) + struct xfrm_offload *xo = xfrm_offload(skb); int ihl = skb->data - skb_transport_header(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), skb_network_header(skb), ihl); + if (xo) + xo->orig_mac_len = + skb_mac_header_was_set(skb) ? skb_mac_header_len(skb) : 0; skb->network_header = skb->transport_header; } ipv6_hdr(skb)->payload_len = htons(skb->len + ihl - From b6d2e438e16c7d4dbde08cfb2b95b0f3f325ba40 Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Fri, 26 Apr 2024 12:15:13 +0200 Subject: [PATCH 03/47] xfrm: Correct spelling mistake in xfrm.h comment A spelling error was found in the comment section of include/uapi/linux/xfrm.h. Since this header file is copied to many userspace programs and undergoes Debian spellcheck, it's preferable to fix it in upstream rather than downstream having exceptions. This commit fixes the spelling mistake. Fixes: df71837d5024 ("[LSM-IPSec]: Security association restriction.") Signed-off-by: Antony Antony Reviewed-by: Jiri Pirko Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 6a77328be114..594b66e16395 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -228,7 +228,7 @@ enum { #define XFRM_NR_MSGTYPES (XFRM_MSG_MAX + 1 - XFRM_MSG_BASE) /* - * Generic LSM security context for comunicating to user space + * Generic LSM security context for communicating to user space * NOTE: Same format as sadb_x_sec_ctx */ struct xfrm_user_sec_ctx { From 94062790aedb505bdda209b10bea47b294d6394f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 May 2024 12:54:48 +0000 Subject: [PATCH 04/47] tcp: defer shutdown(SEND_SHUTDOWN) for TCP_SYN_RECV sockets TCP_SYN_RECV state is really special, it is only used by cross-syn connections, mostly used by fuzzers. In the following crash [1], syzbot managed to trigger a divide by zero in tcp_rcv_space_adjust() A socket makes the following state transitions, without ever calling tcp_init_transfer(), meaning tcp_init_buffer_space() is also not called. TCP_CLOSE connect() TCP_SYN_SENT TCP_SYN_RECV shutdown() -> tcp_shutdown(sk, SEND_SHUTDOWN) TCP_FIN_WAIT1 To fix this issue, change tcp_shutdown() to not perform a TCP_SYN_RECV -> TCP_FIN_WAIT1 transition, which makes no sense anyway. When tcp_rcv_state_process() later changes socket state from TCP_SYN_RECV to TCP_ESTABLISH, then look at sk->sk_shutdown to finally enter TCP_FIN_WAIT1 state, and send a FIN packet from a sane socket state. This means tcp_send_fin() can now be called from BH context, and must use GFP_ATOMIC allocations. [1] divide error: 0000 [#1] PREEMPT SMP KASAN NOPTI CPU: 1 PID: 5084 Comm: syz-executor358 Not tainted 6.9.0-rc6-syzkaller-00022-g98369dccd2f8 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024 RIP: 0010:tcp_rcv_space_adjust+0x2df/0x890 net/ipv4/tcp_input.c:767 Code: e3 04 4c 01 eb 48 8b 44 24 38 0f b6 04 10 84 c0 49 89 d5 0f 85 a5 03 00 00 41 8b 8e c8 09 00 00 89 e8 29 c8 48 0f af c3 31 d2 <48> f7 f1 48 8d 1c 43 49 8d 96 76 08 00 00 48 89 d0 48 c1 e8 03 48 RSP: 0018:ffffc900031ef3f0 EFLAGS: 00010246 RAX: 0c677a10441f8f42 RBX: 000000004fb95e7e RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000027d4b11f R08: ffffffff89e535a4 R09: 1ffffffff25e6ab7 R10: dffffc0000000000 R11: ffffffff8135e920 R12: ffff88802a9f8d30 R13: dffffc0000000000 R14: ffff88802a9f8d00 R15: 1ffff1100553f2da FS: 00005555775c0380(0000) GS:ffff8880b9500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f1155bf2304 CR3: 000000002b9f2000 CR4: 0000000000350ef0 Call Trace: tcp_recvmsg_locked+0x106d/0x25a0 net/ipv4/tcp.c:2513 tcp_recvmsg+0x25d/0x920 net/ipv4/tcp.c:2578 inet6_recvmsg+0x16a/0x730 net/ipv6/af_inet6.c:680 sock_recvmsg_nosec net/socket.c:1046 [inline] sock_recvmsg+0x109/0x280 net/socket.c:1068 ____sys_recvmsg+0x1db/0x470 net/socket.c:2803 ___sys_recvmsg net/socket.c:2845 [inline] do_recvmmsg+0x474/0xae0 net/socket.c:2939 __sys_recvmmsg net/socket.c:3018 [inline] __do_sys_recvmmsg net/socket.c:3041 [inline] __se_sys_recvmmsg net/socket.c:3034 [inline] __x64_sys_recvmmsg+0x199/0x250 net/socket.c:3034 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf5/0x240 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7faeb6363db9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 c1 17 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffcc1997168 EFLAGS: 00000246 ORIG_RAX: 000000000000012b RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007faeb6363db9 RDX: 0000000000000001 RSI: 0000000020000bc0 RDI: 0000000000000005 RBP: 0000000000000000 R08: 0000000000000000 R09: 000000000000001c R10: 0000000000000122 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000001 R15: 0000000000000001 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Link: https://lore.kernel.org/r/20240501125448.896529-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_input.c | 2 ++ net/ipv4/tcp_output.c | 4 +++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e767721b3a58..66d77faca64f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2710,7 +2710,7 @@ void tcp_shutdown(struct sock *sk, int how) /* If we've already sent a FIN, or it's a closed state, skip this. */ if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_SYN_SENT | - TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) { + TCPF_CLOSE_WAIT)) { /* Clear out any half completed packets. FIN if needed. */ if (tcp_close_state(sk)) tcp_send_fin(sk); @@ -2819,7 +2819,7 @@ void __tcp_close(struct sock *sk, long timeout) * machine. State transitions: * * TCP_ESTABLISHED -> TCP_FIN_WAIT1 - * TCP_SYN_RECV -> TCP_FIN_WAIT1 (forget it, it's impossible) + * TCP_SYN_RECV -> TCP_FIN_WAIT1 (it is difficult) * TCP_CLOSE_WAIT -> TCP_LAST_ACK * * are legal only when FIN has been sent (i.e. in window), diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5d874817a78d..a140d9f7a0a3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6761,6 +6761,8 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tcp_initialize_rcv_mss(sk); tcp_fast_path_on(tp); + if (sk->sk_shutdown & SEND_SHUTDOWN) + tcp_shutdown(sk, SEND_SHUTDOWN); break; case TCP_FIN_WAIT1: { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e3167ad96567..02caeb7bcf63 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3563,7 +3563,9 @@ void tcp_send_fin(struct sock *sk) return; } } else { - skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation); + skb = alloc_skb_fclone(MAX_TCP_HEADER, + sk_gfp_mask(sk, GFP_ATOMIC | + __GFP_NOWARN)); if (unlikely(!skb)) return; From f2db7230f73a80dbb179deab78f88a7947f0ab7e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 1 May 2024 14:31:45 -0700 Subject: [PATCH 05/47] tcp: Use refcount_inc_not_zero() in tcp_twsk_unique(). Anderson Nascimento reported a use-after-free splat in tcp_twsk_unique() with nice analysis. Since commit ec94c2696f0b ("tcp/dccp: avoid one atomic operation for timewait hashdance"), inet_twsk_hashdance() sets TIME-WAIT socket's sk_refcnt after putting it into ehash and releasing the bucket lock. Thus, there is a small race window where other threads could try to reuse the port during connect() and call sock_hold() in tcp_twsk_unique() for the TIME-WAIT socket with zero refcnt. If that happens, the refcnt taken by tcp_twsk_unique() is overwritten and sock_put() will cause underflow, triggering a real use-after-free somewhere else. To avoid the use-after-free, we need to use refcount_inc_not_zero() in tcp_twsk_unique() and give up on reusing the port if it returns false. [0]: refcount_t: addition on 0; use-after-free. WARNING: CPU: 0 PID: 1039313 at lib/refcount.c:25 refcount_warn_saturate+0xe5/0x110 CPU: 0 PID: 1039313 Comm: trigger Not tainted 6.8.6-200.fc39.x86_64 #1 Hardware name: VMware, Inc. VMware20,1/440BX Desktop Reference Platform, BIOS VMW201.00V.21805430.B64.2305221830 05/22/2023 RIP: 0010:refcount_warn_saturate+0xe5/0x110 Code: 42 8e ff 0f 0b c3 cc cc cc cc 80 3d aa 13 ea 01 00 0f 85 5e ff ff ff 48 c7 c7 f8 8e b7 82 c6 05 96 13 ea 01 01 e8 7b 42 8e ff <0f> 0b c3 cc cc cc cc 48 c7 c7 50 8f b7 82 c6 05 7a 13 ea 01 01 e8 RSP: 0018:ffffc90006b43b60 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffff888009bb3ef0 RCX: 0000000000000027 RDX: ffff88807be218c8 RSI: 0000000000000001 RDI: ffff88807be218c0 RBP: 0000000000069d70 R08: 0000000000000000 R09: ffffc90006b439f0 R10: ffffc90006b439e8 R11: 0000000000000003 R12: ffff8880029ede84 R13: 0000000000004e20 R14: ffffffff84356dc0 R15: ffff888009bb3ef0 FS: 00007f62c10926c0(0000) GS:ffff88807be00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020ccb000 CR3: 000000004628c005 CR4: 0000000000f70ef0 PKRU: 55555554 Call Trace: ? refcount_warn_saturate+0xe5/0x110 ? __warn+0x81/0x130 ? refcount_warn_saturate+0xe5/0x110 ? report_bug+0x171/0x1a0 ? refcount_warn_saturate+0xe5/0x110 ? handle_bug+0x3c/0x80 ? exc_invalid_op+0x17/0x70 ? asm_exc_invalid_op+0x1a/0x20 ? refcount_warn_saturate+0xe5/0x110 tcp_twsk_unique+0x186/0x190 __inet_check_established+0x176/0x2d0 __inet_hash_connect+0x74/0x7d0 ? __pfx___inet_check_established+0x10/0x10 tcp_v4_connect+0x278/0x530 __inet_stream_connect+0x10f/0x3d0 inet_stream_connect+0x3a/0x60 __sys_connect+0xa8/0xd0 __x64_sys_connect+0x18/0x20 do_syscall_64+0x83/0x170 entry_SYSCALL_64_after_hwframe+0x78/0x80 RIP: 0033:0x7f62c11a885d Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d a3 45 0c 00 f7 d8 64 89 01 48 RSP: 002b:00007f62c1091e58 EFLAGS: 00000296 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 0000000020ccb004 RCX: 00007f62c11a885d RDX: 0000000000000010 RSI: 0000000020ccb000 RDI: 0000000000000003 RBP: 00007f62c1091e90 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000296 R12: 00007f62c10926c0 R13: ffffffffffffff88 R14: 0000000000000000 R15: 00007ffe237885b0 Fixes: ec94c2696f0b ("tcp/dccp: avoid one atomic operation for timewait hashdance") Reported-by: Anderson Nascimento Closes: https://lore.kernel.org/netdev/37a477a6-d39e-486b-9577-3463f655a6b7@allelesecurity.com/ Suggested-by: Eric Dumazet Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240501213145.62261-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_ipv4.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a22ee5838751..e0cef75f85fb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -154,6 +154,12 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) if (tcptw->tw_ts_recent_stamp && (!twp || (reuse && time_after32(ktime_get_seconds(), tcptw->tw_ts_recent_stamp)))) { + /* inet_twsk_hashdance() sets sk_refcnt after putting twsk + * and releasing the bucket lock. + */ + if (unlikely(!refcount_inc_not_zero(&sktw->sk_refcnt))) + return 0; + /* In case of repair and re-using TIME-WAIT sockets we still * want to be sure that it is safe as above but honor the * sequence numbers and time stamps set as part of the repair @@ -174,7 +180,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) tp->rx_opt.ts_recent = tcptw->tw_ts_recent; tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; } - sock_hold(sktw); + return 1; } From 483bc08181827fc475643272ffb69c533007e546 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Thu, 25 Apr 2024 22:23:45 +0800 Subject: [PATCH 06/47] Bluetooth: Fix use-after-free bugs caused by sco_sock_timeout When the sco connection is established and then, the sco socket is releasing, timeout_work will be scheduled to judge whether the sco disconnection is timeout. The sock will be deallocated later, but it is dereferenced again in sco_sock_timeout. As a result, the use-after-free bugs will happen. The root cause is shown below: Cleanup Thread | Worker Thread sco_sock_release | sco_sock_close | __sco_sock_close | sco_sock_set_timer | schedule_delayed_work | sco_sock_kill | (wait a time) sock_put(sk) //FREE | sco_sock_timeout | sock_hold(sk) //USE The KASAN report triggered by POC is shown below: [ 95.890016] ================================================================== [ 95.890496] BUG: KASAN: slab-use-after-free in sco_sock_timeout+0x5e/0x1c0 [ 95.890755] Write of size 4 at addr ffff88800c388080 by task kworker/0:0/7 ... [ 95.890755] Workqueue: events sco_sock_timeout [ 95.890755] Call Trace: [ 95.890755] [ 95.890755] dump_stack_lvl+0x45/0x110 [ 95.890755] print_address_description+0x78/0x390 [ 95.890755] print_report+0x11b/0x250 [ 95.890755] ? __virt_addr_valid+0xbe/0xf0 [ 95.890755] ? sco_sock_timeout+0x5e/0x1c0 [ 95.890755] kasan_report+0x139/0x170 [ 95.890755] ? update_load_avg+0xe5/0x9f0 [ 95.890755] ? sco_sock_timeout+0x5e/0x1c0 [ 95.890755] kasan_check_range+0x2c3/0x2e0 [ 95.890755] sco_sock_timeout+0x5e/0x1c0 [ 95.890755] process_one_work+0x561/0xc50 [ 95.890755] worker_thread+0xab2/0x13c0 [ 95.890755] ? pr_cont_work+0x490/0x490 [ 95.890755] kthread+0x279/0x300 [ 95.890755] ? pr_cont_work+0x490/0x490 [ 95.890755] ? kthread_blkcg+0xa0/0xa0 [ 95.890755] ret_from_fork+0x34/0x60 [ 95.890755] ? kthread_blkcg+0xa0/0xa0 [ 95.890755] ret_from_fork_asm+0x11/0x20 [ 95.890755] [ 95.890755] [ 95.890755] Allocated by task 506: [ 95.890755] kasan_save_track+0x3f/0x70 [ 95.890755] __kasan_kmalloc+0x86/0x90 [ 95.890755] __kmalloc+0x17f/0x360 [ 95.890755] sk_prot_alloc+0xe1/0x1a0 [ 95.890755] sk_alloc+0x31/0x4e0 [ 95.890755] bt_sock_alloc+0x2b/0x2a0 [ 95.890755] sco_sock_create+0xad/0x320 [ 95.890755] bt_sock_create+0x145/0x320 [ 95.890755] __sock_create+0x2e1/0x650 [ 95.890755] __sys_socket+0xd0/0x280 [ 95.890755] __x64_sys_socket+0x75/0x80 [ 95.890755] do_syscall_64+0xc4/0x1b0 [ 95.890755] entry_SYSCALL_64_after_hwframe+0x67/0x6f [ 95.890755] [ 95.890755] Freed by task 506: [ 95.890755] kasan_save_track+0x3f/0x70 [ 95.890755] kasan_save_free_info+0x40/0x50 [ 95.890755] poison_slab_object+0x118/0x180 [ 95.890755] __kasan_slab_free+0x12/0x30 [ 95.890755] kfree+0xb2/0x240 [ 95.890755] __sk_destruct+0x317/0x410 [ 95.890755] sco_sock_release+0x232/0x280 [ 95.890755] sock_close+0xb2/0x210 [ 95.890755] __fput+0x37f/0x770 [ 95.890755] task_work_run+0x1ae/0x210 [ 95.890755] get_signal+0xe17/0xf70 [ 95.890755] arch_do_signal_or_restart+0x3f/0x520 [ 95.890755] syscall_exit_to_user_mode+0x55/0x120 [ 95.890755] do_syscall_64+0xd1/0x1b0 [ 95.890755] entry_SYSCALL_64_after_hwframe+0x67/0x6f [ 95.890755] [ 95.890755] The buggy address belongs to the object at ffff88800c388000 [ 95.890755] which belongs to the cache kmalloc-1k of size 1024 [ 95.890755] The buggy address is located 128 bytes inside of [ 95.890755] freed 1024-byte region [ffff88800c388000, ffff88800c388400) [ 95.890755] [ 95.890755] The buggy address belongs to the physical page: [ 95.890755] page: refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88800c38a800 pfn:0xc388 [ 95.890755] head: order:3 entire_mapcount:0 nr_pages_mapped:0 pincount:0 [ 95.890755] anon flags: 0x100000000000840(slab|head|node=0|zone=1) [ 95.890755] page_type: 0xffffffff() [ 95.890755] raw: 0100000000000840 ffff888006842dc0 0000000000000000 0000000000000001 [ 95.890755] raw: ffff88800c38a800 000000000010000a 00000001ffffffff 0000000000000000 [ 95.890755] head: 0100000000000840 ffff888006842dc0 0000000000000000 0000000000000001 [ 95.890755] head: ffff88800c38a800 000000000010000a 00000001ffffffff 0000000000000000 [ 95.890755] head: 0100000000000003 ffffea000030e201 ffffea000030e248 00000000ffffffff [ 95.890755] head: 0000000800000000 0000000000000000 00000000ffffffff 0000000000000000 [ 95.890755] page dumped because: kasan: bad access detected [ 95.890755] [ 95.890755] Memory state around the buggy address: [ 95.890755] ffff88800c387f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 95.890755] ffff88800c388000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 95.890755] >ffff88800c388080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 95.890755] ^ [ 95.890755] ffff88800c388100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 95.890755] ffff88800c388180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 95.890755] ================================================================== Fix this problem by adding a check protected by sco_conn_lock to judget whether the conn->hcon is null. Because the conn->hcon will be set to null, when the sock is releasing. Fixes: ba316be1b6a0 ("Bluetooth: schedule SCO timeouts with delayed_work") Signed-off-by: Duoming Zhou Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/sco.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 5d03c5440b06..e0ad30862ee4 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -83,6 +83,10 @@ static void sco_sock_timeout(struct work_struct *work) struct sock *sk; sco_conn_lock(conn); + if (!conn->hcon) { + sco_conn_unlock(conn); + return; + } sk = conn->sk; if (sk) sock_hold(sk); From 66c39332d02d65e311ec89b0051130bfcd00c9ac Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 25 Apr 2024 09:55:03 +0200 Subject: [PATCH 07/47] Bluetooth: qca: fix wcn3991 device address check Qualcomm Bluetooth controllers may not have been provisioned with a valid device address and instead end up using the default address 00:00:00:00:5a:ad. This address is now used to determine if a controller has a valid address or if one needs to be provided through devicetree or by user space before the controller can be used. It turns out that the WCN3991 controllers used in Chromium Trogdor machines use a different default address, 39:98:00:00:5a:ad, which also needs to be marked as invalid so that the correct address is fetched from the devicetree. Qualcomm has unfortunately not yet provided any answers as to whether the 39:98 encodes a hardware id and if there are other variants of the default address that needs to be handled by the driver. For now, add the Trogdor WCN3991 default address to the device address check to avoid having these controllers start with the default address instead of their assigned addresses. Fixes: 32868e126c78 ("Bluetooth: qca: fix invalid device address check") Cc: stable@vger.kernel.org # 6.5 Cc: Doug Anderson Cc: Janaki Ramaiah Thota Signed-off-by: Johan Hovold Tested-by: Douglas Anderson Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btqca.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 216826c31ee3..cfa71708397b 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -16,6 +16,7 @@ #define VERSION "0.1" #define QCA_BDADDR_DEFAULT (&(bdaddr_t) {{ 0xad, 0x5a, 0x00, 0x00, 0x00, 0x00 }}) +#define QCA_BDADDR_WCN3991 (&(bdaddr_t) {{ 0xad, 0x5a, 0x00, 0x00, 0x98, 0x39 }}) int qca_read_soc_version(struct hci_dev *hdev, struct qca_btsoc_version *ver, enum qca_btsoc_type soc_type) @@ -638,8 +639,10 @@ static int qca_check_bdaddr(struct hci_dev *hdev) } bda = (struct hci_rp_read_bd_addr *)skb->data; - if (!bacmp(&bda->bdaddr, QCA_BDADDR_DEFAULT)) + if (!bacmp(&bda->bdaddr, QCA_BDADDR_DEFAULT) || + !bacmp(&bda->bdaddr, QCA_BDADDR_WCN3991)) { set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + } kfree_skb(skb); From 4d7b41c0e43995b0e992b9f8903109275744b658 Mon Sep 17 00:00:00 2001 From: Sungwoo Kim Date: Tue, 30 Apr 2024 02:32:10 -0400 Subject: [PATCH 08/47] Bluetooth: L2CAP: Fix slab-use-after-free in l2cap_connect() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend a critical section to prevent chan from early freeing. Also make the l2cap_connect() return type void. Nothing is using the returned value but it is ugly to return a potentially freed pointer. Making it void will help with backports because earlier kernels did use the return value. Now the compile will break for kernels where this patch is not a complete fix. Call stack summary: [use] l2cap_bredr_sig_cmd l2cap_connect ┌ mutex_lock(&conn->chan_lock); │ chan = pchan->ops->new_connection(pchan); <- alloc chan │ __l2cap_chan_add(conn, chan); │ l2cap_chan_hold(chan); │ list_add(&chan->list, &conn->chan_l); ... (1) └ mutex_unlock(&conn->chan_lock); chan->conf_state ... (4) <- use after free [free] l2cap_conn_del ┌ mutex_lock(&conn->chan_lock); │ foreach chan in conn->chan_l: ... (2) │ l2cap_chan_put(chan); │ l2cap_chan_destroy │ kfree(chan) ... (3) <- chan freed └ mutex_unlock(&conn->chan_lock); ================================================================== BUG: KASAN: slab-use-after-free in instrument_atomic_read include/linux/instrumented.h:68 [inline] BUG: KASAN: slab-use-after-free in _test_bit include/asm-generic/bitops/instrumented-non-atomic.h:141 [inline] BUG: KASAN: slab-use-after-free in l2cap_connect+0xa67/0x11a0 net/bluetooth/l2cap_core.c:4260 Read of size 8 at addr ffff88810bf040a0 by task kworker/u3:1/311 Fixes: 73ffa904b782 ("Bluetooth: Move conf_{req,rsp} stuff to struct l2cap_chan") Signed-off-by: Sungwoo Kim Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 84fc70862d78..868a370a16aa 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -3902,13 +3902,12 @@ static inline int l2cap_command_rej(struct l2cap_conn *conn, return 0; } -static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn, - struct l2cap_cmd_hdr *cmd, - u8 *data, u8 rsp_code, u8 amp_id) +static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, + u8 *data, u8 rsp_code, u8 amp_id) { struct l2cap_conn_req *req = (struct l2cap_conn_req *) data; struct l2cap_conn_rsp rsp; - struct l2cap_chan *chan = NULL, *pchan; + struct l2cap_chan *chan = NULL, *pchan = NULL; int result, status = L2CAP_CS_NO_INFO; u16 dcid = 0, scid = __le16_to_cpu(req->scid); @@ -3921,7 +3920,7 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn, &conn->hcon->dst, ACL_LINK); if (!pchan) { result = L2CAP_CR_BAD_PSM; - goto sendresp; + goto response; } mutex_lock(&conn->chan_lock); @@ -4008,17 +4007,15 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn, } response: - l2cap_chan_unlock(pchan); - mutex_unlock(&conn->chan_lock); - l2cap_chan_put(pchan); - -sendresp: rsp.scid = cpu_to_le16(scid); rsp.dcid = cpu_to_le16(dcid); rsp.result = cpu_to_le16(result); rsp.status = cpu_to_le16(status); l2cap_send_cmd(conn, cmd->ident, rsp_code, sizeof(rsp), &rsp); + if (!pchan) + return; + if (result == L2CAP_CR_PEND && status == L2CAP_CS_NO_INFO) { struct l2cap_info_req info; info.type = cpu_to_le16(L2CAP_IT_FEAT_MASK); @@ -4041,7 +4038,9 @@ sendresp: chan->num_conf_req++; } - return chan; + l2cap_chan_unlock(pchan); + mutex_unlock(&conn->chan_lock); + l2cap_chan_put(pchan); } static int l2cap_connect_req(struct l2cap_conn *conn, From 10f9f426ac6e752c8d87bf4346930ba347aaabac Mon Sep 17 00:00:00 2001 From: Sungwoo Kim Date: Tue, 30 Apr 2024 12:20:51 -0400 Subject: [PATCH 09/47] Bluetooth: msft: fix slab-use-after-free in msft_do_close() Tying the msft->data lifetime to hdev by freeing it in hci_release_dev() to fix the following case: [use] msft_do_close() msft = hdev->msft_data; if (!msft) ...(1) <- passed. return; mutex_lock(&msft->filter_lock); ...(4) <- used after freed. [free] msft_unregister() msft = hdev->msft_data; hdev->msft_data = NULL; ...(2) kfree(msft); ...(3) <- msft is freed. ================================================================== BUG: KASAN: slab-use-after-free in __mutex_lock_common kernel/locking/mutex.c:587 [inline] BUG: KASAN: slab-use-after-free in __mutex_lock+0x8f/0xc30 kernel/locking/mutex.c:752 Read of size 8 at addr ffff888106cbbca8 by task kworker/u5:2/309 Fixes: bf6a4e30ffbd ("Bluetooth: disable advertisement filters during suspend") Signed-off-by: Sungwoo Kim Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_core.c | 3 +-- net/bluetooth/msft.c | 2 +- net/bluetooth/msft.h | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index a7028d38c1f5..bc5086423ab8 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2768,8 +2768,6 @@ void hci_unregister_dev(struct hci_dev *hdev) hci_unregister_suspend_notifier(hdev); - msft_unregister(hdev); - hci_dev_do_close(hdev); if (!test_bit(HCI_INIT, &hdev->flags) && @@ -2823,6 +2821,7 @@ void hci_release_dev(struct hci_dev *hdev) hci_discovery_filter_clear(hdev); hci_blocked_keys_clear(hdev); hci_codec_list_clear(&hdev->local_codecs); + msft_release(hdev); hci_dev_unlock(hdev); ida_destroy(&hdev->unset_handle_ida); diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c index 9612c5d1b13f..d039683d3bdd 100644 --- a/net/bluetooth/msft.c +++ b/net/bluetooth/msft.c @@ -769,7 +769,7 @@ void msft_register(struct hci_dev *hdev) mutex_init(&msft->filter_lock); } -void msft_unregister(struct hci_dev *hdev) +void msft_release(struct hci_dev *hdev) { struct msft_data *msft = hdev->msft_data; diff --git a/net/bluetooth/msft.h b/net/bluetooth/msft.h index 2a63205b377b..fe538e9c91c0 100644 --- a/net/bluetooth/msft.h +++ b/net/bluetooth/msft.h @@ -14,7 +14,7 @@ bool msft_monitor_supported(struct hci_dev *hdev); void msft_register(struct hci_dev *hdev); -void msft_unregister(struct hci_dev *hdev); +void msft_release(struct hci_dev *hdev); void msft_do_open(struct hci_dev *hdev); void msft_do_close(struct hci_dev *hdev); void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb); @@ -35,7 +35,7 @@ static inline bool msft_monitor_supported(struct hci_dev *hdev) } static inline void msft_register(struct hci_dev *hdev) {} -static inline void msft_unregister(struct hci_dev *hdev) {} +static inline void msft_release(struct hci_dev *hdev) {} static inline void msft_do_open(struct hci_dev *hdev) {} static inline void msft_do_close(struct hci_dev *hdev) {} static inline void msft_vendor_evt(struct hci_dev *hdev, void *data, From 2e4edfa1e2bd821a317e7d006517dcf2f3fac68d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 30 Apr 2024 19:07:39 +0200 Subject: [PATCH 10/47] Bluetooth: qca: add missing firmware sanity checks Add the missing sanity checks when parsing the firmware files before downloading them to avoid accessing and corrupting memory beyond the vmalloced buffer. Fixes: 83e81961ff7e ("Bluetooth: btqca: Introduce generic QCA ROME support") Cc: stable@vger.kernel.org # 4.10 Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btqca.c | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index cfa71708397b..6743b0a79d7a 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -268,9 +268,10 @@ int qca_send_pre_shutdown_cmd(struct hci_dev *hdev) } EXPORT_SYMBOL_GPL(qca_send_pre_shutdown_cmd); -static void qca_tlv_check_data(struct hci_dev *hdev, +static int qca_tlv_check_data(struct hci_dev *hdev, struct qca_fw_config *config, - u8 *fw_data, enum qca_btsoc_type soc_type) + u8 *fw_data, size_t fw_size, + enum qca_btsoc_type soc_type) { const u8 *data; u32 type_len; @@ -286,6 +287,9 @@ static void qca_tlv_check_data(struct hci_dev *hdev, switch (config->type) { case ELF_TYPE_PATCH: + if (fw_size < 7) + return -EINVAL; + config->dnld_mode = QCA_SKIP_EVT_VSE_CC; config->dnld_type = QCA_SKIP_EVT_VSE_CC; @@ -294,6 +298,9 @@ static void qca_tlv_check_data(struct hci_dev *hdev, bt_dev_dbg(hdev, "File version : 0x%x", fw_data[6]); break; case TLV_TYPE_PATCH: + if (fw_size < sizeof(struct tlv_type_hdr) + sizeof(struct tlv_type_patch)) + return -EINVAL; + tlv = (struct tlv_type_hdr *)fw_data; type_len = le32_to_cpu(tlv->type_len); tlv_patch = (struct tlv_type_patch *)tlv->data; @@ -333,6 +340,9 @@ static void qca_tlv_check_data(struct hci_dev *hdev, break; case TLV_TYPE_NVM: + if (fw_size < sizeof(struct tlv_type_hdr)) + return -EINVAL; + tlv = (struct tlv_type_hdr *)fw_data; type_len = le32_to_cpu(tlv->type_len); @@ -341,17 +351,26 @@ static void qca_tlv_check_data(struct hci_dev *hdev, BT_DBG("TLV Type\t\t : 0x%x", type_len & 0x000000ff); BT_DBG("Length\t\t : %d bytes", length); + if (fw_size < length + (tlv->data - fw_data)) + return -EINVAL; + idx = 0; data = tlv->data; - while (idx < length) { + while (idx < length - sizeof(struct tlv_type_nvm)) { tlv_nvm = (struct tlv_type_nvm *)(data + idx); tag_id = le16_to_cpu(tlv_nvm->tag_id); tag_len = le16_to_cpu(tlv_nvm->tag_len); + if (length < idx + sizeof(struct tlv_type_nvm) + tag_len) + return -EINVAL; + /* Update NVM tags as needed */ switch (tag_id) { case EDL_TAG_ID_HCI: + if (tag_len < 3) + return -EINVAL; + /* HCI transport layer parameters * enabling software inband sleep * onto controller side. @@ -367,6 +386,9 @@ static void qca_tlv_check_data(struct hci_dev *hdev, break; case EDL_TAG_ID_DEEP_SLEEP: + if (tag_len < 1) + return -EINVAL; + /* Sleep enable mask * enabling deep sleep feature on controller. */ @@ -375,14 +397,16 @@ static void qca_tlv_check_data(struct hci_dev *hdev, break; } - idx += (sizeof(u16) + sizeof(u16) + 8 + tag_len); + idx += sizeof(struct tlv_type_nvm) + tag_len; } break; default: BT_ERR("Unknown TLV type %d", config->type); - break; + return -EINVAL; } + + return 0; } static int qca_tlv_send_segment(struct hci_dev *hdev, int seg_size, @@ -532,7 +556,9 @@ static int qca_download_firmware(struct hci_dev *hdev, memcpy(data, fw->data, size); release_firmware(fw); - qca_tlv_check_data(hdev, config, data, soc_type); + ret = qca_tlv_check_data(hdev, config, data, size, soc_type); + if (ret) + return ret; segment = data; remain = size; From a112d3c72a227f2edbb6d8094472cc6e503e52af Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 30 Apr 2024 19:07:40 +0200 Subject: [PATCH 11/47] Bluetooth: qca: fix NVM configuration parsing The NVM configuration files used by WCN3988 and WCN3990/1/8 have two sets of configuration tags that are enclosed by a type-length header of type four which the current parser fails to account for. Instead the driver happily parses random data as if it were valid tags, something which can lead to the configuration data being corrupted if it ever encounters the words 0x0011 or 0x001b. As is clear from commit b63882549b2b ("Bluetooth: btqca: Fix the NVM baudrate tag offcet for wcn3991") the intention has always been to process the configuration data also for WCN3991 and WCN3998 which encodes the baud rate at a different offset. Fix the parser so that it can handle the WCN3xxx configuration files, which has an enclosing type-length header of type four and two sets of TLV tags enclosed by a type-length header of type two and three, respectively. Note that only the first set, which contains the tags the driver is currently looking for, will be parsed for now. With the parser fixed, the software in-band sleep bit will now be set for WCN3991 and WCN3998 (as it is for later controllers) and the default baud rate 3200000 may be updated by the driver also for WCN3xxx controllers. Notably the deep-sleep feature bit is already set by default in all configuration files in linux-firmware. Fixes: 4219d4686875 ("Bluetooth: btqca: Add wcn3990 firmware download support.") Cc: stable@vger.kernel.org # 4.19 Cc: Matthias Kaehlcke Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btqca.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 6743b0a79d7a..f6c9f89a6311 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -281,6 +281,7 @@ static int qca_tlv_check_data(struct hci_dev *hdev, struct tlv_type_patch *tlv_patch; struct tlv_type_nvm *tlv_nvm; uint8_t nvm_baud_rate = config->user_baud_rate; + u8 type; config->dnld_mode = QCA_SKIP_EVT_NONE; config->dnld_type = QCA_SKIP_EVT_NONE; @@ -346,11 +347,30 @@ static int qca_tlv_check_data(struct hci_dev *hdev, tlv = (struct tlv_type_hdr *)fw_data; type_len = le32_to_cpu(tlv->type_len); - length = (type_len >> 8) & 0x00ffffff; + length = type_len >> 8; + type = type_len & 0xff; - BT_DBG("TLV Type\t\t : 0x%x", type_len & 0x000000ff); + /* Some NVM files have more than one set of tags, only parse + * the first set when it has type 2 for now. When there is + * more than one set there is an enclosing header of type 4. + */ + if (type == 4) { + if (fw_size < 2 * sizeof(struct tlv_type_hdr)) + return -EINVAL; + + tlv++; + + type_len = le32_to_cpu(tlv->type_len); + length = type_len >> 8; + type = type_len & 0xff; + } + + BT_DBG("TLV Type\t\t : 0x%x", type); BT_DBG("Length\t\t : %d bytes", length); + if (type != 2) + break; + if (fw_size < length + (tlv->data - fw_data)) return -EINVAL; From dd336649ba89789c845618dcbc09867010aec673 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 30 Apr 2024 19:07:41 +0200 Subject: [PATCH 12/47] Bluetooth: qca: generalise device address check The default device address apparently comes from the NVM configuration file and can differ quite a bit between controllers. Store the default address when parsing the configuration file and use it to determine whether the controller has been provisioned with an address. This makes sure that devices without a unique address start as unconfigured unless a valid address has been provided in the devicetree. Fixes: 32868e126c78 ("Bluetooth: qca: fix invalid device address check") Cc: stable@vger.kernel.org # 6.5 Cc: Doug Anderson Cc: Janaki Ramaiah Thota Signed-off-by: Johan Hovold Tested-by: Douglas Anderson Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btqca.c | 21 ++++++++++++--------- drivers/bluetooth/btqca.h | 2 ++ 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index f6c9f89a6311..c6b2dd4d1716 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -15,9 +15,6 @@ #define VERSION "0.1" -#define QCA_BDADDR_DEFAULT (&(bdaddr_t) {{ 0xad, 0x5a, 0x00, 0x00, 0x00, 0x00 }}) -#define QCA_BDADDR_WCN3991 (&(bdaddr_t) {{ 0xad, 0x5a, 0x00, 0x00, 0x98, 0x39 }}) - int qca_read_soc_version(struct hci_dev *hdev, struct qca_btsoc_version *ver, enum qca_btsoc_type soc_type) { @@ -387,6 +384,14 @@ static int qca_tlv_check_data(struct hci_dev *hdev, /* Update NVM tags as needed */ switch (tag_id) { + case EDL_TAG_ID_BD_ADDR: + if (tag_len != sizeof(bdaddr_t)) + return -EINVAL; + + memcpy(&config->bdaddr, tlv_nvm->data, sizeof(bdaddr_t)); + + break; + case EDL_TAG_ID_HCI: if (tag_len < 3) return -EINVAL; @@ -661,7 +666,7 @@ int qca_set_bdaddr_rome(struct hci_dev *hdev, const bdaddr_t *bdaddr) } EXPORT_SYMBOL_GPL(qca_set_bdaddr_rome); -static int qca_check_bdaddr(struct hci_dev *hdev) +static int qca_check_bdaddr(struct hci_dev *hdev, const struct qca_fw_config *config) { struct hci_rp_read_bd_addr *bda; struct sk_buff *skb; @@ -685,10 +690,8 @@ static int qca_check_bdaddr(struct hci_dev *hdev) } bda = (struct hci_rp_read_bd_addr *)skb->data; - if (!bacmp(&bda->bdaddr, QCA_BDADDR_DEFAULT) || - !bacmp(&bda->bdaddr, QCA_BDADDR_WCN3991)) { + if (!bacmp(&bda->bdaddr, &config->bdaddr)) set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); - } kfree_skb(skb); @@ -716,7 +719,7 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, enum qca_btsoc_type soc_type, struct qca_btsoc_version ver, const char *firmware_name) { - struct qca_fw_config config; + struct qca_fw_config config = {}; int err; u8 rom_ver = 0; u32 soc_ver; @@ -901,7 +904,7 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, break; } - err = qca_check_bdaddr(hdev); + err = qca_check_bdaddr(hdev, &config); if (err) return err; diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h index dc31984f71dc..49ad668d0d0b 100644 --- a/drivers/bluetooth/btqca.h +++ b/drivers/bluetooth/btqca.h @@ -29,6 +29,7 @@ #define EDL_PATCH_CONFIG_RES_EVT (0x00) #define QCA_DISABLE_LOGGING_SUB_OP (0x14) +#define EDL_TAG_ID_BD_ADDR 2 #define EDL_TAG_ID_HCI (17) #define EDL_TAG_ID_DEEP_SLEEP (27) @@ -94,6 +95,7 @@ struct qca_fw_config { uint8_t user_baud_rate; enum qca_tlv_dnld_mode dnld_mode; enum qca_tlv_dnld_mode dnld_type; + bdaddr_t bdaddr; }; struct edl_event_hdr { From cda0d6a198e2a7ec6f176c36173a57bdd8af7af2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 1 May 2024 14:34:52 +0200 Subject: [PATCH 13/47] Bluetooth: qca: fix info leak when fetching fw build id Add the missing sanity checks and move the 255-byte build-id buffer off the stack to avoid leaking stack data through debugfs in case the build-info reply is malformed. Fixes: c0187b0bd3e9 ("Bluetooth: btqca: Add support to read FW build version for WCN3991 BTSoC") Cc: stable@vger.kernel.org # 5.12 Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btqca.c | 27 ++++++++++++++++++++++----- drivers/bluetooth/btqca.h | 1 - 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index c6b2dd4d1716..664db524b1dd 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -99,7 +99,8 @@ static int qca_read_fw_build_info(struct hci_dev *hdev) { struct sk_buff *skb; struct edl_event_hdr *edl; - char cmd, build_label[QCA_FW_BUILD_VER_LEN]; + char *build_label; + char cmd; int build_lbl_len, err = 0; bt_dev_dbg(hdev, "QCA read fw build info"); @@ -114,6 +115,11 @@ static int qca_read_fw_build_info(struct hci_dev *hdev) return err; } + if (skb->len < sizeof(*edl)) { + err = -EILSEQ; + goto out; + } + edl = (struct edl_event_hdr *)(skb->data); if (!edl) { bt_dev_err(hdev, "QCA read fw build info with no header"); @@ -129,14 +135,25 @@ static int qca_read_fw_build_info(struct hci_dev *hdev) goto out; } - build_lbl_len = edl->data[0]; - if (build_lbl_len <= QCA_FW_BUILD_VER_LEN - 1) { - memcpy(build_label, edl->data + 1, build_lbl_len); - *(build_label + build_lbl_len) = '\0'; + if (skb->len < sizeof(*edl) + 1) { + err = -EILSEQ; + goto out; } + build_lbl_len = edl->data[0]; + + if (skb->len < sizeof(*edl) + 1 + build_lbl_len) { + err = -EILSEQ; + goto out; + } + + build_label = kstrndup(&edl->data[1], build_lbl_len, GFP_KERNEL); + if (!build_label) + goto out; + hci_set_fw_info(hdev, "%s", build_label); + kfree(build_label); out: kfree_skb(skb); return err; diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h index 49ad668d0d0b..215433fd76a1 100644 --- a/drivers/bluetooth/btqca.h +++ b/drivers/bluetooth/btqca.h @@ -48,7 +48,6 @@ #define get_soc_ver(soc_id, rom_ver) \ ((le32_to_cpu(soc_id) << 16) | (le16_to_cpu(rom_ver))) -#define QCA_FW_BUILD_VER_LEN 255 #define QCA_HSP_GF_SOC_ID 0x1200 #define QCA_HSP_GF_SOC_MASK 0x0000ff00 From 0adcf6be1445ed50bfd4a451a7a782568f270197 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 1 May 2024 14:34:53 +0200 Subject: [PATCH 14/47] Bluetooth: qca: fix info leak when fetching board id Add the missing sanity check when fetching the board id to avoid leaking slab data when later requesting the firmware. Fixes: a7f8dedb4be2 ("Bluetooth: qca: add support for QCA2066") Cc: stable@vger.kernel.org # 6.7 Cc: Tim Jiang Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btqca.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 664db524b1dd..8d8a664620a3 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -252,6 +252,11 @@ static int qca_read_fw_board_id(struct hci_dev *hdev, u16 *bid) goto out; } + if (skb->len < 3) { + err = -EILSEQ; + goto out; + } + *bid = (edl->data[1] << 8) + edl->data[2]; bt_dev_dbg(hdev, "%s: bid = %x", __func__, *bid); From cd17bcbd2b33d7c816b80640ae2fef2507576278 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 12 Apr 2024 15:30:43 +0800 Subject: [PATCH 15/47] arm64: dts: mediatek: mt8183-pico6: Fix bluetooth node Bluetooth is not a random device connected to the MMC/SD controller. It is function 2 of the SDIO device. Fix the address of the bluetooth node. Also fix the node name and drop the label. Fixes: 055ef10ccdd4 ("arm64: dts: mt8183: Add jacuzzi pico/pico6 board") Signed-off-by: Chen-Yu Tsai Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Luiz Augusto von Dentz --- arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi-pico6.dts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi-pico6.dts b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi-pico6.dts index a2e74b829320..6a7ae616512d 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi-pico6.dts +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi-pico6.dts @@ -82,7 +82,8 @@ }; &mmc1 { - bt_reset: bt-reset { + bluetooth@2 { + reg = <2>; compatible = "mediatek,mt7921s-bluetooth"; pinctrl-names = "default"; pinctrl-0 = <&bt_pins_reset>; From d2706004a1b8b526592e823d7e52551b518a7941 Mon Sep 17 00:00:00 2001 From: Sungwoo Kim Date: Thu, 2 May 2024 12:09:31 -0400 Subject: [PATCH 16/47] Bluetooth: HCI: Fix potential null-ptr-deref Fix potential null-ptr-deref in hci_le_big_sync_established_evt(). Fixes: f777d8827817 (Bluetooth: ISO: Notify user space about failed bis connections) Signed-off-by: Sungwoo Kim Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 4a27e4a17a67..d72d238c1656 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -7037,6 +7037,8 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, u16 handle = le16_to_cpu(ev->bis[i]); bis = hci_conn_hash_lookup_handle(hdev, handle); + if (!bis) + continue; set_bit(HCI_CONN_BIG_SYNC_FAILED, &bis->flags); hci_connect_cfm(bis, ev->status); From adf0398cee86643b8eacde95f17d073d022f782c Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Thu, 2 May 2024 20:57:36 +0800 Subject: [PATCH 17/47] Bluetooth: l2cap: fix null-ptr-deref in l2cap_chan_timeout There is a race condition between l2cap_chan_timeout() and l2cap_chan_del(). When we use l2cap_chan_del() to delete the channel, the chan->conn will be set to null. But the conn could be dereferenced again in the mutex_lock() of l2cap_chan_timeout(). As a result the null pointer dereference bug will happen. The KASAN report triggered by POC is shown below: [ 472.074580] ================================================================== [ 472.075284] BUG: KASAN: null-ptr-deref in mutex_lock+0x68/0xc0 [ 472.075308] Write of size 8 at addr 0000000000000158 by task kworker/0:0/7 [ 472.075308] [ 472.075308] CPU: 0 PID: 7 Comm: kworker/0:0 Not tainted 6.9.0-rc5-00356-g78c0094a146b #36 [ 472.075308] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu4 [ 472.075308] Workqueue: events l2cap_chan_timeout [ 472.075308] Call Trace: [ 472.075308] [ 472.075308] dump_stack_lvl+0x137/0x1a0 [ 472.075308] print_report+0x101/0x250 [ 472.075308] ? __virt_addr_valid+0x77/0x160 [ 472.075308] ? mutex_lock+0x68/0xc0 [ 472.075308] kasan_report+0x139/0x170 [ 472.075308] ? mutex_lock+0x68/0xc0 [ 472.075308] kasan_check_range+0x2c3/0x2e0 [ 472.075308] mutex_lock+0x68/0xc0 [ 472.075308] l2cap_chan_timeout+0x181/0x300 [ 472.075308] process_one_work+0x5d2/0xe00 [ 472.075308] worker_thread+0xe1d/0x1660 [ 472.075308] ? pr_cont_work+0x5e0/0x5e0 [ 472.075308] kthread+0x2b7/0x350 [ 472.075308] ? pr_cont_work+0x5e0/0x5e0 [ 472.075308] ? kthread_blkcg+0xd0/0xd0 [ 472.075308] ret_from_fork+0x4d/0x80 [ 472.075308] ? kthread_blkcg+0xd0/0xd0 [ 472.075308] ret_from_fork_asm+0x11/0x20 [ 472.075308] [ 472.075308] ================================================================== [ 472.094860] Disabling lock debugging due to kernel taint [ 472.096136] BUG: kernel NULL pointer dereference, address: 0000000000000158 [ 472.096136] #PF: supervisor write access in kernel mode [ 472.096136] #PF: error_code(0x0002) - not-present page [ 472.096136] PGD 0 P4D 0 [ 472.096136] Oops: 0002 [#1] PREEMPT SMP KASAN NOPTI [ 472.096136] CPU: 0 PID: 7 Comm: kworker/0:0 Tainted: G B 6.9.0-rc5-00356-g78c0094a146b #36 [ 472.096136] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu4 [ 472.096136] Workqueue: events l2cap_chan_timeout [ 472.096136] RIP: 0010:mutex_lock+0x88/0xc0 [ 472.096136] Code: be 08 00 00 00 e8 f8 23 1f fd 4c 89 f7 be 08 00 00 00 e8 eb 23 1f fd 42 80 3c 23 00 74 08 48 88 [ 472.096136] RSP: 0018:ffff88800744fc78 EFLAGS: 00000246 [ 472.096136] RAX: 0000000000000000 RBX: 1ffff11000e89f8f RCX: ffffffff8457c865 [ 472.096136] RDX: 0000000000000001 RSI: 0000000000000008 RDI: ffff88800744fc78 [ 472.096136] RBP: 0000000000000158 R08: ffff88800744fc7f R09: 1ffff11000e89f8f [ 472.096136] R10: dffffc0000000000 R11: ffffed1000e89f90 R12: dffffc0000000000 [ 472.096136] R13: 0000000000000158 R14: ffff88800744fc78 R15: ffff888007405a00 [ 472.096136] FS: 0000000000000000(0000) GS:ffff88806d200000(0000) knlGS:0000000000000000 [ 472.096136] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 472.096136] CR2: 0000000000000158 CR3: 000000000da32000 CR4: 00000000000006f0 [ 472.096136] Call Trace: [ 472.096136] [ 472.096136] ? __die_body+0x8d/0xe0 [ 472.096136] ? page_fault_oops+0x6b8/0x9a0 [ 472.096136] ? kernelmode_fixup_or_oops+0x20c/0x2a0 [ 472.096136] ? do_user_addr_fault+0x1027/0x1340 [ 472.096136] ? _printk+0x7a/0xa0 [ 472.096136] ? mutex_lock+0x68/0xc0 [ 472.096136] ? add_taint+0x42/0xd0 [ 472.096136] ? exc_page_fault+0x6a/0x1b0 [ 472.096136] ? asm_exc_page_fault+0x26/0x30 [ 472.096136] ? mutex_lock+0x75/0xc0 [ 472.096136] ? mutex_lock+0x88/0xc0 [ 472.096136] ? mutex_lock+0x75/0xc0 [ 472.096136] l2cap_chan_timeout+0x181/0x300 [ 472.096136] process_one_work+0x5d2/0xe00 [ 472.096136] worker_thread+0xe1d/0x1660 [ 472.096136] ? pr_cont_work+0x5e0/0x5e0 [ 472.096136] kthread+0x2b7/0x350 [ 472.096136] ? pr_cont_work+0x5e0/0x5e0 [ 472.096136] ? kthread_blkcg+0xd0/0xd0 [ 472.096136] ret_from_fork+0x4d/0x80 [ 472.096136] ? kthread_blkcg+0xd0/0xd0 [ 472.096136] ret_from_fork_asm+0x11/0x20 [ 472.096136] [ 472.096136] Modules linked in: [ 472.096136] CR2: 0000000000000158 [ 472.096136] ---[ end trace 0000000000000000 ]--- [ 472.096136] RIP: 0010:mutex_lock+0x88/0xc0 [ 472.096136] Code: be 08 00 00 00 e8 f8 23 1f fd 4c 89 f7 be 08 00 00 00 e8 eb 23 1f fd 42 80 3c 23 00 74 08 48 88 [ 472.096136] RSP: 0018:ffff88800744fc78 EFLAGS: 00000246 [ 472.096136] RAX: 0000000000000000 RBX: 1ffff11000e89f8f RCX: ffffffff8457c865 [ 472.096136] RDX: 0000000000000001 RSI: 0000000000000008 RDI: ffff88800744fc78 [ 472.096136] RBP: 0000000000000158 R08: ffff88800744fc7f R09: 1ffff11000e89f8f [ 472.132932] R10: dffffc0000000000 R11: ffffed1000e89f90 R12: dffffc0000000000 [ 472.132932] R13: 0000000000000158 R14: ffff88800744fc78 R15: ffff888007405a00 [ 472.132932] FS: 0000000000000000(0000) GS:ffff88806d200000(0000) knlGS:0000000000000000 [ 472.132932] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 472.132932] CR2: 0000000000000158 CR3: 000000000da32000 CR4: 00000000000006f0 [ 472.132932] Kernel panic - not syncing: Fatal exception [ 472.132932] Kernel Offset: disabled [ 472.132932] ---[ end Kernel panic - not syncing: Fatal exception ]--- Add a check to judge whether the conn is null in l2cap_chan_timeout() in order to mitigate the bug. Fixes: 3df91ea20e74 ("Bluetooth: Revert to mutexes from RCU list") Signed-off-by: Duoming Zhou Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 868a370a16aa..9223b1a698e3 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -415,6 +415,9 @@ static void l2cap_chan_timeout(struct work_struct *work) BT_DBG("chan %p state %s", chan, state_to_string(chan->state)); + if (!conn) + return; + mutex_lock(&conn->chan_lock); /* __set_chan_timer() calls l2cap_chan_hold(chan) while scheduling * this work. No need to call l2cap_chan_hold(chan) here again. From 40d442f969fb1e871da6fca73d3f8aef1f888558 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 1 May 2024 08:37:40 +0200 Subject: [PATCH 18/47] Bluetooth: qca: fix firmware check error path A recent commit fixed the code that parses the firmware files before downloading them to the controller but introduced a memory leak in case the sanity checks ever fail. Make sure to free the firmware buffer before returning on errors. Fixes: f905ae0be4b7 ("Bluetooth: qca: add missing firmware sanity checks") Cc: stable@vger.kernel.org # 4.19 Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btqca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 8d8a664620a3..638074992c82 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -605,7 +605,7 @@ static int qca_download_firmware(struct hci_dev *hdev, ret = qca_tlv_check_data(hdev, config, data, size, soc_type); if (ret) - return ret; + goto out; segment = data; remain = size; From e0863634bf9f7cf36291ebb5bfa2d16632f79c49 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 2 May 2024 20:32:59 +0200 Subject: [PATCH 19/47] net: ks8851: Queue RX packets in IRQ handler instead of disabling BHs Currently the driver uses local_bh_disable()/local_bh_enable() in its IRQ handler to avoid triggering net_rx_action() softirq on exit from netif_rx(). The net_rx_action() could trigger this driver .start_xmit callback, which is protected by the same lock as the IRQ handler, so calling the .start_xmit from netif_rx() from the IRQ handler critical section protected by the lock could lead to an attempt to claim the already claimed lock, and a hang. The local_bh_disable()/local_bh_enable() approach works only in case the IRQ handler is protected by a spinlock, but does not work if the IRQ handler is protected by mutex, i.e. this works for KS8851 with Parallel bus interface, but not for KS8851 with SPI bus interface. Remove the BH manipulation and instead of calling netif_rx() inside the IRQ handler code protected by the lock, queue all the received SKBs in the IRQ handler into a queue first, and once the IRQ handler exits the critical section protected by the lock, dequeue all the queued SKBs and push them all into netif_rx(). At this point, it is safe to trigger the net_rx_action() softirq, since the netif_rx() call is outside of the lock that protects the IRQ handler. Fixes: be0384bf599c ("net: ks8851: Handle softirqs at the end of IRQ thread to fix hang") Tested-by: Ronald Wahl # KS8851 SPI Signed-off-by: Marek Vasut Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240502183436.117117-1-marex@denx.de Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/micrel/ks8851_common.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c index d4cdf3d4f552..502518cdb461 100644 --- a/drivers/net/ethernet/micrel/ks8851_common.c +++ b/drivers/net/ethernet/micrel/ks8851_common.c @@ -234,12 +234,13 @@ static void ks8851_dbg_dumpkkt(struct ks8851_net *ks, u8 *rxpkt) /** * ks8851_rx_pkts - receive packets from the host * @ks: The device information. + * @rxq: Queue of packets received in this function. * * This is called from the IRQ work queue when the system detects that there * are packets in the receive queue. Find out how many packets there are and * read them from the FIFO. */ -static void ks8851_rx_pkts(struct ks8851_net *ks) +static void ks8851_rx_pkts(struct ks8851_net *ks, struct sk_buff_head *rxq) { struct sk_buff *skb; unsigned rxfc; @@ -299,7 +300,7 @@ static void ks8851_rx_pkts(struct ks8851_net *ks) ks8851_dbg_dumpkkt(ks, rxpkt); skb->protocol = eth_type_trans(skb, ks->netdev); - __netif_rx(skb); + __skb_queue_tail(rxq, skb); ks->netdev->stats.rx_packets++; ks->netdev->stats.rx_bytes += rxlen; @@ -326,11 +327,11 @@ static void ks8851_rx_pkts(struct ks8851_net *ks) static irqreturn_t ks8851_irq(int irq, void *_ks) { struct ks8851_net *ks = _ks; + struct sk_buff_head rxq; unsigned handled = 0; unsigned long flags; unsigned int status; - - local_bh_disable(); + struct sk_buff *skb; ks8851_lock(ks, &flags); @@ -384,7 +385,8 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) * from the device so do not bother masking just the RX * from the device. */ - ks8851_rx_pkts(ks); + __skb_queue_head_init(&rxq); + ks8851_rx_pkts(ks, &rxq); } /* if something stopped the rx process, probably due to wanting @@ -408,7 +410,9 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) if (status & IRQ_LCI) mii_check_link(&ks->mii); - local_bh_enable(); + if (status & IRQ_RXI) + while ((skb = __skb_dequeue(&rxq))) + netif_rx(skb); return IRQ_HANDLED; } From 1aec77b2bb2ed1db0f5efc61c4c1ca3813307489 Mon Sep 17 00:00:00 2001 From: Roded Zats Date: Thu, 2 May 2024 18:57:51 +0300 Subject: [PATCH 20/47] rtnetlink: Correct nested IFLA_VF_VLAN_LIST attribute validation Each attribute inside a nested IFLA_VF_VLAN_LIST is assumed to be a struct ifla_vf_vlan_info so the size of such attribute needs to be at least of sizeof(struct ifla_vf_vlan_info) which is 14 bytes. The current size validation in do_setvfinfo is against NLA_HDRLEN (4 bytes) which is less than sizeof(struct ifla_vf_vlan_info) so this validation is not enough and a too small attribute might be cast to a struct ifla_vf_vlan_info, this might result in an out of bands read access when accessing the saved (casted) entry in ivvl. Fixes: 79aab093a0b5 ("net: Update API for VF vlan protocol 802.1ad support") Signed-off-by: Roded Zats Reviewed-by: Donald Hunter Link: https://lore.kernel.org/r/20240502155751.75705-1-rzats@paloaltonetworks.com Signed-off-by: Jakub Kicinski --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a3d7847ce69d..8ba6a4e4be26 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2530,7 +2530,7 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) nla_for_each_nested(attr, tb[IFLA_VF_VLAN_LIST], rem) { if (nla_type(attr) != IFLA_VF_VLAN_INFO || - nla_len(attr) < NLA_HDRLEN) { + nla_len(attr) < sizeof(struct ifla_vf_vlan_info)) { return -EINVAL; } if (len >= MAX_VLAN_LIST_LEN) From fa870b45b08ad3a50a305b8f9f5896a5c5f565bc Mon Sep 17 00:00:00 2001 From: Potnuri Bharat Teja Date: Thu, 2 May 2024 14:42:09 -0400 Subject: [PATCH 21/47] MAINTAINERS: update cxgb4 and cxgb3 network drivers maintainer Add myself(Bharat) as maintainer for cxgb4 and cxgb3 network drivers. Signed-off-by: Potnuri Bharat Teja Link: https://lore.kernel.org/r/20240502184209.2723379-1-bharat@chelsio.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index ec0284125e8f..05720fcc95cb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5709,7 +5709,7 @@ Q: http://patchwork.linuxtv.org/project/linux-media/list/ F: drivers/media/dvb-frontends/cxd2820r* CXGB3 ETHERNET DRIVER (CXGB3) -M: Raju Rangoju +M: Potnuri Bharat Teja L: netdev@vger.kernel.org S: Supported W: http://www.chelsio.com @@ -5730,7 +5730,7 @@ W: http://www.chelsio.com F: drivers/crypto/chelsio CXGB4 ETHERNET DRIVER (CXGB4) -M: Raju Rangoju +M: Potnuri Bharat Teja L: netdev@vger.kernel.org S: Supported W: http://www.chelsio.com @@ -5759,7 +5759,7 @@ F: drivers/infiniband/hw/cxgb4/ F: include/uapi/rdma/cxgb4-abi.h CXGB4VF ETHERNET DRIVER (CXGB4VF) -M: Raju Rangoju +M: Potnuri Bharat Teja L: netdev@vger.kernel.org S: Supported W: http://www.chelsio.com From a26ff37e624d12e28077e5b24d2b264f62764ad6 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Thu, 2 May 2024 10:20:06 -0300 Subject: [PATCH 22/47] net: fix out-of-bounds access in ops_init net_alloc_generic is called by net_alloc, which is called without any locking. It reads max_gen_ptrs, which is changed under pernet_ops_rwsem. It is read twice, first to allocate an array, then to set s.len, which is later used to limit the bounds of the array access. It is possible that the array is allocated and another thread is registering a new pernet ops, increments max_gen_ptrs, which is then used to set s.len with a larger than allocated length for the variable array. Fix it by reading max_gen_ptrs only once in net_alloc_generic. If max_gen_ptrs is later incremented, it will be caught in net_assign_generic. Signed-off-by: Thadeu Lima de Souza Cascardo Fixes: 073862ba5d24 ("netns: fix net_alloc_generic()") Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240502132006.3430840-1-cascardo@igalia.com Signed-off-by: Paolo Abeni --- net/core/net_namespace.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index f0540c557515..9d690d32da33 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -69,12 +69,15 @@ DEFINE_COOKIE(net_cookie); static struct net_generic *net_alloc_generic(void) { + unsigned int gen_ptrs = READ_ONCE(max_gen_ptrs); + unsigned int generic_size; struct net_generic *ng; - unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]); + + generic_size = offsetof(struct net_generic, ptr[gen_ptrs]); ng = kzalloc(generic_size, GFP_KERNEL); if (ng) - ng->s.len = max_gen_ptrs; + ng->s.len = gen_ptrs; return ng; } @@ -1307,7 +1310,11 @@ static int register_pernet_operations(struct list_head *list, if (error < 0) return error; *ops->id = error; - max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1); + /* This does not require READ_ONCE as writers already hold + * pernet_ops_rwsem. But WRITE_ONCE is needed to protect + * net_alloc_generic. + */ + WRITE_ONCE(max_gen_ptrs, max(max_gen_ptrs, *ops->id + 1)); } error = __register_pernet_operations(list, ops); if (error) { From d8cac8568618dcb8a51af3db1103e8d4cc4aeea7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 May 2024 16:17:00 +0000 Subject: [PATCH 23/47] phonet: fix rtm_phonet_notify() skb allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fill_route() stores three components in the skb: - struct rtmsg - RTA_DST (u8) - RTA_OIF (u32) Therefore, rtm_phonet_notify() should use NLMSG_ALIGN(sizeof(struct rtmsg)) + nla_total_size(1) + nla_total_size(4) Fixes: f062f41d0657 ("Phonet: routing table Netlink interface") Signed-off-by: Eric Dumazet Acked-by: Rémi Denis-Courmont Link: https://lore.kernel.org/r/20240502161700.1804476-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/phonet/pn_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 59aebe296890..dd4c7e9a634f 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -193,7 +193,7 @@ void rtm_phonet_notify(int event, struct net_device *dev, u8 dst) struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct rtmsg)) + nla_total_size(1) + nla_total_size(4), GFP_KERNEL); if (skb == NULL) goto errout; From 9adcac6506185dd1a727f1784b89f30cd217ef7e Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Fri, 3 May 2024 17:43:04 +0100 Subject: [PATCH 24/47] netlink: specs: Add missing bridge linkinfo attrs Attributes for FDB learned entries were added to the if_link netlink api for bridge linkinfo but are missing from the rt_link.yaml spec. Add the missing attributes to the spec. Fixes: ddd1ad68826d ("net: bridge: Add netlink knobs for number / max learned FDB entries") Signed-off-by: Donald Hunter Acked-by: Nikolay Aleksandrov Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20240503164304.87427-1-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt_link.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt_link.yaml index 8e4d19adee8c..4e702ac8bf66 100644 --- a/Documentation/netlink/specs/rt_link.yaml +++ b/Documentation/netlink/specs/rt_link.yaml @@ -1144,6 +1144,12 @@ attribute-sets: - name: mcast-querier-state type: binary + - + name: fdb-n-learned + type: u32 + - + name: fdb-max-learned + type: u32 - name: linkinfo-brport-attrs name-prefix: ifla-brport- From 19e35f24750ddf860c51e51c68cf07ea181b4881 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 5 May 2024 19:36:49 +0900 Subject: [PATCH 25/47] nfc: nci: Fix kcov check in nci_rx_work() Commit 7e8cdc97148c ("nfc: Add KCOV annotations") added kcov_remote_start_common()/kcov_remote_stop() pair into nci_rx_work(), with an assumption that kcov_remote_stop() is called upon continue of the for loop. But commit d24b03535e5e ("nfc: nci: Fix uninit-value in nci_dev_up and nci_ntf_packet") forgot to call kcov_remote_stop() before break of the for loop. Reported-by: syzbot Closes: https://syzkaller.appspot.com/bug?extid=0438378d6f157baae1a2 Fixes: d24b03535e5e ("nfc: nci: Fix uninit-value in nci_dev_up and nci_ntf_packet") Suggested-by: Andrey Konovalov Signed-off-by: Tetsuo Handa Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/6d10f829-5a0c-405a-b39a-d7266f3a1a0b@I-love.SAKURA.ne.jp Signed-off-by: Jakub Kicinski --- net/nfc/nci/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 0d26c8ec9993..b133dc55304c 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1518,6 +1518,7 @@ static void nci_rx_work(struct work_struct *work) if (!nci_plen(skb->data)) { kfree_skb(skb); + kcov_remote_stop(); break; } From 6963c508fd7ab66ae0b7ae3db9a62ca6267f1ae8 Mon Sep 17 00:00:00 2001 From: Gregory Detal Date: Mon, 6 May 2024 17:35:28 +0200 Subject: [PATCH 26/47] mptcp: only allow set existing scheduler for net.mptcp.scheduler The current behavior is to accept any strings as inputs, this results in an inconsistent result where an unexisting scheduler can be set: # sysctl -w net.mptcp.scheduler=notdefault net.mptcp.scheduler = notdefault This patch changes this behavior by checking for existing scheduler before accepting the input. Fixes: e3b2870b6d22 ("mptcp: add a new sysctl scheduler") Cc: stable@vger.kernel.org Signed-off-by: Gregory Detal Reviewed-by: Matthieu Baerts (NGI0) Tested-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240506-upstream-net-20240506-mptcp-sched-exist-v1-1-2ed1529e521e@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/ctrl.c | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index 13fe0748dde8..2963ba84e2ee 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -96,6 +96,43 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) } #ifdef CONFIG_SYSCTL +static int mptcp_set_scheduler(const struct net *net, const char *name) +{ + struct mptcp_pernet *pernet = mptcp_get_pernet(net); + struct mptcp_sched_ops *sched; + int ret = 0; + + rcu_read_lock(); + sched = mptcp_sched_find(name); + if (sched) + strscpy(pernet->scheduler, name, MPTCP_SCHED_NAME_MAX); + else + ret = -ENOENT; + rcu_read_unlock(); + + return ret; +} + +static int proc_scheduler(struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + const struct net *net = current->nsproxy->net_ns; + char val[MPTCP_SCHED_NAME_MAX]; + struct ctl_table tbl = { + .data = val, + .maxlen = MPTCP_SCHED_NAME_MAX, + }; + int ret; + + strscpy(val, mptcp_get_scheduler(net), MPTCP_SCHED_NAME_MAX); + + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + if (write && ret == 0) + ret = mptcp_set_scheduler(net, val); + + return ret; +} + static struct ctl_table mptcp_sysctl_table[] = { { .procname = "enabled", @@ -148,7 +185,7 @@ static struct ctl_table mptcp_sysctl_table[] = { .procname = "scheduler", .maxlen = MPTCP_SCHED_NAME_MAX, .mode = 0644, - .proc_handler = proc_dostring, + .proc_handler = proc_scheduler, }, { .procname = "close_timeout", From 86b29d830ad69eecff25b22dc96c14c6573718e6 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 5 May 2024 20:42:38 +0200 Subject: [PATCH 27/47] net: bridge: fix corrupted ethernet header on multicast-to-unicast The change from skb_copy to pskb_copy unfortunately changed the data copying to omit the ethernet header, since it was pulled before reaching this point. Fix this by calling __skb_push/pull around pskb_copy. Fixes: 59c878cbcdd8 ("net: bridge: fix multicast-to-unicast with fraglist GSO") Signed-off-by: Felix Fietkau Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_forward.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index d7c35f55bd69..d97064d460dc 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -258,6 +258,7 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb, { struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; const unsigned char *src = eth_hdr(skb)->h_source; + struct sk_buff *nskb; if (!should_deliver(p, skb)) return; @@ -266,12 +267,16 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb, if (skb->dev == p->dev && ether_addr_equal(src, addr)) return; - skb = pskb_copy(skb, GFP_ATOMIC); - if (!skb) { + __skb_push(skb, ETH_HLEN); + nskb = pskb_copy(skb, GFP_ATOMIC); + __skb_pull(skb, ETH_HLEN); + if (!nskb) { DEV_STATS_INC(dev, tx_dropped); return; } + skb = nskb; + __skb_pull(skb, ETH_HLEN); if (!is_broadcast_ether_addr(addr)) memcpy(eth_hdr(skb)->h_dest, addr, ETH_ALEN); From 2e82a58d6c0797092eabe7ba66a532c11548047f Mon Sep 17 00:00:00 2001 From: Vincent Duvert Date: Sun, 5 May 2024 11:54:57 -0700 Subject: [PATCH 28/47] appletalk: Improve handling of broadcast packets When a broadcast AppleTalk packet is received, prefer queuing it on the socket whose address matches the address of the interface that received the packet (and is listening on the correct port). Userspace applications that handle such packets will usually send a response on the same socket that received the packet; this fix allows the response to be sent on the correct interface. If a socket matching the interface's address is not found, an arbitrary socket listening on the correct port will be used, if any. This matches the implementation's previous behavior. Fixes atalkd's responses to network information requests when multiple network interfaces are configured to use AppleTalk. Link: https://lore.kernel.org/netdev/20200722113752.1218-2-vincent.ldev@duvert.net/ Link: https://gist.github.com/VinDuv/4db433b6dce39d51a5b7847ee749b2a4 Signed-off-by: Vincent Duvert Signed-off-by: Doug Brown Signed-off-by: David S. Miller --- net/appletalk/ddp.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 198f5ba2feae..b068651984fe 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -88,6 +88,7 @@ static inline void atalk_remove_socket(struct sock *sk) static struct sock *atalk_search_socket(struct sockaddr_at *to, struct atalk_iface *atif) { + struct sock *def_socket = NULL; struct sock *s; read_lock_bh(&atalk_sockets_lock); @@ -98,8 +99,20 @@ static struct sock *atalk_search_socket(struct sockaddr_at *to, continue; if (to->sat_addr.s_net == ATADDR_ANYNET && - to->sat_addr.s_node == ATADDR_BCAST) - goto found; + to->sat_addr.s_node == ATADDR_BCAST) { + if (atif->address.s_node == at->src_node && + atif->address.s_net == at->src_net) { + /* This socket's address matches the address of the interface + * that received the packet -- use it + */ + goto found; + } + + /* Continue searching for a socket matching the interface address, + * but use this socket by default if no other one is found + */ + def_socket = s; + } if (to->sat_addr.s_net == at->src_net && (to->sat_addr.s_node == at->src_node || @@ -116,7 +129,7 @@ static struct sock *atalk_search_socket(struct sockaddr_at *to, goto found; } } - s = NULL; + s = def_socket; found: read_unlock_bh(&atalk_sockets_lock); return s; From ab0cde321adc96a755caf39eb0e90d61511ec6c4 Mon Sep 17 00:00:00 2001 From: Gregor Herburger Date: Mon, 6 May 2024 08:24:33 +0200 Subject: [PATCH 29/47] net: phy: marvell-88q2xxx: add support for Rev B1 and B2 Different revisions of the Marvell 88q2xxx phy needs different init sequences. Add init sequence for Rev B1 and Rev B2. Rev B2 init sequence skips one register write. Tested-by: Dimitri Fedrau Signed-off-by: Gregor Herburger Signed-off-by: David S. Miller --- drivers/net/phy/marvell-88q2xxx.c | 119 ++++++++++++++++++++++++++---- 1 file changed, 103 insertions(+), 16 deletions(-) diff --git a/drivers/net/phy/marvell-88q2xxx.c b/drivers/net/phy/marvell-88q2xxx.c index 6b4bd9883304..c812f16eaa3a 100644 --- a/drivers/net/phy/marvell-88q2xxx.c +++ b/drivers/net/phy/marvell-88q2xxx.c @@ -12,6 +12,8 @@ #include #define PHY_ID_88Q2220_REVB0 (MARVELL_PHY_ID_88Q2220 | 0x1) +#define PHY_ID_88Q2220_REVB1 (MARVELL_PHY_ID_88Q2220 | 0x2) +#define PHY_ID_88Q2220_REVB2 (MARVELL_PHY_ID_88Q2220 | 0x3) #define MDIO_MMD_AN_MV_STAT 32769 #define MDIO_MMD_AN_MV_STAT_ANEG 0x0100 @@ -129,6 +131,49 @@ static const struct mmd_val mv88q222x_revb0_init_seq1[] = { { MDIO_MMD_PCS, 0xfe05, 0x755c }, }; +static const struct mmd_val mv88q222x_revb1_init_seq0[] = { + { MDIO_MMD_PCS, 0xffe4, 0x0007 }, + { MDIO_MMD_AN, MDIO_AN_T1_CTRL, 0x0 }, + { MDIO_MMD_PCS, 0xffe3, 0x7000 }, + { MDIO_MMD_PMAPMD, MDIO_CTRL1, 0x0840 }, +}; + +static const struct mmd_val mv88q222x_revb2_init_seq0[] = { + { MDIO_MMD_PCS, 0xffe4, 0x0007 }, + { MDIO_MMD_AN, MDIO_AN_T1_CTRL, 0x0 }, + { MDIO_MMD_PMAPMD, MDIO_CTRL1, 0x0840 }, +}; + +static const struct mmd_val mv88q222x_revb1_revb2_init_seq1[] = { + { MDIO_MMD_PCS, 0xfe07, 0x125a }, + { MDIO_MMD_PCS, 0xfe09, 0x1288 }, + { MDIO_MMD_PCS, 0xfe08, 0x2588 }, + { MDIO_MMD_PCS, 0xfe72, 0x042c }, + { MDIO_MMD_PCS, 0xffe4, 0x0071 }, + { MDIO_MMD_PCS, 0xffe4, 0x0001 }, + { MDIO_MMD_PCS, 0xfe1b, 0x0048 }, + { MDIO_MMD_PMAPMD, 0x0000, 0x0000 }, + { MDIO_MMD_PCS, 0x0000, 0x0000 }, + { MDIO_MMD_PCS, 0xffdb, 0xfc10 }, + { MDIO_MMD_PCS, 0xfe1b, 0x58 }, + { MDIO_MMD_PCS, 0xfcad, 0x030c }, + { MDIO_MMD_PCS, 0x8032, 0x6001 }, + { MDIO_MMD_PCS, 0xfdff, 0x05a5 }, + { MDIO_MMD_PCS, 0xfdec, 0xdbaf }, + { MDIO_MMD_PCS, 0xfcab, 0x1054 }, + { MDIO_MMD_PCS, 0xfcac, 0x1483 }, + { MDIO_MMD_PCS, 0x8033, 0xc801 }, + { MDIO_MMD_AN, 0x8032, 0x2020 }, + { MDIO_MMD_AN, 0x8031, 0xa28 }, + { MDIO_MMD_AN, 0x8031, 0xc28 }, + { MDIO_MMD_PCS, 0xfbba, 0x0cb2 }, + { MDIO_MMD_PCS, 0xfbbb, 0x0c4a }, + { MDIO_MMD_PCS, 0xfe5f, 0xe8 }, + { MDIO_MMD_PCS, 0xfe05, 0x755c }, + { MDIO_MMD_PCS, 0xfa20, 0x002a }, + { MDIO_MMD_PCS, 0xfe11, 0x1105 }, +}; + static int mv88q2xxx_soft_reset(struct phy_device *phydev) { int ret; @@ -687,31 +732,72 @@ static int mv88q222x_soft_reset(struct phy_device *phydev) return 0; } -static int mv88q222x_revb0_config_init(struct phy_device *phydev) +static int mv88q222x_write_mmd_vals(struct phy_device *phydev, + const struct mmd_val *vals, size_t len) { - int ret, i; + int ret; - for (i = 0; i < ARRAY_SIZE(mv88q222x_revb0_init_seq0); i++) { - ret = phy_write_mmd(phydev, mv88q222x_revb0_init_seq0[i].devad, - mv88q222x_revb0_init_seq0[i].regnum, - mv88q222x_revb0_init_seq0[i].val); + for (; len; vals++, len--) { + ret = phy_write_mmd(phydev, vals->devad, vals->regnum, + vals->val); if (ret < 0) return ret; } + return 0; +} + +static int mv88q222x_revb0_config_init(struct phy_device *phydev) +{ + int ret; + + ret = mv88q222x_write_mmd_vals(phydev, mv88q222x_revb0_init_seq0, + ARRAY_SIZE(mv88q222x_revb0_init_seq0)); + if (ret < 0) + return ret; + usleep_range(5000, 10000); - for (i = 0; i < ARRAY_SIZE(mv88q222x_revb0_init_seq1); i++) { - ret = phy_write_mmd(phydev, mv88q222x_revb0_init_seq1[i].devad, - mv88q222x_revb0_init_seq1[i].regnum, - mv88q222x_revb0_init_seq1[i].val); - if (ret < 0) - return ret; - } + ret = mv88q222x_write_mmd_vals(phydev, mv88q222x_revb0_init_seq1, + ARRAY_SIZE(mv88q222x_revb0_init_seq1)); + if (ret < 0) + return ret; return mv88q2xxx_config_init(phydev); } +static int mv88q222x_revb1_revb2_config_init(struct phy_device *phydev) +{ + bool is_rev_b1 = phydev->c45_ids.device_ids[MDIO_MMD_PMAPMD] == PHY_ID_88Q2220_REVB1; + int ret; + + if (is_rev_b1) + ret = mv88q222x_write_mmd_vals(phydev, mv88q222x_revb1_init_seq0, + ARRAY_SIZE(mv88q222x_revb1_init_seq0)); + else + ret = mv88q222x_write_mmd_vals(phydev, mv88q222x_revb2_init_seq0, + ARRAY_SIZE(mv88q222x_revb2_init_seq0)); + if (ret < 0) + return ret; + + usleep_range(3000, 5000); + + ret = mv88q222x_write_mmd_vals(phydev, mv88q222x_revb1_revb2_init_seq1, + ARRAY_SIZE(mv88q222x_revb1_revb2_init_seq1)); + if (ret < 0) + return ret; + + return mv88q2xxx_config_init(phydev); +} + +static int mv88q222x_config_init(struct phy_device *phydev) +{ + if (phydev->c45_ids.device_ids[MDIO_MMD_PMAPMD] == PHY_ID_88Q2220_REVB0) + return mv88q222x_revb0_config_init(phydev); + else + return mv88q222x_revb1_revb2_config_init(phydev); +} + static int mv88q222x_cable_test_start(struct phy_device *phydev) { int ret; @@ -810,14 +896,15 @@ static struct phy_driver mv88q2xxx_driver[] = { .get_sqi_max = mv88q2xxx_get_sqi_max, }, { - PHY_ID_MATCH_EXACT(PHY_ID_88Q2220_REVB0), + .phy_id = MARVELL_PHY_ID_88Q2220, + .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "mv88q2220", .flags = PHY_POLL_CABLE_TEST, .probe = mv88q2xxx_probe, .get_features = mv88q2xxx_get_features, .config_aneg = mv88q2xxx_config_aneg, .aneg_done = genphy_c45_aneg_done, - .config_init = mv88q222x_revb0_config_init, + .config_init = mv88q222x_config_init, .read_status = mv88q2xxx_read_status, .soft_reset = mv88q222x_soft_reset, .config_intr = mv88q2xxx_config_intr, @@ -836,7 +923,7 @@ module_phy_driver(mv88q2xxx_driver); static struct mdio_device_id __maybe_unused mv88q2xxx_tbl[] = { { MARVELL_PHY_ID_88Q2110, MARVELL_PHY_ID_MASK }, - { PHY_ID_MATCH_EXACT(PHY_ID_88Q2220_REVB0), }, + { MARVELL_PHY_ID_88Q2220, MARVELL_PHY_ID_MASK }, { /*sentinel*/ } }; MODULE_DEVICE_TABLE(mdio, mv88q2xxx_tbl); From 4e13d3a9c25b7080f8a619f961e943fe08c2672c Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Mon, 6 May 2024 23:11:29 +0900 Subject: [PATCH 30/47] ipv6: Fix potential uninit-value access in __ip6_make_skb() As it was done in commit fc1092f51567 ("ipv4: Fix uninit-value access in __ip_make_skb()") for IPv4, check FLOWI_FLAG_KNOWN_NH on fl6->flowi6_flags instead of testing HDRINCL on the socket to avoid a race condition which causes uninit-value access. Fixes: ea30388baebc ("ipv6: Fix an uninit variable access bug in __ip6_make_skb()") Signed-off-by: Shigeru Yoshida Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index b9dd3a66e423..fa2937732665 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1933,7 +1933,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, u8 icmp6_type; if (sk->sk_socket->type == SOCK_RAW && - !inet_test_bit(HDRINCL, sk)) + !(fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH)) icmp6_type = fl6->fl6_icmp_type; else icmp6_type = icmp6_hdr(skb)->icmp6_type; From 9a169c267e946b0f47f67e8ccc70134708ccf3d4 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 7 May 2024 14:30:33 +0300 Subject: [PATCH 31/47] selftests: test_bridge_neigh_suppress.sh: Fix failures due to duplicate MAC When creating the topology for the test, three veth pairs are created in the initial network namespace before being moved to one of the network namespaces created by the test. On systems where systemd-udev uses MACAddressPolicy=persistent (default since systemd version 242), this will result in some net devices having the same MAC address since they were created with the same name in the initial network namespace. In turn, this leads to arping / ndisc6 failing since packets are dropped by the bridge's loopback filter. Fix by creating each net device in the correct network namespace instead of moving it there from the initial network namespace. Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20240426074015.251854d4@kernel.org/ Fixes: 7648ac72dcd7 ("selftests: net: Add bridge neighbor suppression test") Signed-off-by: Ido Schimmel Link: https://lore.kernel.org/r/20240507113033.1732534-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- .../selftests/net/test_bridge_neigh_suppress.sh | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh index 8533393a4f18..02b986c9c247 100755 --- a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh +++ b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh @@ -154,17 +154,9 @@ setup_topo() setup_topo_ns $ns done - ip link add name veth0 type veth peer name veth1 - ip link set dev veth0 netns $h1 name eth0 - ip link set dev veth1 netns $sw1 name swp1 - - ip link add name veth0 type veth peer name veth1 - ip link set dev veth0 netns $sw1 name veth0 - ip link set dev veth1 netns $sw2 name veth0 - - ip link add name veth0 type veth peer name veth1 - ip link set dev veth0 netns $h2 name eth0 - ip link set dev veth1 netns $sw2 name swp1 + ip -n $h1 link add name eth0 type veth peer name swp1 netns $sw1 + ip -n $sw1 link add name veth0 type veth peer name veth0 netns $sw2 + ip -n $h2 link add name eth0 type veth peer name swp1 netns $sw2 } setup_host_common() From ba4e103848d3a2a28a0445e39f4a9564187efe54 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 3 May 2024 16:07:39 +0100 Subject: [PATCH 32/47] rxrpc: Fix congestion control algorithm Make the following fixes to the congestion control algorithm: (1) Don't vary the cwnd starting value by the size of RXRPC_TX_SMSS since that's currently held constant - set to the size of a jumbo subpacket payload so that we can create jumbo packets on the fly. The current code invariably picks 3 as the starting value. Further, the starting cwnd needs to be an even number because we ack every other packet, so set it to 4. (2) Don't cut ssthresh when we see an ACK come from the peer with a receive window (rwind) less than ssthresh. ssthresh keeps track of characteristics of the connection whereas rwind may be reduced by the peer for any reason - and may be reduced to 0. Fixes: 1fc4fa2ac93d ("rxrpc: Fix congestion management") Fixes: 0851115090a3 ("rxrpc: Reduce ssthresh to peer's receive window") Signed-off-by: David Howells Suggested-by: Simon Wilkinson cc: Marc Dionne cc: linux-afs@lists.infradead.org Reviewed-by: Jeffrey Altman > Link: https://lore.kernel.org/r/20240503150749.1001323-2-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/ar-internal.h | 2 +- net/rxrpc/call_object.c | 7 +------ net/rxrpc/input.c | 3 --- 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 08c0a32db8c7..08de24658f4f 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -697,7 +697,7 @@ struct rxrpc_call { * packets) rather than bytes. */ #define RXRPC_TX_SMSS RXRPC_JUMBO_DATALEN -#define RXRPC_MIN_CWND (RXRPC_TX_SMSS > 2190 ? 2 : RXRPC_TX_SMSS > 1095 ? 3 : 4) +#define RXRPC_MIN_CWND 4 u8 cong_cwnd; /* Congestion window size */ u8 cong_extra; /* Extra to send for congestion management */ u8 cong_ssthresh; /* Slow-start threshold */ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 01fa71e8b1f7..f9e983a12c14 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -174,12 +174,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, call->rx_winsize = rxrpc_rx_window_size; call->tx_winsize = 16; - if (RXRPC_TX_SMSS > 2190) - call->cong_cwnd = 2; - else if (RXRPC_TX_SMSS > 1095) - call->cong_cwnd = 3; - else - call->cong_cwnd = 4; + call->cong_cwnd = RXRPC_MIN_CWND; call->cong_ssthresh = RXRPC_TX_MAX_WINDOW; call->rxnet = rxnet; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 3dedb8c0618c..860075f1290b 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -685,9 +685,6 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb call->tx_winsize = rwind; } - if (call->cong_ssthresh > rwind) - call->cong_ssthresh = rwind; - mtu = min(ntohl(trailer->maxMTU), ntohl(trailer->ifMTU)); peer = call->peer; From 012b7206918dcc5a4dcf1432b3e643114c95957e Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 3 May 2024 16:07:40 +0100 Subject: [PATCH 33/47] rxrpc: Only transmit one ACK per jumbo packet received Only generate one ACK packet for all the subpackets in a jumbo packet. If we would like to generate more than one ACK, we prioritise them base on their reason code, in the order, highest first: OutOfSeq > NoSpace > ExceedsWin > Duplicate > Requested > Delay > Idle For the first four, we reference the lowest offending subpacket; for the last three, the highest. This reduces the number of ACKs we end up transmitting to one per UDP packet transmitted to reduce network loading and packet parsing. Fixes: 5d7edbc9231e ("rxrpc: Get rid of the Rx ring") Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Reviewed-by: Jeffrey Altman > Link: https://lore.kernel.org/r/20240503150749.1001323-3-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/input.c | 46 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 860075f1290b..16d49a861dbb 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -9,6 +9,17 @@ #include "ar-internal.h" +/* Override priority when generating ACKs for received DATA */ +static const u8 rxrpc_ack_priority[RXRPC_ACK__INVALID] = { + [RXRPC_ACK_IDLE] = 1, + [RXRPC_ACK_DELAY] = 2, + [RXRPC_ACK_REQUESTED] = 3, + [RXRPC_ACK_DUPLICATE] = 4, + [RXRPC_ACK_EXCEEDS_WINDOW] = 5, + [RXRPC_ACK_NOSPACE] = 6, + [RXRPC_ACK_OUT_OF_SEQUENCE] = 7, +}; + static void rxrpc_proto_abort(struct rxrpc_call *call, rxrpc_seq_t seq, enum rxrpc_abort_reason why) { @@ -365,7 +376,7 @@ static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb, * Process a DATA packet. */ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb, - bool *_notify) + bool *_notify, rxrpc_serial_t *_ack_serial, int *_ack_reason) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct sk_buff *oos; @@ -418,8 +429,6 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb, /* Send an immediate ACK if we fill in a hole */ else if (!skb_queue_empty(&call->rx_oos_queue)) ack_reason = RXRPC_ACK_DELAY; - else - call->ackr_nr_unacked++; window++; if (after(window, wtop)) { @@ -497,12 +506,16 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb, } send_ack: - if (ack_reason >= 0) - rxrpc_send_ACK(call, ack_reason, serial, - rxrpc_propose_ack_input_data); - else - rxrpc_propose_delay_ACK(call, serial, - rxrpc_propose_ack_input_data); + if (ack_reason >= 0) { + if (rxrpc_ack_priority[ack_reason] > rxrpc_ack_priority[*_ack_reason]) { + *_ack_serial = serial; + *_ack_reason = ack_reason; + } else if (rxrpc_ack_priority[ack_reason] == rxrpc_ack_priority[*_ack_reason] && + ack_reason == RXRPC_ACK_REQUESTED) { + *_ack_serial = serial; + *_ack_reason = ack_reason; + } + } } /* @@ -513,9 +526,11 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb struct rxrpc_jumbo_header jhdr; struct rxrpc_skb_priv *sp = rxrpc_skb(skb), *jsp; struct sk_buff *jskb; + rxrpc_serial_t ack_serial = 0; unsigned int offset = sizeof(struct rxrpc_wire_header); unsigned int len = skb->len - offset; bool notify = false; + int ack_reason = 0; while (sp->hdr.flags & RXRPC_JUMBO_PACKET) { if (len < RXRPC_JUMBO_SUBPKTLEN) @@ -535,7 +550,7 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb jsp = rxrpc_skb(jskb); jsp->offset = offset; jsp->len = RXRPC_JUMBO_DATALEN; - rxrpc_input_data_one(call, jskb, ¬ify); + rxrpc_input_data_one(call, jskb, ¬ify, &ack_serial, &ack_reason); rxrpc_free_skb(jskb, rxrpc_skb_put_jumbo_subpacket); sp->hdr.flags = jhdr.flags; @@ -548,7 +563,16 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb sp->offset = offset; sp->len = len; - rxrpc_input_data_one(call, skb, ¬ify); + rxrpc_input_data_one(call, skb, ¬ify, &ack_serial, &ack_reason); + + if (ack_reason > 0) { + rxrpc_send_ACK(call, ack_reason, ack_serial, + rxrpc_propose_ack_input_data); + } else { + call->ackr_nr_unacked++; + rxrpc_propose_delay_ACK(call, sp->hdr.serial, + rxrpc_propose_ack_input_data); + } if (notify) { trace_rxrpc_notify_socket(call->debug_id, sp->hdr.serial); rxrpc_notify_socket(call); From cc349b0771dccebf0fa9f5e1822ac444aef11448 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Tue, 7 May 2024 13:20:43 +0100 Subject: [PATCH 34/47] dt-bindings: net: mediatek: remove wrongly added clocks and SerDes Several clocks as well as both sgmiisys phandles were added by mistake to the Ethernet bindings for MT7988. Also, the total number of clocks didn't match with the actual number of items listed. This happened because the vendor driver which served as a reference uses a high number of syscon phandles to access various parts of the SoC which wasn't acceptable upstream. Hence several parts which have never previously been supported (such SerDes PHY and USXGMII PCS) are going to be implemented by separate drivers. As a result the device tree will look much more sane. Quickly align the bindings with the upcoming reality of the drivers actually adding support for the remaining Ethernet-related features of the MT7988 SoC. Fixes: c94a9aabec36 ("dt-bindings: net: mediatek,net: add mt7988-eth binding") Signed-off-by: Daniel Golle Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/1569290b21cc787a424469ed74456a7e976b102d.1715084326.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- .../devicetree/bindings/net/mediatek,net.yaml | 22 ++----------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/Documentation/devicetree/bindings/net/mediatek,net.yaml b/Documentation/devicetree/bindings/net/mediatek,net.yaml index e74502a0afe8..3202dc7967c5 100644 --- a/Documentation/devicetree/bindings/net/mediatek,net.yaml +++ b/Documentation/devicetree/bindings/net/mediatek,net.yaml @@ -337,8 +337,8 @@ allOf: minItems: 4 clocks: - minItems: 34 - maxItems: 34 + minItems: 24 + maxItems: 24 clock-names: items: @@ -351,18 +351,6 @@ allOf: - const: ethwarp_wocpu1 - const: ethwarp_wocpu0 - const: esw - - const: netsys0 - - const: netsys1 - - const: sgmii_tx250m - - const: sgmii_rx250m - - const: sgmii2_tx250m - - const: sgmii2_rx250m - - const: top_usxgmii0_sel - - const: top_usxgmii1_sel - - const: top_sgm0_sel - - const: top_sgm1_sel - - const: top_xfi_phy0_xtal_sel - - const: top_xfi_phy1_xtal_sel - const: top_eth_gmii_sel - const: top_eth_refck_50m_sel - const: top_eth_sys_200m_sel @@ -375,16 +363,10 @@ allOf: - const: top_netsys_sync_250m_sel - const: top_netsys_ppefb_250m_sel - const: top_netsys_warp_sel - - const: wocpu1 - - const: wocpu0 - const: xgp1 - const: xgp2 - const: xgp3 - mediatek,sgmiisys: - minItems: 2 - maxItems: 2 - patternProperties: "^mac@[0-1]$": type: object From d101291b2681e5ab938554e3e323f7a7ee33e3aa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 May 2024 16:31:45 +0000 Subject: [PATCH 35/47] ipv6: fib6_rules: avoid possible NULL dereference in fib6_rule_action() syzbot is able to trigger the following crash [1], caused by unsafe ip6_dst_idev() use. Indeed ip6_dst_idev() can return NULL, and must always be checked. [1] Oops: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN PTI KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 0 PID: 31648 Comm: syz-executor.0 Not tainted 6.9.0-rc4-next-20240417-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024 RIP: 0010:__fib6_rule_action net/ipv6/fib6_rules.c:237 [inline] RIP: 0010:fib6_rule_action+0x241/0x7b0 net/ipv6/fib6_rules.c:267 Code: 02 00 00 49 8d 9f d8 00 00 00 48 89 d8 48 c1 e8 03 42 80 3c 20 00 74 08 48 89 df e8 f9 32 bf f7 48 8b 1b 48 89 d8 48 c1 e8 03 <42> 80 3c 20 00 74 08 48 89 df e8 e0 32 bf f7 4c 8b 03 48 89 ef 4c RSP: 0018:ffffc9000fc1f2f0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 1a772f98c8186700 RDX: 0000000000000003 RSI: ffffffff8bcac4e0 RDI: ffffffff8c1f9760 RBP: ffff8880673fb980 R08: ffffffff8fac15ef R09: 1ffffffff1f582bd R10: dffffc0000000000 R11: fffffbfff1f582be R12: dffffc0000000000 R13: 0000000000000080 R14: ffff888076509000 R15: ffff88807a029a00 FS: 00007f55e82ca6c0(0000) GS:ffff8880b9400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b31d23000 CR3: 0000000022b66000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: fib_rules_lookup+0x62c/0xdb0 net/core/fib_rules.c:317 fib6_rule_lookup+0x1fd/0x790 net/ipv6/fib6_rules.c:108 ip6_route_output_flags_noref net/ipv6/route.c:2637 [inline] ip6_route_output_flags+0x38e/0x610 net/ipv6/route.c:2649 ip6_route_output include/net/ip6_route.h:93 [inline] ip6_dst_lookup_tail+0x189/0x11a0 net/ipv6/ip6_output.c:1120 ip6_dst_lookup_flow+0xb9/0x180 net/ipv6/ip6_output.c:1250 sctp_v6_get_dst+0x792/0x1e20 net/sctp/ipv6.c:326 sctp_transport_route+0x12c/0x2e0 net/sctp/transport.c:455 sctp_assoc_add_peer+0x614/0x15c0 net/sctp/associola.c:662 sctp_connect_new_asoc+0x31d/0x6c0 net/sctp/socket.c:1099 __sctp_connect+0x66d/0xe30 net/sctp/socket.c:1197 sctp_connect net/sctp/socket.c:4819 [inline] sctp_inet_connect+0x149/0x1f0 net/sctp/socket.c:4834 __sys_connect_file net/socket.c:2048 [inline] __sys_connect+0x2df/0x310 net/socket.c:2065 __do_sys_connect net/socket.c:2075 [inline] __se_sys_connect net/socket.c:2072 [inline] __x64_sys_connect+0x7a/0x90 net/socket.c:2072 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf5/0x240 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: 5e5f3f0f8013 ("[IPV6] ADDRCONF: Convert ipv6_get_saddr() to ipv6_dev_get_saddr().") Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240507163145.835254-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/fib6_rules.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 52c04f0ac498..9e254de7462f 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -233,8 +233,12 @@ static int __fib6_rule_action(struct fib_rule *rule, struct flowi *flp, rt = pol_lookup_func(lookup, net, table, flp6, arg->lookup_data, flags); if (rt != net->ipv6.ip6_null_entry) { + struct inet6_dev *idev = ip6_dst_idev(&rt->dst); + + if (!idev) + goto again; err = fib6_rule_saddr(net, rule, flags, flp6, - ip6_dst_idev(&rt->dst)->dev); + idev->dev); if (err == -EAGAIN) goto again; From 4893b8b3ef8db2b182d1a1bebf6c7acf91405000 Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Tue, 7 May 2024 13:12:14 +0200 Subject: [PATCH 36/47] hsr: Simplify code for announcing HSR nodes timer setup Up till now the code to start HSR announce timer, which triggers sending supervisory frames, was assuming that hsr_netdev_notify() would be called at least twice for hsrX interface. This was required to have different values for old and current values of network device's operstate. This is problematic for a case where hsrX interface is already in the operational state when hsr_netdev_notify() is called, so timer is not configured to trigger and as a result the hsrX is not sending supervisory frames to HSR ring. This error has been discovered when hsr_ping.sh script was run. To be more specific - for the hsr1 and hsr2 the hsr_netdev_notify() was called at least twice with different IF_OPER_{LOWERDOWN|DOWN|UP} states assigned in hsr_check_carrier_and_operstate(hsr). As a result there was no issue with sending supervisory frames. However, with hsr3, the notify function was called only once with operstate set to IF_OPER_UP and timer responsible for triggering supervisory frames was not fired. The solution is to use netif_oper_up() and netif_running() helper functions to assess if network hsrX device is up. Only then, when the timer is not already pending, it is started. Otherwise it is deactivated. Fixes: f421436a591d ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)") Signed-off-by: Lukasz Majewski Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240507111214.3519800-1-lukma@denx.de Signed-off-by: Jakub Kicinski --- net/hsr/hsr_device.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index e9d45133d641..5afc450d0855 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -61,39 +61,36 @@ static bool hsr_check_carrier(struct hsr_port *master) return false; } -static void hsr_check_announce(struct net_device *hsr_dev, - unsigned char old_operstate) +static void hsr_check_announce(struct net_device *hsr_dev) { struct hsr_priv *hsr; hsr = netdev_priv(hsr_dev); - - if (READ_ONCE(hsr_dev->operstate) == IF_OPER_UP && old_operstate != IF_OPER_UP) { - /* Went up */ - hsr->announce_count = 0; - mod_timer(&hsr->announce_timer, - jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL)); + if (netif_running(hsr_dev) && netif_oper_up(hsr_dev)) { + /* Enable announce timer and start sending supervisory frames */ + if (!timer_pending(&hsr->announce_timer)) { + hsr->announce_count = 0; + mod_timer(&hsr->announce_timer, jiffies + + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL)); + } + } else { + /* Deactivate the announce timer */ + timer_delete(&hsr->announce_timer); } - - if (READ_ONCE(hsr_dev->operstate) != IF_OPER_UP && old_operstate == IF_OPER_UP) - /* Went down */ - del_timer(&hsr->announce_timer); } void hsr_check_carrier_and_operstate(struct hsr_priv *hsr) { struct hsr_port *master; - unsigned char old_operstate; bool has_carrier; master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); /* netif_stacked_transfer_operstate() cannot be used here since * it doesn't set IF_OPER_LOWERLAYERDOWN (?) */ - old_operstate = READ_ONCE(master->dev->operstate); has_carrier = hsr_check_carrier(master); hsr_set_operstate(master, has_carrier); - hsr_check_announce(master->dev, old_operstate); + hsr_check_announce(master->dev); } int hsr_get_max_mtu(struct hsr_priv *hsr) From 4db783d68b9b39a411a96096c10828ff5dfada7a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 May 2024 16:18:42 +0000 Subject: [PATCH 37/47] ipv6: prevent NULL dereference in ip6_output() According to syzbot, there is a chance that ip6_dst_idev() returns NULL in ip6_output(). Most places in IPv6 stack deal with a NULL idev just fine, but not here. syzbot reported: general protection fault, probably for non-canonical address 0xdffffc00000000bc: 0000 [#1] PREEMPT SMP KASAN PTI KASAN: null-ptr-deref in range [0x00000000000005e0-0x00000000000005e7] CPU: 0 PID: 9775 Comm: syz-executor.4 Not tainted 6.9.0-rc5-syzkaller-00157-g6a30653b604a #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024 RIP: 0010:ip6_output+0x231/0x3f0 net/ipv6/ip6_output.c:237 Code: 3c 1e 00 49 89 df 74 08 4c 89 ef e8 19 58 db f7 48 8b 44 24 20 49 89 45 00 49 89 c5 48 8d 9d e0 05 00 00 48 89 d8 48 c1 e8 03 <42> 0f b6 04 38 84 c0 4c 8b 74 24 28 0f 85 61 01 00 00 8b 1b 31 ff RSP: 0018:ffffc9000927f0d8 EFLAGS: 00010202 RAX: 00000000000000bc RBX: 00000000000005e0 RCX: 0000000000040000 RDX: ffffc900131f9000 RSI: 0000000000004f47 RDI: 0000000000004f48 RBP: 0000000000000000 R08: ffffffff8a1f0b9a R09: 1ffffffff1f51fad R10: dffffc0000000000 R11: fffffbfff1f51fae R12: ffff8880293ec8c0 R13: ffff88805d7fc000 R14: 1ffff1100527d91a R15: dffffc0000000000 FS: 00007f135c6856c0(0000) GS:ffff8880b9400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000080 CR3: 0000000064096000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: NF_HOOK include/linux/netfilter.h:314 [inline] ip6_xmit+0xefe/0x17f0 net/ipv6/ip6_output.c:358 sctp_v6_xmit+0x9f2/0x13f0 net/sctp/ipv6.c:248 sctp_packet_transmit+0x26ad/0x2ca0 net/sctp/output.c:653 sctp_packet_singleton+0x22c/0x320 net/sctp/outqueue.c:783 sctp_outq_flush_ctrl net/sctp/outqueue.c:914 [inline] sctp_outq_flush+0x6d5/0x3e20 net/sctp/outqueue.c:1212 sctp_side_effects net/sctp/sm_sideeffect.c:1198 [inline] sctp_do_sm+0x59cc/0x60c0 net/sctp/sm_sideeffect.c:1169 sctp_primitive_ASSOCIATE+0x95/0xc0 net/sctp/primitive.c:73 __sctp_connect+0x9cd/0xe30 net/sctp/socket.c:1234 sctp_connect net/sctp/socket.c:4819 [inline] sctp_inet_connect+0x149/0x1f0 net/sctp/socket.c:4834 __sys_connect_file net/socket.c:2048 [inline] __sys_connect+0x2df/0x310 net/socket.c:2065 __do_sys_connect net/socket.c:2075 [inline] __se_sys_connect net/socket.c:2072 [inline] __x64_sys_connect+0x7a/0x90 net/socket.c:2072 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf5/0x240 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: 778d80be5269 ("ipv6: Add disable_ipv6 sysctl to disable IPv6 operaion on specific interface.") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Larysa Zaremba Link: https://lore.kernel.org/r/20240507161842.773961-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index fa2937732665..97b0788b31ba 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -234,7 +234,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; - if (unlikely(READ_ONCE(idev->cnf.disable_ipv6))) { + if (unlikely(!idev || READ_ONCE(idev->cnf.disable_ipv6))) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED); return 0; From 2ddc0dd7fec86ee53b8928a5cca5fbddd4fc7c06 Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Tue, 7 May 2024 20:53:31 +0800 Subject: [PATCH 38/47] net/smc: fix neighbour and rtable leak in smc_ib_find_route() In smc_ib_find_route(), the neighbour found by neigh_lookup() and rtable resolved by ip_route_output_flow() are not released or put before return. It may cause the refcount leak, so fix it. Link: https://lore.kernel.org/r/20240506015439.108739-1-guwen@linux.alibaba.com Fixes: e5c4744cfb59 ("net/smc: add SMC-Rv2 connection establishment") Signed-off-by: Wen Gu Link: https://lore.kernel.org/r/20240507125331.2808-1-guwen@linux.alibaba.com Signed-off-by: Paolo Abeni --- net/smc/smc_ib.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 97704a9e84c7..9297dc20bfe2 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -209,13 +209,18 @@ int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr, if (IS_ERR(rt)) goto out; if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET) - goto out; - neigh = rt->dst.ops->neigh_lookup(&rt->dst, NULL, &fl4.daddr); - if (neigh) { - memcpy(nexthop_mac, neigh->ha, ETH_ALEN); - *uses_gateway = rt->rt_uses_gateway; - return 0; - } + goto out_rt; + neigh = dst_neigh_lookup(&rt->dst, &fl4.daddr); + if (!neigh) + goto out_rt; + memcpy(nexthop_mac, neigh->ha, ETH_ALEN); + *uses_gateway = rt->rt_uses_gateway; + neigh_release(neigh); + ip_rt_put(rt); + return 0; + +out_rt: + ip_rt_put(rt); out: return -ENOENT; } From 05eb60e9648cca0beeebdbcd263b599fb58aee48 Mon Sep 17 00:00:00 2001 From: Peiyang Wang Date: Tue, 7 May 2024 21:42:18 +0800 Subject: [PATCH 39/47] net: hns3: using user configure after hardware reset When a reset occurring, it's supposed to recover user's configuration. Currently, the port info(speed, duplex and autoneg) is stored in hclge_mac and will be scheduled updated. Consider the case that reset was happened consecutively. During the first reset, the port info is configured with a temporary value cause the PHY is reset and looking for best link config. Second reset start and use pervious configuration which is not the user's. The specific process is as follows: +------+ +----+ +----+ | USER | | PF | | HW | +---+--+ +-+--+ +-+--+ | ethtool --reset | | +------------------->| reset command | | ethtool --reset +-------------------->| +------------------->| +---+ | +---+ | | | | |reset currently | | HW RESET | | |and wait to do | | | |<--+ | | | | send pervious cfg |<--+ | | (1000M FULL AN_ON) | | +-------------------->| | | read cfg(time task) | | | (10M HALF AN_OFF) +---+ | |<--------------------+ | cfg take effect | | reset command |<--+ | +-------------------->| | | +---+ | | send pervious cfg | | HW RESET | | (10M HALF AN_OFF) |<--+ | +-------------------->| | | read cfg(time task) | | | (10M HALF AN_OFF) +---+ | |<--------------------+ | cfg take effect | | | | | | read cfg(time task) |<--+ | | (10M HALF AN_OFF) | | |<--------------------+ | | | v v v To avoid aboved situation, this patch introduced req_speed, req_duplex, req_autoneg to store user's configuration and it only be used after hardware reset and to recover user's configuration Fixes: f5f2b3e4dcc0 ("net: hns3: add support for imp-controlled PHYs") Signed-off-by: Peiyang Wang Signed-off-by: Jijie Shao Reviewed-by: Przemek Kitszel Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 15 +++++++++------ .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 3 +++ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index ff6a2ed23ddb..8043f1795dc7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1537,6 +1537,9 @@ static int hclge_configure(struct hclge_dev *hdev) cfg.default_speed, ret); return ret; } + hdev->hw.mac.req_speed = hdev->hw.mac.speed; + hdev->hw.mac.req_autoneg = AUTONEG_ENABLE; + hdev->hw.mac.req_duplex = DUPLEX_FULL; hclge_parse_link_mode(hdev, cfg.speed_ability); @@ -3342,9 +3345,9 @@ hclge_set_phy_link_ksettings(struct hnae3_handle *handle, return ret; } - hdev->hw.mac.autoneg = cmd->base.autoneg; - hdev->hw.mac.speed = cmd->base.speed; - hdev->hw.mac.duplex = cmd->base.duplex; + hdev->hw.mac.req_autoneg = cmd->base.autoneg; + hdev->hw.mac.req_speed = cmd->base.speed; + hdev->hw.mac.req_duplex = cmd->base.duplex; linkmode_copy(hdev->hw.mac.advertising, cmd->link_modes.advertising); return 0; @@ -3377,9 +3380,9 @@ static int hclge_tp_port_init(struct hclge_dev *hdev) if (!hnae3_dev_phy_imp_supported(hdev)) return 0; - cmd.base.autoneg = hdev->hw.mac.autoneg; - cmd.base.speed = hdev->hw.mac.speed; - cmd.base.duplex = hdev->hw.mac.duplex; + cmd.base.autoneg = hdev->hw.mac.req_autoneg; + cmd.base.speed = hdev->hw.mac.req_speed; + cmd.base.duplex = hdev->hw.mac.req_duplex; linkmode_copy(cmd.link_modes.advertising, hdev->hw.mac.advertising); return hclge_set_phy_link_ksettings(&hdev->vport->nic, &cmd); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index e821dd2f1528..e3c69be8256f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -279,11 +279,14 @@ struct hclge_mac { u8 media_type; /* port media type, e.g. fibre/copper/backplane */ u8 mac_addr[ETH_ALEN]; u8 autoneg; + u8 req_autoneg; u8 duplex; + u8 req_duplex; u8 support_autoneg; u8 speed_type; /* 0: sfp speed, 1: active speed */ u8 lane_num; u32 speed; + u32 req_speed; u32 max_speed; u32 speed_ability; /* speed ability supported by current media */ u32 module_type; /* sub media type, e.g. kr/cr/sr/lr */ From 669554c512d2107e2f21616f38e050d40655101f Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Tue, 7 May 2024 21:42:19 +0800 Subject: [PATCH 40/47] net: hns3: direct return when receive a unknown mailbox message Currently, the driver didn't return when receive a unknown mailbox message, and continue checking whether need to generate a response. It's unnecessary and may be incorrect. Fixes: bb5790b71bad ("net: hns3: refactor mailbox response scheme between PF and VF") Signed-off-by: Jian Shen Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index d4a0e0be7a72..59c863306657 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -1077,12 +1077,13 @@ static void hclge_mbx_request_handling(struct hclge_mbx_ops_param *param) hdev = param->vport->back; cmd_func = hclge_mbx_ops_list[param->req->msg.code]; - if (cmd_func) - ret = cmd_func(param); - else + if (!cmd_func) { dev_err(&hdev->pdev->dev, "un-supported mailbox message, code = %u\n", param->req->msg.code); + return; + } + ret = cmd_func(param); /* PF driver should not reply IMP */ if (hnae3_get_bit(param->req->mbx_need_resp, HCLGE_MBX_NEED_RESP_B) && From 6639a7b953212ac51aa4baa7d7fb855bf736cf56 Mon Sep 17 00:00:00 2001 From: Peiyang Wang Date: Tue, 7 May 2024 21:42:20 +0800 Subject: [PATCH 41/47] net: hns3: change type of numa_node_mask as nodemask_t It provides nodemask_t to describe the numa node mask in kernel. To improve transportability, change the type of numa_node_mask as nodemask_t. Fixes: 38caee9d3ee8 ("net: hns3: Add support of the HNAE3 framework") Signed-off-by: Peiyang Wang Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 6 ++++-- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 7 ++++--- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 2 +- 5 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index f19f1e1d1f9f..133c94646c21 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -897,7 +897,7 @@ struct hnae3_handle { struct hnae3_roce_private_info rinfo; }; - u32 numa_node_mask; /* for multi-chip support */ + nodemask_t numa_node_mask; /* for multi-chip support */ enum hnae3_port_base_vlan_state port_base_vlan_state; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 8043f1795dc7..ecff9b426527 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1769,7 +1769,8 @@ static int hclge_vport_setup(struct hclge_vport *vport, u16 num_tqps) nic->pdev = hdev->pdev; nic->ae_algo = &ae_algo; - nic->numa_node_mask = hdev->numa_node_mask; + bitmap_copy(nic->numa_node_mask.bits, hdev->numa_node_mask.bits, + MAX_NUMNODES); nic->kinfo.io_base = hdev->hw.hw.io_base; ret = hclge_knic_setup(vport, num_tqps, @@ -2461,7 +2462,8 @@ static int hclge_init_roce_base_info(struct hclge_vport *vport) roce->pdev = nic->pdev; roce->ae_algo = nic->ae_algo; - roce->numa_node_mask = nic->numa_node_mask; + bitmap_copy(roce->numa_node_mask.bits, nic->numa_node_mask.bits, + MAX_NUMNODES); return 0; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index e3c69be8256f..3a9186457ad8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -894,7 +894,7 @@ struct hclge_dev { u16 fdir_pf_filter_count; /* Num of guaranteed filters for this PF */ u16 num_alloc_vport; /* Num vports this driver supports */ - u32 numa_node_mask; + nodemask_t numa_node_mask; u16 rx_buf_len; u16 num_tx_desc; /* desc num of per tx queue */ u16 num_rx_desc; /* desc num of per rx queue */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 0aa9beefd1c7..b57111252d07 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -412,7 +412,8 @@ static int hclgevf_set_handle_info(struct hclgevf_dev *hdev) nic->ae_algo = &ae_algovf; nic->pdev = hdev->pdev; - nic->numa_node_mask = hdev->numa_node_mask; + bitmap_copy(nic->numa_node_mask.bits, hdev->numa_node_mask.bits, + MAX_NUMNODES); nic->flags |= HNAE3_SUPPORT_VF; nic->kinfo.io_base = hdev->hw.hw.io_base; @@ -2082,8 +2083,8 @@ static int hclgevf_init_roce_base_info(struct hclgevf_dev *hdev) roce->pdev = nic->pdev; roce->ae_algo = nic->ae_algo; - roce->numa_node_mask = nic->numa_node_mask; - + bitmap_copy(roce->numa_node_mask.bits, nic->numa_node_mask.bits, + MAX_NUMNODES); return 0; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index a73f2bf3a56a..cccef3228461 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -236,7 +236,7 @@ struct hclgevf_dev { u16 rss_size_max; /* HW defined max RSS task queue */ u16 num_alloc_vport; /* num vports this driver supports */ - u32 numa_node_mask; + nodemask_t numa_node_mask; u16 rx_buf_len; u16 num_tx_desc; /* desc num of per tx queue */ u16 num_rx_desc; /* desc num of per rx queue */ From 950aa42399893a170d9b57eda0e4a3ff91fd8b70 Mon Sep 17 00:00:00 2001 From: Peiyang Wang Date: Tue, 7 May 2024 21:42:21 +0800 Subject: [PATCH 42/47] net: hns3: release PTP resources if pf initialization failed During the PF initialization process, hclge_update_port_info may return an error code for some reason. At this point, the ptp initialization has been completed. To void memory leaks, the resources that are applied by ptp should be released. Therefore, when hclge_update_port_info returns an error code, hclge_ptp_uninit is called to release the corresponding resources. Fixes: eaf83ae59e18 ("net: hns3: add querying fec ability from firmware") Signed-off-by: Peiyang Wang Signed-off-by: Jijie Shao Reviewed-by: Hariprasad Kelam Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index ecff9b426527..1d255b3ae988 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -11764,7 +11764,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) ret = hclge_update_port_info(hdev); if (ret) - goto err_mdiobus_unreg; + goto err_ptp_uninit; INIT_KFIFO(hdev->mac_tnl_log); @@ -11815,6 +11815,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) devl_unlock(hdev->devlink); return 0; +err_ptp_uninit: + hclge_ptp_uninit(hdev); err_mdiobus_unreg: if (hdev->hw.mac.phydev) mdiobus_unregister(hdev->hw.mac.mdio_bus); From 094c281228529d333458208fd02fcac3b139d93b Mon Sep 17 00:00:00 2001 From: Peiyang Wang Date: Tue, 7 May 2024 21:42:22 +0800 Subject: [PATCH 43/47] net: hns3: use appropriate barrier function after setting a bit value There is a memory barrier in followed case. When set the port down, hclgevf_set_timmer will set DOWN in state. Meanwhile, the service task has different behaviour based on whether the state is DOWN. Thus, to make sure service task see DOWN, use smp_mb__after_atomic after calling set_bit(). CPU0 CPU1 ========================== =================================== hclgevf_set_timer_task() hclgevf_periodic_service_task() set_bit(DOWN,state) test_bit(DOWN,state) pf also has this issue. Fixes: ff200099d271 ("net: hns3: remove unnecessary work in hclgevf_main") Fixes: 1c6dfe6fc6f7 ("net: hns3: remove mailbox and reset work in hclge_main") Signed-off-by: Peiyang Wang Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 3 +-- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 1d255b3ae988..e3d6a64b4575 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -7957,8 +7957,7 @@ static void hclge_set_timer_task(struct hnae3_handle *handle, bool enable) /* Set the DOWN flag here to disable link updating */ set_bit(HCLGE_STATE_DOWN, &hdev->state); - /* flush memory to make sure DOWN is seen by service task */ - smp_mb__before_atomic(); + smp_mb__after_atomic(); /* flush memory to make sure DOWN is seen by service task */ hclge_flush_link_update(hdev); } } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index b57111252d07..08db8e84be4e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2181,8 +2181,7 @@ static void hclgevf_set_timer_task(struct hnae3_handle *handle, bool enable) } else { set_bit(HCLGEVF_STATE_DOWN, &hdev->state); - /* flush memory to make sure DOWN is seen by service task */ - smp_mb__before_atomic(); + smp_mb__after_atomic(); /* flush memory to make sure DOWN is seen by service task */ hclgevf_flush_link_update(hdev); } } From f5db7a3b65c84d723ca5e2bb6e83115180ab6336 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Tue, 7 May 2024 21:42:23 +0800 Subject: [PATCH 44/47] net: hns3: fix port vlan filter not disabled issue According to hardware limitation, for device support modify VLAN filter state but not support bypass port VLAN filter, it should always disable the port VLAN filter. but the driver enables port VLAN filter when initializing, if there is no VLAN(except VLAN 0) id added, the driver will disable it in service task. In most time, it works fine. But there is a time window before the service task shceduled and net device being registered. So if user adds VLAN at this time, the driver will not update the VLAN filter state, and the port VLAN filter remains enabled. To fix the problem, if support modify VLAN filter state but not support bypass port VLAN filter, set the port vlan filter to "off". Fixes: 184cd221a863 ("net: hns3: disable port VLAN filter when support function level VLAN filter control") Fixes: 2ba306627f59 ("net: hns3: add support for modify VLAN filter state") Signed-off-by: Yonglong Liu Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index e3d6a64b4575..0773124440e9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -9910,6 +9910,7 @@ static int hclge_set_vlan_protocol_type(struct hclge_dev *hdev) static int hclge_init_vlan_filter(struct hclge_dev *hdev) { struct hclge_vport *vport; + bool enable = true; int ret; int i; @@ -9929,8 +9930,12 @@ static int hclge_init_vlan_filter(struct hclge_dev *hdev) vport->cur_vlan_fltr_en = true; } + if (test_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, hdev->ae_dev->caps) && + !test_bit(HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B, hdev->ae_dev->caps)) + enable = false; + return hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_PORT, - HCLGE_FILTER_FE_INGRESS, true, 0); + HCLGE_FILTER_FE_INGRESS, enable, 0); } static int hclge_init_vlan_type(struct hclge_dev *hdev) From 35d92abfbad88cf947c010baf34b075e40566095 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Tue, 7 May 2024 21:42:24 +0800 Subject: [PATCH 45/47] net: hns3: fix kernel crash when devlink reload during initialization The devlink reload process will access the hardware resources, but the register operation is done before the hardware is initialized. So, processing the devlink reload during initialization may lead to kernel crash. This patch fixes this by registering the devlink after hardware initialization. Fixes: cd6242991d2e ("net: hns3: add support for registering devlink for VF") Fixes: 93305b77ffcb ("net: hns3: fix kernel crash when devlink reload during pf initialization") Signed-off-by: Yonglong Liu Signed-off-by: Jijie Shao Signed-off-by: Paolo Abeni --- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 17 +++++------------ .../hisilicon/hns3/hns3vf/hclgevf_main.c | 10 ++++------ 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 0773124440e9..ce60332d83c3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -11631,16 +11631,10 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) if (ret) goto out; - ret = hclge_devlink_init(hdev); - if (ret) - goto err_pci_uninit; - - devl_lock(hdev->devlink); - /* Firmware command queue initialize */ ret = hclge_comm_cmd_queue_init(hdev->pdev, &hdev->hw.hw); if (ret) - goto err_devlink_uninit; + goto err_pci_uninit; /* Firmware command initialize */ ret = hclge_comm_cmd_init(hdev->ae_dev, &hdev->hw.hw, &hdev->fw_version, @@ -11808,6 +11802,10 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) dev_warn(&pdev->dev, "failed to wake on lan init, ret = %d\n", ret); + ret = hclge_devlink_init(hdev); + if (ret) + goto err_ptp_uninit; + hclge_state_init(hdev); hdev->last_reset_time = jiffies; @@ -11815,8 +11813,6 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) HCLGE_DRIVER_NAME); hclge_task_schedule(hdev, round_jiffies_relative(HZ)); - - devl_unlock(hdev->devlink); return 0; err_ptp_uninit: @@ -11830,9 +11826,6 @@ err_msi_uninit: pci_free_irq_vectors(pdev); err_cmd_uninit: hclge_comm_cmd_uninit(hdev->ae_dev, &hdev->hw.hw); -err_devlink_uninit: - devl_unlock(hdev->devlink); - hclge_devlink_uninit(hdev); err_pci_uninit: pcim_iounmap(pdev, hdev->hw.hw.io_base); pci_release_regions(pdev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 08db8e84be4e..43ee20eb03d1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2845,10 +2845,6 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) if (ret) return ret; - ret = hclgevf_devlink_init(hdev); - if (ret) - goto err_devlink_init; - ret = hclge_comm_cmd_queue_init(hdev->pdev, &hdev->hw.hw); if (ret) goto err_cmd_queue_init; @@ -2941,6 +2937,10 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) hclgevf_init_rxd_adv_layout(hdev); + ret = hclgevf_devlink_init(hdev); + if (ret) + goto err_config; + set_bit(HCLGEVF_STATE_SERVICE_INITED, &hdev->state); hdev->last_reset_time = jiffies; @@ -2960,8 +2960,6 @@ err_misc_irq_init: err_cmd_init: hclge_comm_cmd_uninit(hdev->ae_dev, &hdev->hw.hw); err_cmd_queue_init: - hclgevf_devlink_uninit(hdev); -err_devlink_init: hclgevf_pci_uninit(hdev); clear_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state); return ret; From f39bf3cf08a49e7d20c44bc8bc8e390fea69959a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Steffen=20B=C3=A4tz?= Date: Wed, 8 May 2024 09:29:43 +0200 Subject: [PATCH 46/47] net: dsa: mv88e6xxx: add phylink_get_caps for the mv88e6320/21 family MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As of commit de5c9bf40c45 ("net: phylink: require supported_interfaces to be filled") Marvell 88e6320/21 switches fail to be probed: ... mv88e6085 30be0000.ethernet-1:00: phylink: error: empty supported_interfaces error creating PHYLINK: -22 ... The problem stems from the use of mv88e6185_phylink_get_caps() to get the device capabilities. Since there are serdes only ports 0/1 included, create a new dedicated phylink_get_caps for the 6320 and 6321 to properly support their set of capabilities. Fixes: de5c9bf40c45 ("net: phylink: require supported_interfaces to be filled") Signed-off-by: Steffen Bätz Reviewed-by: Andrew Lunn Reviewed-by: Fabio Estevam Link: https://lore.kernel.org/r/20240508072944.54880-2-steffen@innosonix.de Signed-off-by: Paolo Abeni --- drivers/net/dsa/mv88e6xxx/chip.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 14daf432f30b..61c0070c7610 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -697,6 +697,18 @@ static void mv88e6352_phylink_get_caps(struct mv88e6xxx_chip *chip, int port, } } +static void mv88e632x_phylink_get_caps(struct mv88e6xxx_chip *chip, int port, + struct phylink_config *config) +{ + unsigned long *supported = config->supported_interfaces; + + /* Translate the default cmode */ + mv88e6xxx_translate_cmode(chip->ports[port].cmode, supported); + + config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 | + MAC_1000FD; +} + static void mv88e6341_phylink_get_caps(struct mv88e6xxx_chip *chip, int port, struct phylink_config *config) { @@ -5093,7 +5105,7 @@ static const struct mv88e6xxx_ops mv88e6320_ops = { .gpio_ops = &mv88e6352_gpio_ops, .avb_ops = &mv88e6352_avb_ops, .ptp_ops = &mv88e6352_ptp_ops, - .phylink_get_caps = mv88e6185_phylink_get_caps, + .phylink_get_caps = mv88e632x_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6321_ops = { @@ -5139,7 +5151,7 @@ static const struct mv88e6xxx_ops mv88e6321_ops = { .gpio_ops = &mv88e6352_gpio_ops, .avb_ops = &mv88e6352_avb_ops, .ptp_ops = &mv88e6352_ptp_ops, - .phylink_get_caps = mv88e6185_phylink_get_caps, + .phylink_get_caps = mv88e632x_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6341_ops = { From 6e7ffa180a532b6fe2e22aa6182e02ce988a43aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Steffen=20B=C3=A4tz?= Date: Wed, 8 May 2024 09:29:44 +0200 Subject: [PATCH 47/47] net: dsa: mv88e6xxx: read cmode on mv88e6320/21 serdes only ports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On the mv88e6320 and 6321 switch family, port 0/1 are serdes only ports. Modified the mv88e6352_get_port4_serdes_cmode function to pass a port number since the register set of the 6352 is equal on the 6320/21. Signed-off-by: Steffen Bätz Reviewed-by: Andrew Lunn Reviewed-by: Fabio Estevam Link: https://lore.kernel.org/r/20240508072944.54880-3-steffen@innosonix.de Signed-off-by: Paolo Abeni --- drivers/net/dsa/mv88e6xxx/chip.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 61c0070c7610..0918bd6fa81d 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -637,12 +637,12 @@ static void mv88e6351_phylink_get_caps(struct mv88e6xxx_chip *chip, int port, MAC_1000FD; } -static int mv88e6352_get_port4_serdes_cmode(struct mv88e6xxx_chip *chip) +static int mv88e63xx_get_port_serdes_cmode(struct mv88e6xxx_chip *chip, int port) { u16 reg, val; int err; - err = mv88e6xxx_port_read(chip, 4, MV88E6XXX_PORT_STS, ®); + err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, ®); if (err) return err; @@ -651,16 +651,16 @@ static int mv88e6352_get_port4_serdes_cmode(struct mv88e6xxx_chip *chip) return 0xf; val = reg & ~MV88E6XXX_PORT_STS_PHY_DETECT; - err = mv88e6xxx_port_write(chip, 4, MV88E6XXX_PORT_STS, val); + err = mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_STS, val); if (err) return err; - err = mv88e6xxx_port_read(chip, 4, MV88E6XXX_PORT_STS, &val); + err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, &val); if (err) return err; /* Restore PHY_DETECT value */ - err = mv88e6xxx_port_write(chip, 4, MV88E6XXX_PORT_STS, reg); + err = mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_STS, reg); if (err) return err; @@ -688,7 +688,7 @@ static void mv88e6352_phylink_get_caps(struct mv88e6xxx_chip *chip, int port, if (err <= 0) return; - cmode = mv88e6352_get_port4_serdes_cmode(chip); + cmode = mv88e63xx_get_port_serdes_cmode(chip, port); if (cmode < 0) dev_err(chip->dev, "p%d: failed to read serdes cmode\n", port); @@ -701,12 +701,23 @@ static void mv88e632x_phylink_get_caps(struct mv88e6xxx_chip *chip, int port, struct phylink_config *config) { unsigned long *supported = config->supported_interfaces; + int cmode; /* Translate the default cmode */ mv88e6xxx_translate_cmode(chip->ports[port].cmode, supported); config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 | MAC_1000FD; + + /* Port 0/1 are serdes only ports */ + if (port == 0 || port == 1) { + cmode = mv88e63xx_get_port_serdes_cmode(chip, port); + if (cmode < 0) + dev_err(chip->dev, "p%d: failed to read serdes cmode\n", + port); + else + mv88e6xxx_translate_cmode(cmode, supported); + } } static void mv88e6341_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,