forked from Minki/linux
57f015f5ec
syzkaller found a race condition in fanout_demux_rollover() while removing
a packet socket from a fanout group.

po->rollover is read and operated on during packet_rcv_fanout(), via
fanout_demux_rollover(), but the pointer is currently cleared before the
synchronization in packet_release(). It is safer to delay the cleanup
until after synchronize_net() has been called, ensuring all calls to
packet_rcv_fanout() for this socket have finished.

To further simplify synchronization around the rollover structure, set
po->rollover in fanout_add() only if there are no errors. This removes
the need for rcu in the struct and in the call to
packet_getsockopt(..., PACKET_ROLLOVER_STATS, ...).

Crashing stack trace:
 fanout_demux_rollover+0xb6/0x4d0 net/packet/af_packet.c:1392
 packet_rcv_fanout+0x649/0x7c8 net/packet/af_packet.c:1487
 dev_queue_xmit_nit+0x835/0xc10 net/core/dev.c:1953
 xmit_one net/core/dev.c:2975 [inline]
 dev_hard_start_xmit+0x16b/0xac0 net/core/dev.c:2995
 __dev_queue_xmit+0x17a4/0x2050 net/core/dev.c:3476
 dev_queue_xmit+0x17/0x20 net/core/dev.c:3509
 neigh_connected_output+0x489/0x720 net/core/neighbour.c:1379
 neigh_output include/net/neighbour.h:482 [inline]
 ip6_finish_output2+0xad1/0x22a0 net/ipv6/ip6_output.c:120
 ip6_finish_output+0x2f9/0x920 net/ipv6/ip6_output.c:146
 NF_HOOK_COND include/linux/netfilter.h:239 [inline]
 ip6_output+0x1f4/0x850 net/ipv6/ip6_output.c:163
 dst_output include/net/dst.h:459 [inline]
 NF_HOOK.constprop.35+0xff/0x630 include/linux/netfilter.h:250
 mld_sendpack+0x6a8/0xcc0 net/ipv6/mcast.c:1660
 mld_send_initial_cr.part.24+0x103/0x150 net/ipv6/mcast.c:2072
 mld_send_initial_cr net/ipv6/mcast.c:2056 [inline]
 ipv6_mc_dad_complete+0x99/0x130 net/ipv6/mcast.c:2079
 addrconf_dad_completed+0x595/0x970 net/ipv6/addrconf.c:4039
 addrconf_dad_work+0xac9/0x1160 net/ipv6/addrconf.c:3971
 process_one_work+0xbf0/0x1bc0 kernel/workqueue.c:2113
 worker_thread+0x223/0x1990 kernel/workqueue.c:2247
 kthread+0x35e/0x430 kernel/kthread.c:231
 ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:432

Fixes: 0648ab70af ("packet: rollover prepare: per-socket state")
Fixes: 509c7a1ecc ("packet: avoid panic in packet_getsockopt()")
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Mike Maloney <maloney@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
142 lines
3.1 KiB
C
142 lines
3.1 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __PACKET_INTERNAL_H__
#define __PACKET_INTERNAL_H__

#include <linux/refcount.h>

struct packet_mclist {
|
|
struct packet_mclist *next;
|
|
int ifindex;
|
|
int count;
|
|
unsigned short type;
|
|
unsigned short alen;
|
|
unsigned char addr[MAX_ADDR_LEN];
|
|
};
|
|
|
|
/* kbdq - kernel block descriptor queue */
|
|
struct tpacket_kbdq_core {
|
|
struct pgv *pkbdq;
|
|
unsigned int feature_req_word;
|
|
unsigned int hdrlen;
|
|
unsigned char reset_pending_on_curr_blk;
|
|
unsigned char delete_blk_timer;
|
|
unsigned short kactive_blk_num;
|
|
unsigned short blk_sizeof_priv;
|
|
|
|
/* last_kactive_blk_num:
|
|
* trick to see if user-space has caught up
|
|
* in order to avoid refreshing timer when every single pkt arrives.
|
|
*/
|
|
unsigned short last_kactive_blk_num;
|
|
|
|
char *pkblk_start;
|
|
char *pkblk_end;
|
|
int kblk_size;
|
|
unsigned int max_frame_len;
|
|
unsigned int knum_blocks;
|
|
uint64_t knxt_seq_num;
|
|
char *prev;
|
|
char *nxt_offset;
|
|
struct sk_buff *skb;
|
|
|
|
atomic_t blk_fill_in_prog;
|
|
|
|
/* Default is set to 8ms */
|
|
#define DEFAULT_PRB_RETIRE_TOV (8)
|
|
|
|
unsigned short retire_blk_tov;
|
|
unsigned short version;
|
|
unsigned long tov_in_jiffies;
|
|
|
|
/* timer to retire an outstanding block */
|
|
struct timer_list retire_blk_timer;
|
|
};
|
|
|
|
/*
 * Wrapper around a single char pointer.  Referenced by pointer from
 * struct packet_ring_buffer (pg_vec) and struct tpacket_kbdq_core (pkbdq).
 */
struct pgv {
	char *buffer;
};

struct packet_ring_buffer {
|
|
struct pgv *pg_vec;
|
|
|
|
unsigned int head;
|
|
unsigned int frames_per_block;
|
|
unsigned int frame_size;
|
|
unsigned int frame_max;
|
|
|
|
unsigned int pg_vec_order;
|
|
unsigned int pg_vec_pages;
|
|
unsigned int pg_vec_len;
|
|
|
|
unsigned int __percpu *pending_refcnt;
|
|
|
|
struct tpacket_kbdq_core prb_bdqc;
|
|
};
|
|
|
|
/* Declaration only; the definition of fanout_mutex is not in this header. */
extern struct mutex fanout_mutex;
/* Number of socket slots in packet_fanout.arr[]. */
#define PACKET_FANOUT_MAX	256

struct packet_fanout {
|
|
possible_net_t net;
|
|
unsigned int num_members;
|
|
u16 id;
|
|
u8 type;
|
|
u8 flags;
|
|
union {
|
|
atomic_t rr_cur;
|
|
struct bpf_prog __rcu *bpf_prog;
|
|
};
|
|
struct list_head list;
|
|
struct sock *arr[PACKET_FANOUT_MAX];
|
|
spinlock_t lock;
|
|
refcount_t sk_ref;
|
|
struct packet_type prot_hook ____cacheline_aligned_in_smp;
|
|
};
|
|
|
|
struct packet_rollover {
|
|
int sock;
|
|
atomic_long_t num;
|
|
atomic_long_t num_huge;
|
|
atomic_long_t num_failed;
|
|
#define ROLLOVER_HLEN (L1_CACHE_BYTES / sizeof(u32))
|
|
u32 history[ROLLOVER_HLEN] ____cacheline_aligned;
|
|
} ____cacheline_aligned_in_smp;
|
|
|
|
struct packet_sock {
|
|
/* struct sock has to be the first member of packet_sock */
|
|
struct sock sk;
|
|
struct packet_fanout *fanout;
|
|
union tpacket_stats_u stats;
|
|
struct packet_ring_buffer rx_ring;
|
|
struct packet_ring_buffer tx_ring;
|
|
int copy_thresh;
|
|
spinlock_t bind_lock;
|
|
struct mutex pg_vec_lock;
|
|
unsigned int running:1, /* prot_hook is attached*/
|
|
auxdata:1,
|
|
origdev:1,
|
|
has_vnet_hdr:1;
|
|
int pressure;
|
|
int ifindex; /* bound device */
|
|
__be16 num;
|
|
struct packet_rollover *rollover;
|
|
struct packet_mclist *mclist;
|
|
atomic_t mapped;
|
|
enum tpacket_versions tp_version;
|
|
unsigned int tp_hdrlen;
|
|
unsigned int tp_reserve;
|
|
unsigned int tp_loss:1;
|
|
unsigned int tp_tx_has_off:1;
|
|
unsigned int tp_tstamp;
|
|
struct net_device __rcu *cached_dev;
|
|
int (*xmit)(struct sk_buff *skb);
|
|
struct packet_type prot_hook ____cacheline_aligned_in_smp;
|
|
};
|
|
|
|
/*
 * pkt_sk - view a struct sock as the struct packet_sock that embeds it.
 * @sk: socket pointer; it must be the sk member of a struct packet_sock.
 *      This is not checked here.
 *
 * The plain pointer cast is valid only because sk is the first member of
 * struct packet_sock, so both objects start at the same address.
 *
 * Declared inline because the definition lives in a header: a translation
 * unit that includes the header without calling pkt_sk() then neither
 * carries a private copy nor triggers an unused-function warning.
 *
 * Returns @sk reinterpreted as a struct packet_sock pointer (NULL stays
 * NULL).
 */
static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	return (struct packet_sock *)sk;
}

#endif /* __PACKET_INTERNAL_H__ */