linux/net/netlink/af_netlink.h
Daniel Borkmann 1853c94964 netlink, mmap: transform mmap skb into full skb on taps
Ken-ichirou reported that running netlink in mmap mode for receive in
combination with nlmon will throw a NULL pointer dereference in
__kfree_skb() on nlmon_xmit(); in my case, I can also trigger an "unable
to handle kernel paging request". The problem is the skb_clone() in
__netlink_deliver_tap_skb() for skbs that are mmaped.

I.e. the cloned skb doesn't have a destructor, whereas the mmap netlink
skb has it pointed to netlink_skb_destructor(), set in the handler
netlink_ring_setup_skb(). There, skb->head is being set to NULL, so
that in such cases, __kfree_skb() doesn't perform a skb_release_data()
via skb_release_all(), where skb->head is possibly being freed through
kfree(head) into the slab allocator, although netlink mmap skb->head points
to the mmap buffer. The same also has to be done for large
netlink skbs where the data area is vmalloced. Therefore, as discussed,
make a copy for these rather rare cases for now. This fixes the issue
on my and Ken-ichirou's test-cases.

Reference: http://thread.gmane.org/gmane.linux.network/371129
Fixes: bcbde0d449 ("net: netlink: virtual tap device management")
Reported-by: Ken-ichirou MATSUZAWA <chamaken@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Ken-ichirou MATSUZAWA <chamaken@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-11 14:36:49 -07:00

89 lines
2.0 KiB
C

#ifndef _AF_NETLINK_H
#define _AF_NETLINK_H
#include <linux/rhashtable.h>
#include <linux/atomic.h>
#include <net/sock.h>
/*
 * Sizing helpers built on ALIGN().
 *
 * NLGRPSZ(x):    x rounded up by ALIGN() to a multiple of
 *                sizeof(unsigned long) * 8, then divided by 8.
 * NLGRPLONGS(x): NLGRPSZ(x) divided by sizeof(unsigned long).
 *
 * NOTE(review): presumably x is a number of netlink groups (bits), making
 * these the byte size and unsigned-long count of a group bitmap -- confirm
 * against the callers.
 */
#define NLGRPSZ(x)	(ALIGN((x), 8 * sizeof(unsigned long)) / 8)
#define NLGRPLONGS(x)	(NLGRPSZ(x) / sizeof(unsigned long))
/*
 * Bookkeeping for a single netlink ring.  struct netlink_sock embeds two of
 * these (rx_ring and tx_ring) when CONFIG_NETLINK_MMAP is enabled.
 *
 * NOTE(review): the per-member notes below are inferred from the member
 * names only -- confirm against the ring setup code before relying on them.
 */
struct netlink_ring {
	void			**pg_vec;		/* presumably the array of ring block pointers */
	unsigned int		head;			/* presumably the current frame index */
	unsigned int		frames_per_block;
	unsigned int		frame_size;
	unsigned int		frame_max;		/* presumably the highest valid frame index */
	unsigned int		pg_vec_order;
	unsigned int		pg_vec_pages;
	unsigned int		pg_vec_len;

	atomic_t		pending;		/* atomic counter; exact meaning not visible here */
};
/*
 * Netlink socket: a struct sock followed by netlink-specific state.
 * nlk_sk() converts a struct sock pointer back to this structure through
 * the embedded sk member.
 *
 * NOTE(review): the grouping comments below are inferred from member names
 * and types only -- confirm against af_netlink.c.
 */
struct netlink_sock {
	/* struct sock has to be the first member of netlink_sock */
	struct sock		sk;

	/* Addressing: own port id and the destination port id / group. */
	u32			portid;
	u32			dst_portid;
	u32			dst_group;

	u32			flags;
	u32			subscriptions;

	/* Group membership; groups is presumably a bitmap of ngroups bits. */
	u32			ngroups;
	unsigned long		*groups;

	unsigned long		state;
	size_t			max_recvmsg_len;
	wait_queue_head_t	wait;

	/* Callback state and the mutexes associated with it. */
	bool			cb_running;
	struct netlink_callback	cb;
	struct mutex		*cb_mutex;
	struct mutex		cb_def_mutex;

	/* Per-socket hooks; bind/unbind take the same arguments as in netlink_table. */
	void			(*netlink_rcv)(struct sk_buff *skb);
	int			(*netlink_bind)(struct net *net, int group);
	void			(*netlink_unbind)(struct net *net, int group);
	struct module		*module;

#ifdef CONFIG_NETLINK_MMAP
	/* Ring state, present only in CONFIG_NETLINK_MMAP builds. */
	struct mutex		pg_vec_lock;
	struct netlink_ring	rx_ring;
	struct netlink_ring	tx_ring;
	atomic_t		mapped;
#endif /* CONFIG_NETLINK_MMAP */

	struct rhash_head	node;	/* presumably the link into netlink_table::hash */
	struct rcu_head		rcu;
};
/*
 * nlk_sk - get the netlink_sock that embeds a given struct sock
 * @sk: pointer to the sk member of a struct netlink_sock
 *
 * Returns the enclosing struct netlink_sock, computed with container_of().
 * No check is made that @sk really belongs to a netlink_sock; that is the
 * caller's responsibility.
 */
static inline struct netlink_sock *nlk_sk(struct sock *sk)
{
	struct netlink_sock *nlk = container_of(sk, struct netlink_sock, sk);

	return nlk;
}
/*
 * netlink_skb_is_mmaped - test the NETLINK_SKB_MMAPED flag of an skb
 * @skb: buffer whose netlink control block flags are examined
 *
 * Returns true when NETLINK_SKB_MMAPED is set in NETLINK_CB(skb).flags.
 * In builds without CONFIG_NETLINK_MMAP the result is always false and
 * @skb is never dereferenced.
 */
static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb)
{
#ifndef CONFIG_NETLINK_MMAP
	return false;
#else
	return (NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED) != 0;
#endif /* CONFIG_NETLINK_MMAP */
}
/*
 * Entry type of the table reachable through the global nl_table pointer.
 *
 * NOTE(review): presumably there is one entry per netlink protocol, and the
 * member notes below are inferred from names and types only -- confirm
 * against af_netlink.c.
 */
struct netlink_table {
	struct rhashtable	hash;		/* presumably holds sockets via netlink_sock::node */
	struct hlist_head	mc_list;
	struct listeners __rcu	*listeners;	/* __rcu-annotated pointer */

	unsigned int		flags;
	unsigned int		groups;

	struct mutex		*cb_mutex;
	struct module		*module;

	/* Hooks; bind/unbind mirror netlink_bind/netlink_unbind in netlink_sock. */
	int			(*bind)(struct net *net, int group);
	void			(*unbind)(struct net *net, int group);
	bool			(*compare)(struct net *net, struct sock *sock);

	int			registered;
};
/* Declarations only: both objects are defined outside this header. */
extern struct netlink_table *nl_table;
/* NOTE(review): presumably the rwlock guarding nl_table -- confirm at the definition. */
extern rwlock_t nl_table_lock;
#endif