linux/net/sched/sch_ingress.c
Daniel Borkmann 81d947e2b8 net, sched: fix panic when updating miniq {b,q}stats
While working on fixing another bug, I ran into the following panic
on arm64 by simply attaching clsact qdisc, adding a filter and running
traffic on ingress to it:

  [...]
  [  178.188591] Unable to handle kernel read from unreadable memory at virtual address 810fb501f000
  [  178.197314] Mem abort info:
  [  178.200121]   ESR = 0x96000004
  [  178.203168]   Exception class = DABT (current EL), IL = 32 bits
  [  178.209095]   SET = 0, FnV = 0
  [  178.212157]   EA = 0, S1PTW = 0
  [  178.215288] Data abort info:
  [  178.218175]   ISV = 0, ISS = 0x00000004
  [  178.222019]   CM = 0, WnR = 0
  [  178.224997] user pgtable: 4k pages, 48-bit VAs, pgd = 0000000023cb3f33
  [  178.231531] [0000810fb501f000] *pgd=0000000000000000
  [  178.236508] Internal error: Oops: 96000004 [#1] SMP
  [...]
  [  178.311855] CPU: 73 PID: 2497 Comm: ping Tainted: G        W        4.15.0-rc7+ #5
  [  178.319413] Hardware name: FOXCONN R2-1221R-A4/C2U4N_MB, BIOS G31FB18A 03/31/2017
  [  178.326887] pstate: 60400005 (nZCv daif +PAN -UAO)
  [  178.331685] pc : __netif_receive_skb_core+0x49c/0xac8
  [  178.336728] lr : __netif_receive_skb+0x28/0x78
  [  178.341161] sp : ffff00002344b750
  [  178.344465] x29: ffff00002344b750 x28: ffff810fbdfd0580
  [  178.349769] x27: 0000000000000000 x26: ffff000009378000
  [...]
  [  178.418715] x1 : 0000000000000054 x0 : 0000000000000000
  [  178.424020] Process ping (pid: 2497, stack limit = 0x000000009f0a3ff4)
  [  178.430537] Call trace:
  [  178.432976]  __netif_receive_skb_core+0x49c/0xac8
  [  178.437670]  __netif_receive_skb+0x28/0x78
  [  178.441757]  process_backlog+0x9c/0x160
  [  178.445584]  net_rx_action+0x2f8/0x3f0
  [...]

Reason is that sch_ingress and sch_clsact are doing mini_qdisc_pair_init()
which sets up miniq pointers to cpu_{b,q}stats from the underlying qdisc.
Problem is that this cannot work since they are actually set up right after
the qdisc ->init() callback in qdisc_create(), so first packet going into
sch_handle_ingress() tries to call mini_qdisc_bstats_cpu_update() and we
therefore panic.

In order to fix this, allocation of {b,q}stats needs to happen before we
call into ->init(). In net-next, there's already such option through commit
d59f5ffa59 ("net: sched: a dflt qdisc may be used with per cpu stats").
However, the bug needs to be fixed in net still for 4.15. Thus, include
these bits to reduce any merge churn and reuse the static_flags field to
set TCQ_F_CPUSTATS, and remove the allocation from qdisc_create() since
there is no other user left. Prashant Bhole ran into the same issue but
for net-next, thus adding him below as well as co-author. Same issue was
also reported by Sandipan Das when using bcc.

Fixes: 46209401f8 ("net: core: introduce mini_Qdisc and eliminate usage of tp->q for clsact fastpath")
Reference: https://lists.iovisor.org/pipermail/iovisor-dev/2018-January/001190.html
Reported-by: Sandipan Das <sandipan@linux.vnet.ibm.com>
Co-authored-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Co-authored-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-01-16 15:02:36 -05:00

248 lines
6.0 KiB
C

/* net/sched/sch_ingress.c - Ingress and clsact qdisc
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Jamal Hadi Salim 1999
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
struct ingress_sched_data {
struct tcf_block *block;
struct tcf_block_ext_info block_info;
struct mini_Qdisc_pair miniqp;
};
static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
{
return NULL;
}
static unsigned long ingress_find(struct Qdisc *sch, u32 classid)
{
return TC_H_MIN(classid) + 1;
}
static unsigned long ingress_bind_filter(struct Qdisc *sch,
unsigned long parent, u32 classid)
{
return ingress_find(sch, classid);
}
static void ingress_unbind_filter(struct Qdisc *sch, unsigned long cl)
{
}
static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
}
static struct tcf_block *ingress_tcf_block(struct Qdisc *sch, unsigned long cl)
{
struct ingress_sched_data *q = qdisc_priv(sch);
return q->block;
}
static void clsact_chain_head_change(struct tcf_proto *tp_head, void *priv)
{
struct mini_Qdisc_pair *miniqp = priv;
mini_qdisc_pair_swap(miniqp, tp_head);
}
static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
{
struct ingress_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
net_inc_ingress_queue();
mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);
q->block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
q->block_info.chain_head_change = clsact_chain_head_change;
q->block_info.chain_head_change_priv = &q->miniqp;
return tcf_block_get_ext(&q->block, sch, &q->block_info);
}
static void ingress_destroy(struct Qdisc *sch)
{
struct ingress_sched_data *q = qdisc_priv(sch);
tcf_block_put_ext(q->block, sch, &q->block_info);
net_dec_ingress_queue();
}
static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct nlattr *nest;
nest = nla_nest_start(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
return nla_nest_end(skb, nest);
nla_put_failure:
nla_nest_cancel(skb, nest);
return -1;
}
static const struct Qdisc_class_ops ingress_class_ops = {
.leaf = ingress_leaf,
.find = ingress_find,
.walk = ingress_walk,
.tcf_block = ingress_tcf_block,
.bind_tcf = ingress_bind_filter,
.unbind_tcf = ingress_unbind_filter,
};
static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
.cl_ops = &ingress_class_ops,
.id = "ingress",
.priv_size = sizeof(struct ingress_sched_data),
.static_flags = TCQ_F_CPUSTATS,
.init = ingress_init,
.destroy = ingress_destroy,
.dump = ingress_dump,
.owner = THIS_MODULE,
};
struct clsact_sched_data {
struct tcf_block *ingress_block;
struct tcf_block *egress_block;
struct tcf_block_ext_info ingress_block_info;
struct tcf_block_ext_info egress_block_info;
struct mini_Qdisc_pair miniqp_ingress;
struct mini_Qdisc_pair miniqp_egress;
};
static unsigned long clsact_find(struct Qdisc *sch, u32 classid)
{
switch (TC_H_MIN(classid)) {
case TC_H_MIN(TC_H_MIN_INGRESS):
case TC_H_MIN(TC_H_MIN_EGRESS):
return TC_H_MIN(classid);
default:
return 0;
}
}
static unsigned long clsact_bind_filter(struct Qdisc *sch,
unsigned long parent, u32 classid)
{
return clsact_find(sch, classid);
}
static struct tcf_block *clsact_tcf_block(struct Qdisc *sch, unsigned long cl)
{
struct clsact_sched_data *q = qdisc_priv(sch);
switch (cl) {
case TC_H_MIN(TC_H_MIN_INGRESS):
return q->ingress_block;
case TC_H_MIN(TC_H_MIN_EGRESS):
return q->egress_block;
default:
return NULL;
}
}
static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
{
struct clsact_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
int err;
net_inc_ingress_queue();
net_inc_egress_queue();
mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress);
q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
q->ingress_block_info.chain_head_change = clsact_chain_head_change;
q->ingress_block_info.chain_head_change_priv = &q->miniqp_ingress;
err = tcf_block_get_ext(&q->ingress_block, sch, &q->ingress_block_info);
if (err)
return err;
mini_qdisc_pair_init(&q->miniqp_egress, sch, &dev->miniq_egress);
q->egress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS;
q->egress_block_info.chain_head_change = clsact_chain_head_change;
q->egress_block_info.chain_head_change_priv = &q->miniqp_egress;
return tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info);
}
static void clsact_destroy(struct Qdisc *sch)
{
struct clsact_sched_data *q = qdisc_priv(sch);
tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
net_dec_ingress_queue();
net_dec_egress_queue();
}
static const struct Qdisc_class_ops clsact_class_ops = {
.leaf = ingress_leaf,
.find = clsact_find,
.walk = ingress_walk,
.tcf_block = clsact_tcf_block,
.bind_tcf = clsact_bind_filter,
.unbind_tcf = ingress_unbind_filter,
};
static struct Qdisc_ops clsact_qdisc_ops __read_mostly = {
.cl_ops = &clsact_class_ops,
.id = "clsact",
.priv_size = sizeof(struct clsact_sched_data),
.static_flags = TCQ_F_CPUSTATS,
.init = clsact_init,
.destroy = clsact_destroy,
.dump = ingress_dump,
.owner = THIS_MODULE,
};
static int __init ingress_module_init(void)
{
int ret;
ret = register_qdisc(&ingress_qdisc_ops);
if (!ret) {
ret = register_qdisc(&clsact_qdisc_ops);
if (ret)
unregister_qdisc(&ingress_qdisc_ops);
}
return ret;
}
static void __exit ingress_module_exit(void)
{
unregister_qdisc(&ingress_qdisc_ops);
unregister_qdisc(&clsact_qdisc_ops);
}
module_init(ingress_module_init);
module_exit(ingress_module_exit);
MODULE_ALIAS("sch_clsact");
MODULE_LICENSE("GPL");