forked from Minki/linux
4eaf3b84f2
qdisc_tree_decrease_qlen() suffers from two problems on multiqueue devices. One problem is that it updates sch->q.qlen and sch->qstats.drops on the mq/mqprio root qdisc, while it should not : Daniele reported underflows errors : [ 681.774821] PAX: sch->q.qlen: 0 n: 1 [ 681.774825] PAX: size overflow detected in function qdisc_tree_decrease_qlen net/sched/sch_api.c:769 cicus.693_49 min, count: 72, decl: qlen; num: 0; context: sk_buff_head; [ 681.774954] CPU: 2 PID: 19 Comm: ksoftirqd/2 Tainted: G O 4.2.6.201511282239-1-grsec #1 [ 681.774955] Hardware name: ASUSTeK COMPUTER INC. X302LJ/X302LJ, BIOS X302LJ.202 03/05/2015 [ 681.774956] ffffffffa9a04863 0000000000000000 0000000000000000 ffffffffa990ff7c [ 681.774959] ffffc90000d3bc38 ffffffffa95d2810 0000000000000007 ffffffffa991002b [ 681.774960] ffffc90000d3bc68 ffffffffa91a44f4 0000000000000001 0000000000000001 [ 681.774962] Call Trace: [ 681.774967] [<ffffffffa95d2810>] dump_stack+0x4c/0x7f [ 681.774970] [<ffffffffa91a44f4>] report_size_overflow+0x34/0x50 [ 681.774972] [<ffffffffa94d17e2>] qdisc_tree_decrease_qlen+0x152/0x160 [ 681.774976] [<ffffffffc02694b1>] fq_codel_dequeue+0x7b1/0x820 [sch_fq_codel] [ 681.774978] [<ffffffffc02680a0>] ? qdisc_peek_dequeued+0xa0/0xa0 [sch_fq_codel] [ 681.774980] [<ffffffffa94cd92d>] __qdisc_run+0x4d/0x1d0 [ 681.774983] [<ffffffffa949b2b2>] net_tx_action+0xc2/0x160 [ 681.774985] [<ffffffffa90664c1>] __do_softirq+0xf1/0x200 [ 681.774987] [<ffffffffa90665ee>] run_ksoftirqd+0x1e/0x30 [ 681.774989] [<ffffffffa90896b0>] smpboot_thread_fn+0x150/0x260 [ 681.774991] [<ffffffffa9089560>] ? sort_range+0x40/0x40 [ 681.774992] [<ffffffffa9085fe4>] kthread+0xe4/0x100 [ 681.774994] [<ffffffffa9085f00>] ? kthread_worker_fn+0x170/0x170 [ 681.774995] [<ffffffffa95d8d1e>] ret_from_fork+0x3e/0x70 mq/mqprio have their own ways to report qlen/drops by folding stats on all their queues, with appropriate locking. A second problem is that qdisc_tree_decrease_qlen() calls qdisc_lookup() without proper locking : concurrent qdisc updates could corrupt the list that qdisc_match_from_root() parses to find a qdisc given its handle. Fix first problem adding a TCQ_F_NOPARENT qdisc flag that qdisc_tree_decrease_qlen() can use to abort its tree traversal, as soon as it meets a mq/mqprio qdisc children. Second problem can be fixed by RCU protection. Qdisc are already freed after RCU grace period, so qdisc_list_add() and qdisc_list_del() simply have to use appropriate rcu list variants. A future patch will add a per struct netdev_queue list anchor, so that qdisc_tree_decrease_qlen() can have more efficient lookups. Reported-by: Daniele Fucini <dfucini@gmail.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Cong Wang <cwang@twopensource.com> Cc: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net>
247 lines
5.8 KiB
C
247 lines
5.8 KiB
C
/*
|
|
* net/sched/sch_mq.c Classful multiqueue dummy scheduler
|
|
*
|
|
* Copyright (c) 2009 Patrick McHardy <kaber@trash.net>
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* version 2 as published by the Free Software Foundation.
|
|
*/
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/export.h>
|
|
#include <linux/string.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/skbuff.h>
|
|
#include <net/netlink.h>
|
|
#include <net/pkt_sched.h>
|
|
|
|
struct mq_sched {
|
|
struct Qdisc **qdiscs;
|
|
};
|
|
|
|
static void mq_destroy(struct Qdisc *sch)
|
|
{
|
|
struct net_device *dev = qdisc_dev(sch);
|
|
struct mq_sched *priv = qdisc_priv(sch);
|
|
unsigned int ntx;
|
|
|
|
if (!priv->qdiscs)
|
|
return;
|
|
for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++)
|
|
qdisc_destroy(priv->qdiscs[ntx]);
|
|
kfree(priv->qdiscs);
|
|
}
|
|
|
|
static int mq_init(struct Qdisc *sch, struct nlattr *opt)
|
|
{
|
|
struct net_device *dev = qdisc_dev(sch);
|
|
struct mq_sched *priv = qdisc_priv(sch);
|
|
struct netdev_queue *dev_queue;
|
|
struct Qdisc *qdisc;
|
|
unsigned int ntx;
|
|
|
|
if (sch->parent != TC_H_ROOT)
|
|
return -EOPNOTSUPP;
|
|
|
|
if (!netif_is_multiqueue(dev))
|
|
return -EOPNOTSUPP;
|
|
|
|
/* pre-allocate qdiscs, attachment can't fail */
|
|
priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
|
|
GFP_KERNEL);
|
|
if (priv->qdiscs == NULL)
|
|
return -ENOMEM;
|
|
|
|
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
|
|
dev_queue = netdev_get_tx_queue(dev, ntx);
|
|
qdisc = qdisc_create_dflt(dev_queue, default_qdisc_ops,
|
|
TC_H_MAKE(TC_H_MAJ(sch->handle),
|
|
TC_H_MIN(ntx + 1)));
|
|
if (qdisc == NULL)
|
|
goto err;
|
|
priv->qdiscs[ntx] = qdisc;
|
|
qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
|
|
}
|
|
|
|
sch->flags |= TCQ_F_MQROOT;
|
|
return 0;
|
|
|
|
err:
|
|
mq_destroy(sch);
|
|
return -ENOMEM;
|
|
}
|
|
|
|
static void mq_attach(struct Qdisc *sch)
|
|
{
|
|
struct net_device *dev = qdisc_dev(sch);
|
|
struct mq_sched *priv = qdisc_priv(sch);
|
|
struct Qdisc *qdisc, *old;
|
|
unsigned int ntx;
|
|
|
|
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
|
|
qdisc = priv->qdiscs[ntx];
|
|
old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
|
|
if (old)
|
|
qdisc_destroy(old);
|
|
#ifdef CONFIG_NET_SCHED
|
|
if (ntx < dev->real_num_tx_queues)
|
|
qdisc_list_add(qdisc);
|
|
#endif
|
|
|
|
}
|
|
kfree(priv->qdiscs);
|
|
priv->qdiscs = NULL;
|
|
}
|
|
|
|
static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
|
|
{
|
|
struct net_device *dev = qdisc_dev(sch);
|
|
struct Qdisc *qdisc;
|
|
unsigned int ntx;
|
|
|
|
sch->q.qlen = 0;
|
|
memset(&sch->bstats, 0, sizeof(sch->bstats));
|
|
memset(&sch->qstats, 0, sizeof(sch->qstats));
|
|
|
|
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
|
|
qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
|
|
spin_lock_bh(qdisc_lock(qdisc));
|
|
sch->q.qlen += qdisc->q.qlen;
|
|
sch->bstats.bytes += qdisc->bstats.bytes;
|
|
sch->bstats.packets += qdisc->bstats.packets;
|
|
sch->qstats.backlog += qdisc->qstats.backlog;
|
|
sch->qstats.drops += qdisc->qstats.drops;
|
|
sch->qstats.requeues += qdisc->qstats.requeues;
|
|
sch->qstats.overlimits += qdisc->qstats.overlimits;
|
|
spin_unlock_bh(qdisc_lock(qdisc));
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static struct netdev_queue *mq_queue_get(struct Qdisc *sch, unsigned long cl)
|
|
{
|
|
struct net_device *dev = qdisc_dev(sch);
|
|
unsigned long ntx = cl - 1;
|
|
|
|
if (ntx >= dev->num_tx_queues)
|
|
return NULL;
|
|
return netdev_get_tx_queue(dev, ntx);
|
|
}
|
|
|
|
static struct netdev_queue *mq_select_queue(struct Qdisc *sch,
|
|
struct tcmsg *tcm)
|
|
{
|
|
unsigned int ntx = TC_H_MIN(tcm->tcm_parent);
|
|
struct netdev_queue *dev_queue = mq_queue_get(sch, ntx);
|
|
|
|
if (!dev_queue) {
|
|
struct net_device *dev = qdisc_dev(sch);
|
|
|
|
return netdev_get_tx_queue(dev, 0);
|
|
}
|
|
return dev_queue;
|
|
}
|
|
|
|
static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
|
|
struct Qdisc **old)
|
|
{
|
|
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
|
|
struct net_device *dev = qdisc_dev(sch);
|
|
|
|
if (dev->flags & IFF_UP)
|
|
dev_deactivate(dev);
|
|
|
|
*old = dev_graft_qdisc(dev_queue, new);
|
|
if (new)
|
|
new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
|
|
if (dev->flags & IFF_UP)
|
|
dev_activate(dev);
|
|
return 0;
|
|
}
|
|
|
|
static struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl)
|
|
{
|
|
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
|
|
|
|
return dev_queue->qdisc_sleeping;
|
|
}
|
|
|
|
static unsigned long mq_get(struct Qdisc *sch, u32 classid)
|
|
{
|
|
unsigned int ntx = TC_H_MIN(classid);
|
|
|
|
if (!mq_queue_get(sch, ntx))
|
|
return 0;
|
|
return ntx;
|
|
}
|
|
|
|
static void mq_put(struct Qdisc *sch, unsigned long cl)
|
|
{
|
|
}
|
|
|
|
static int mq_dump_class(struct Qdisc *sch, unsigned long cl,
|
|
struct sk_buff *skb, struct tcmsg *tcm)
|
|
{
|
|
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
|
|
|
|
tcm->tcm_parent = TC_H_ROOT;
|
|
tcm->tcm_handle |= TC_H_MIN(cl);
|
|
tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
|
|
return 0;
|
|
}
|
|
|
|
static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
|
|
struct gnet_dump *d)
|
|
{
|
|
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
|
|
|
|
sch = dev_queue->qdisc_sleeping;
|
|
if (gnet_stats_copy_basic(d, NULL, &sch->bstats) < 0 ||
|
|
gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0)
|
|
return -1;
|
|
return 0;
|
|
}
|
|
|
|
static void mq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
|
|
{
|
|
struct net_device *dev = qdisc_dev(sch);
|
|
unsigned int ntx;
|
|
|
|
if (arg->stop)
|
|
return;
|
|
|
|
arg->count = arg->skip;
|
|
for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) {
|
|
if (arg->fn(sch, ntx + 1, arg) < 0) {
|
|
arg->stop = 1;
|
|
break;
|
|
}
|
|
arg->count++;
|
|
}
|
|
}
|
|
|
|
static const struct Qdisc_class_ops mq_class_ops = {
|
|
.select_queue = mq_select_queue,
|
|
.graft = mq_graft,
|
|
.leaf = mq_leaf,
|
|
.get = mq_get,
|
|
.put = mq_put,
|
|
.walk = mq_walk,
|
|
.dump = mq_dump_class,
|
|
.dump_stats = mq_dump_class_stats,
|
|
};
|
|
|
|
struct Qdisc_ops mq_qdisc_ops __read_mostly = {
|
|
.cl_ops = &mq_class_ops,
|
|
.id = "mq",
|
|
.priv_size = sizeof(struct mq_sched),
|
|
.init = mq_init,
|
|
.destroy = mq_destroy,
|
|
.attach = mq_attach,
|
|
.dump = mq_dump,
|
|
.owner = THIS_MODULE,
|
|
};
|