linux/net/sched/sch_atm.c

709 lines
20 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/* net/sched/sch_atm.c - ATM VC selection "queueing discipline" */
/* Written 1998-2000 by Werner Almesberger, EPFL ICA */
#include <linux/module.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 08:04:11 +00:00
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/atmdev.h>
#include <linux/atmclip.h>
#include <linux/rtnetlink.h>
#include <linux/file.h> /* for fput */
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
/*
* The ATM queuing discipline provides a framework for invoking classifiers
* (aka "filters"), which in turn select classes of this queuing discipline.
* Each class maps the flow(s) it is handling to a given VC. Multiple classes
* may share the same VC.
*
* When creating a class, VCs are specified by passing the number of the open
* socket descriptor by which the calling process references the VC. The kernel
* keeps the VC open at least until all classes using it are removed.
*
* In this file, most functions are named atm_tc_* to avoid confusion with all
* the atm_* in net/atm. This naming convention differs from what's used in the
* rest of net/sched.
*
* Known bugs:
* - sometimes messes up the IP stack
* - any manipulations besides the few operations described in the README, are
* untested and likely to crash the system
* - should lock the flow while there is data in the queue (?)
*/
#define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back))
struct atm_flow_data {
struct Qdisc_class_common common;
struct Qdisc *q; /* FIFO, TBF, etc. */
struct tcf_proto __rcu *filter_list;
struct tcf_block *block;
struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */
void (*old_pop)(struct atm_vcc *vcc,
struct sk_buff *skb); /* chaining */
struct atm_qdisc_data *parent; /* parent qdisc */
struct socket *sock; /* for closing */
int ref; /* reference count */
struct gnet_stats_basic_sync bstats;
struct gnet_stats_queue qstats;
struct list_head list;
struct atm_flow_data *excess; /* flow for excess traffic;
NULL to set CLP instead */
int hdr_len;
unsigned char hdr[]; /* header data; MUST BE LAST */
};
struct atm_qdisc_data {
struct atm_flow_data link; /* unclassified skbs go here */
struct list_head flows; /* NB: "link" is also on this
list */
struct tasklet_struct task; /* dequeue tasklet */
};
/* ------------------------- Class/flow operations ------------------------- */
static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
struct atm_flow_data *flow;
list_for_each_entry(flow, &p->flows, list) {
if (flow->common.classid == classid)
return flow;
}
return NULL;
}
static int atm_tc_graft(struct Qdisc *sch, unsigned long arg,
struct Qdisc *new, struct Qdisc **old,
struct netlink_ext_ack *extack)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
struct atm_flow_data *flow = (struct atm_flow_data *)arg;
pr_debug("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n",
sch, p, flow, new, old);
if (list_empty(&flow->list))
return -EINVAL;
if (!new)
new = &noop_qdisc;
*old = flow->q;
flow->q = new;
if (*old)
qdisc_reset(*old);
return 0;
}
static struct Qdisc *atm_tc_leaf(struct Qdisc *sch, unsigned long cl)
{
struct atm_flow_data *flow = (struct atm_flow_data *)cl;
pr_debug("atm_tc_leaf(sch %p,flow %p)\n", sch, flow);
return flow ? flow->q : NULL;
}
static unsigned long atm_tc_find(struct Qdisc *sch, u32 classid)
{
struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch);
struct atm_flow_data *flow;
pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid);
flow = lookup_flow(sch, classid);
pr_debug("%s: flow %p\n", __func__, flow);
return (unsigned long)flow;
}
static unsigned long atm_tc_bind_filter(struct Qdisc *sch,
unsigned long parent, u32 classid)
{
struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch);
struct atm_flow_data *flow;
pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid);
flow = lookup_flow(sch, classid);
if (flow)
flow->ref++;
pr_debug("%s: flow %p\n", __func__, flow);
return (unsigned long)flow;
}
/*
* atm_tc_put handles all destructions, including the ones that are explicitly
* requested (atm_tc_destroy, etc.). The assumption here is that we never drop
* anything that still seems to be in use.
*/
static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
struct atm_flow_data *flow = (struct atm_flow_data *)cl;
pr_debug("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
if (--flow->ref)
return;
pr_debug("atm_tc_put: destroying\n");
list_del_init(&flow->list);
pr_debug("atm_tc_put: qdisc %p\n", flow->q);
qdisc_put(flow->q);
tcf_block_put(flow->block);
if (flow->sock) {
pr_debug("atm_tc_put: f_count %ld\n",
file_count(flow->sock->file));
flow->vcc->pop = flow->old_pop;
sockfd_put(flow->sock);
}
if (flow->excess)
atm_tc_put(sch, (unsigned long)flow->excess);
if (flow != &p->link)
kfree(flow);
/*
* If flow == &p->link, the qdisc no longer works at this point and
* needs to be removed. (By the caller of atm_tc_put.)
*/
}
static void sch_atm_pop(struct atm_vcc *vcc, struct sk_buff *skb)
{
struct atm_qdisc_data *p = VCC2FLOW(vcc)->parent;
pr_debug("sch_atm_pop(vcc %p,skb %p,[qdisc %p])\n", vcc, skb, p);
VCC2FLOW(vcc)->old_pop(vcc, skb);
tasklet_schedule(&p->task);
}
static const u8 llc_oui_ip[] = {
0xaa, /* DSAP: non-ISO */
0xaa, /* SSAP: non-ISO */
0x03, /* Ctrl: Unnumbered Information Command PDU */
0x00, /* OUI: EtherType */
0x00, 0x00,
0x08, 0x00
}; /* Ethertype IP (0800) */
static const struct nla_policy atm_policy[TCA_ATM_MAX + 1] = {
[TCA_ATM_FD] = { .type = NLA_U32 },
[TCA_ATM_EXCESS] = { .type = NLA_U32 },
};
static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
struct nlattr **tca, unsigned long *arg,
struct netlink_ext_ack *extack)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
struct atm_flow_data *flow = (struct atm_flow_data *)*arg;
struct atm_flow_data *excess = NULL;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_ATM_MAX + 1];
struct socket *sock;
int fd, error, hdr_len;
void *hdr;
pr_debug("atm_tc_change(sch %p,[qdisc %p],classid %x,parent %x,"
"flow %p,opt %p)\n", sch, p, classid, parent, flow, opt);
/*
* The concept of parents doesn't apply for this qdisc.
*/
if (parent && parent != TC_H_ROOT && parent != sch->handle)
return -EINVAL;
/*
* ATM classes cannot be changed. In order to change properties of the
* ATM connection, that socket needs to be modified directly (via the
* native ATM API. In order to send a flow to a different VC, the old
* class needs to be removed and a new one added. (This may be changed
* later.)
*/
if (flow)
return -EBUSY;
if (opt == NULL)
return -EINVAL;
netlink: make validation more configurable for future strictness We currently have two levels of strict validation: 1) liberal (default) - undefined (type >= max) & NLA_UNSPEC attributes accepted - attribute length >= expected accepted - garbage at end of message accepted 2) strict (opt-in) - NLA_UNSPEC attributes accepted - attribute length >= expected accepted Split out parsing strictness into four different options: * TRAILING - check that there's no trailing data after parsing attributes (in message or nested) * MAXTYPE - reject attrs > max known type * UNSPEC - reject attributes with NLA_UNSPEC policy entries * STRICT_ATTRS - strictly validate attribute size The default for future things should be *everything*. The current *_strict() is a combination of TRAILING and MAXTYPE, and is renamed to _deprecated_strict(). The current regular parsing has none of this, and is renamed to *_parse_deprecated(). Additionally it allows us to selectively set one of the new flags even on old policies. Notably, the UNSPEC flag could be useful in this case, since it can be arranged (by filling in the policy) to not be an incompatible userspace ABI change, but would then going forward prevent forgetting attribute entries. Similar can apply to the POLICY flag. We end up with the following renames: * nla_parse -> nla_parse_deprecated * nla_parse_strict -> nla_parse_deprecated_strict * nlmsg_parse -> nlmsg_parse_deprecated * nlmsg_parse_strict -> nlmsg_parse_deprecated_strict * nla_parse_nested -> nla_parse_nested_deprecated * nla_validate_nested -> nla_validate_nested_deprecated Using spatch, of course: @@ expression TB, MAX, HEAD, LEN, POL, EXT; @@ -nla_parse(TB, MAX, HEAD, LEN, POL, EXT) +nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT) @@ expression NLH, HDRLEN, TB, MAX, POL, EXT; @@ -nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT) +nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT) @@ expression NLH, HDRLEN, TB, MAX, POL, EXT; @@ -nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT) +nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT) @@ expression TB, MAX, NLA, POL, EXT; @@ -nla_parse_nested(TB, MAX, NLA, POL, EXT) +nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT) @@ expression START, MAX, POL, EXT; @@ -nla_validate_nested(START, MAX, POL, EXT) +nla_validate_nested_deprecated(START, MAX, POL, EXT) @@ expression NLH, HDRLEN, MAX, POL, EXT; @@ -nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT) +nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT) For this patch, don't actually add the strict, non-renamed versions yet so that it breaks compile if I get it wrong. Also, while at it, make nla_validate and nla_parse go down to a common __nla_validate_parse() function to avoid code duplication. Ultimately, this allows us to have very strict validation for every new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the next patch, while existing things will continue to work as is. In effect then, this adds fully strict validation for any new command. Signed-off-by: Johannes Berg <johannes.berg@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 12:07:28 +00:00
error = nla_parse_nested_deprecated(tb, TCA_ATM_MAX, opt, atm_policy,
NULL);
if (error < 0)
return error;
if (!tb[TCA_ATM_FD])
return -EINVAL;
fd = nla_get_u32(tb[TCA_ATM_FD]);
pr_debug("atm_tc_change: fd %d\n", fd);
if (tb[TCA_ATM_HDR]) {
hdr_len = nla_len(tb[TCA_ATM_HDR]);
hdr = nla_data(tb[TCA_ATM_HDR]);
} else {
hdr_len = RFC1483LLC_LEN;
hdr = NULL; /* default LLC/SNAP for IP */
}
if (!tb[TCA_ATM_EXCESS])
excess = NULL;
else {
excess = (struct atm_flow_data *)
atm_tc_find(sch, nla_get_u32(tb[TCA_ATM_EXCESS]));
if (!excess)
return -ENOENT;
}
pr_debug("atm_tc_change: type %d, payload %d, hdr_len %d\n",
opt->nla_type, nla_len(opt), hdr_len);
sock = sockfd_lookup(fd, &error);
if (!sock)
return error; /* f_count++ */
pr_debug("atm_tc_change: f_count %ld\n", file_count(sock->file));
if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) {
error = -EPROTOTYPE;
goto err_out;
}
/* @@@ should check if the socket is really operational or we'll crash
on vcc->send */
if (classid) {
if (TC_H_MAJ(classid ^ sch->handle)) {
pr_debug("atm_tc_change: classid mismatch\n");
error = -EINVAL;
goto err_out;
}
} else {
int i;
unsigned long cl;
for (i = 1; i < 0x8000; i++) {
classid = TC_H_MAKE(sch->handle, 0x8000 | i);
cl = atm_tc_find(sch, classid);
if (!cl)
break;
}
}
pr_debug("atm_tc_change: new id %x\n", classid);
flow = kzalloc(sizeof(struct atm_flow_data) + hdr_len, GFP_KERNEL);
pr_debug("atm_tc_change: flow %p\n", flow);
if (!flow) {
error = -ENOBUFS;
goto err_out;
}
error = tcf_block_get(&flow->block, &flow->filter_list, sch,
extack);
if (error) {
kfree(flow);
goto err_out;
}
flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid,
extack);
if (!flow->q)
flow->q = &noop_qdisc;
pr_debug("atm_tc_change: qdisc %p\n", flow->q);
flow->sock = sock;
flow->vcc = ATM_SD(sock); /* speedup */
flow->vcc->user_back = flow;
pr_debug("atm_tc_change: vcc %p\n", flow->vcc);
flow->old_pop = flow->vcc->pop;
flow->parent = p;
flow->vcc->pop = sch_atm_pop;
flow->common.classid = classid;
flow->ref = 1;
flow->excess = excess;
list_add(&flow->list, &p->link.list);
flow->hdr_len = hdr_len;
if (hdr)
memcpy(flow->hdr, hdr, hdr_len);
else
memcpy(flow->hdr, llc_oui_ip, sizeof(llc_oui_ip));
*arg = (unsigned long)flow;
return 0;
err_out:
sockfd_put(sock);
return error;
}
static int atm_tc_delete(struct Qdisc *sch, unsigned long arg,
struct netlink_ext_ack *extack)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
struct atm_flow_data *flow = (struct atm_flow_data *)arg;
pr_debug("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
if (list_empty(&flow->list))
return -EINVAL;
if (rcu_access_pointer(flow->filter_list) || flow == &p->link)
return -EBUSY;
/*
* Reference count must be 2: one for "keepalive" (set at class
* creation), and one for the reference held when calling delete.
*/
if (flow->ref < 2) {
pr_err("atm_tc_delete: flow->ref == %d\n", flow->ref);
return -EINVAL;
}
if (flow->ref > 2)
return -EBUSY; /* catch references via excess, etc. */
atm_tc_put(sch, arg);
return 0;
}
static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
struct atm_flow_data *flow;
pr_debug("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
if (walker->stop)
return;
list_for_each_entry(flow, &p->flows, list) {
if (walker->count >= walker->skip &&
walker->fn(sch, (unsigned long)flow, walker) < 0) {
walker->stop = 1;
break;
}
walker->count++;
}
}
static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl,
struct netlink_ext_ack *extack)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
struct atm_flow_data *flow = (struct atm_flow_data *)cl;
pr_debug("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
return flow ? flow->block : p->link.block;
}
/* --------------------------- Qdisc operations ---------------------------- */
static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
struct atm_flow_data *flow;
struct tcf_result res;
int result;
int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
pr_debug("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
result = TC_ACT_OK; /* be nice to gcc */
flow = NULL;
if (TC_H_MAJ(skb->priority) != sch->handle ||
!(flow = (struct atm_flow_data *)atm_tc_find(sch, skb->priority))) {
struct tcf_proto *fl;
list_for_each_entry(flow, &p->flows, list) {
fl = rcu_dereference_bh(flow->filter_list);
if (fl) {
result = tcf_classify(skb, NULL, fl, &res, true);
if (result < 0)
continue;
flow = (struct atm_flow_data *)res.class;
if (!flow)
flow = lookup_flow(sch, res.classid);
goto done;
}
}
flow = NULL;
done:
;
}
if (!flow) {
flow = &p->link;
} else {
if (flow->vcc)
ATM_SKB(skb)->atm_options = flow->vcc->atm_options;
/*@@@ looks good ... but it's not supposed to work :-) */
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
case TC_ACT_TRAP:
__qdisc_drop(skb, to_free);
return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
case TC_ACT_SHOT:
__qdisc_drop(skb, to_free);
goto drop;
case TC_ACT_RECLASSIFY:
if (flow->excess)
flow = flow->excess;
else
ATM_SKB(skb)->atm_options |= ATM_ATMOPT_CLP;
break;
}
#endif
}
ret = qdisc_enqueue(skb, flow->q, to_free);
if (ret != NET_XMIT_SUCCESS) {
drop: __maybe_unused
if (net_xmit_drop_count(ret)) {
qdisc_qstats_drop(sch);
if (flow)
flow->qstats.drops++;
}
return ret;
}
/*
* Okay, this may seem weird. We pretend we've dropped the packet if
* it goes via ATM. The reason for this is that the outer qdisc
* expects to be able to q->dequeue the packet later on if we return
* success at this place. Also, sch->q.qdisc needs to reflect whether
* there is a packet egligible for dequeuing or not. Note that the
* statistics of the outer qdisc are necessarily wrong because of all
* this. There's currently no correct solution for this.
*/
if (flow == &p->link) {
sch->q.qlen++;
return NET_XMIT_SUCCESS;
}
tasklet_schedule(&p->task);
return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
}
/*
* Dequeue packets and send them over ATM. Note that we quite deliberately
* avoid checking net_device's flow control here, simply because sch_atm
* uses its own channels, which have nothing to do with any CLIP/LANE/or
* non-ATM interfaces.
*/
static void sch_atm_dequeue(struct tasklet_struct *t)
{
struct atm_qdisc_data *p = from_tasklet(p, t, task);
struct Qdisc *sch = qdisc_from_priv(p);
struct atm_flow_data *flow;
struct sk_buff *skb;
pr_debug("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p);
list_for_each_entry(flow, &p->flows, list) {
if (flow == &p->link)
continue;
/*
* If traffic is properly shaped, this won't generate nasty
* little bursts. Otherwise, it may ... (but that's okay)
*/
while ((skb = flow->q->ops->peek(flow->q))) {
if (!atm_may_send(flow->vcc, skb->truesize))
break;
skb = qdisc_dequeue_peeked(flow->q);
if (unlikely(!skb))
break;
qdisc_bstats_update(sch, skb);
bstats_update(&flow->bstats, skb);
pr_debug("atm_tc_dequeue: sending on class %p\n", flow);
/* remove any LL header somebody else has attached */
skb_pull(skb, skb_network_offset(skb));
if (skb_headroom(skb) < flow->hdr_len) {
struct sk_buff *new;
new = skb_realloc_headroom(skb, flow->hdr_len);
dev_kfree_skb(skb);
if (!new)
continue;
skb = new;
}
pr_debug("sch_atm_dequeue: ip %p, data %p\n",
skb_network_header(skb), skb->data);
ATM_SKB(skb)->vcc = flow->vcc;
memcpy(skb_push(skb, flow->hdr_len), flow->hdr,
flow->hdr_len);
refcount_add(skb->truesize,
&sk_atm(flow->vcc)->sk_wmem_alloc);
/* atm.atm_options are already set by atm_tc_enqueue */
flow->vcc->send(flow->vcc, skb);
}
}
}
static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
struct sk_buff *skb;
pr_debug("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p);
tasklet_schedule(&p->task);
skb = qdisc_dequeue_peeked(p->link.q);
if (skb)
sch->q.qlen--;
return skb;
}
static struct sk_buff *atm_tc_peek(struct Qdisc *sch)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
pr_debug("atm_tc_peek(sch %p,[qdisc %p])\n", sch, p);
return p->link.q->ops->peek(p->link.q);
}
static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
int err;
pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
INIT_LIST_HEAD(&p->flows);
INIT_LIST_HEAD(&p->link.list);
gnet_stats_basic_sync_init(&p->link.bstats);
list_add(&p->link.list, &p->flows);
p->link.q = qdisc_create_dflt(sch->dev_queue,
&pfifo_qdisc_ops, sch->handle, extack);
if (!p->link.q)
p->link.q = &noop_qdisc;
pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
p->link.vcc = NULL;
p->link.sock = NULL;
p->link.common.classid = sch->handle;
p->link.ref = 1;
err = tcf_block_get(&p->link.block, &p->link.filter_list, sch,
extack);
if (err)
return err;
tasklet_setup(&p->task, sch_atm_dequeue);
return 0;
}
static void atm_tc_reset(struct Qdisc *sch)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
struct atm_flow_data *flow;
pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p);
list_for_each_entry(flow, &p->flows, list)
qdisc_reset(flow->q);
sch->q.qlen = 0;
}
static void atm_tc_destroy(struct Qdisc *sch)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
struct atm_flow_data *flow, *tmp;
pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p);
list_for_each_entry(flow, &p->flows, list) {
tcf_block_put(flow->block);
flow->block = NULL;
}
list_for_each_entry_safe(flow, tmp, &p->flows, list) {
if (flow->ref > 1)
pr_err("atm_destroy: %p->ref = %d\n", flow, flow->ref);
atm_tc_put(sch, (unsigned long)flow);
}
tasklet_kill(&p->task);
}
static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
struct sk_buff *skb, struct tcmsg *tcm)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
struct atm_flow_data *flow = (struct atm_flow_data *)cl;
struct nlattr *nest;
pr_debug("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n",
sch, p, flow, skb, tcm);
if (list_empty(&flow->list))
return -EINVAL;
tcm->tcm_handle = flow->common.classid;
tcm->tcm_info = flow->q->handle;
nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
if (nla_put(skb, TCA_ATM_HDR, flow->hdr_len, flow->hdr))
goto nla_put_failure;
if (flow->vcc) {
struct sockaddr_atmpvc pvc;
int state;
memset(&pvc, 0, sizeof(pvc));
pvc.sap_family = AF_ATMPVC;
pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1;
pvc.sap_addr.vpi = flow->vcc->vpi;
pvc.sap_addr.vci = flow->vcc->vci;
if (nla_put(skb, TCA_ATM_ADDR, sizeof(pvc), &pvc))
goto nla_put_failure;
state = ATM_VF2VS(flow->vcc->flags);
if (nla_put_u32(skb, TCA_ATM_STATE, state))
goto nla_put_failure;
}
if (flow->excess) {
if (nla_put_u32(skb, TCA_ATM_EXCESS, flow->common.classid))
goto nla_put_failure;
} else {
if (nla_put_u32(skb, TCA_ATM_EXCESS, 0))
goto nla_put_failure;
}
return nla_nest_end(skb, nest);
nla_put_failure:
nla_nest_cancel(skb, nest);
return -1;
}
static int
atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
struct gnet_dump *d)
{
struct atm_flow_data *flow = (struct atm_flow_data *)arg;
net: sched: Remove Qdisc::running sequence counter The Qdisc::running sequence counter has two uses: 1. Reliably reading qdisc's tc statistics while the qdisc is running (a seqcount read/retry loop at gnet_stats_add_basic()). 2. As a flag, indicating whether the qdisc in question is running (without any retry loops). For the first usage, the Qdisc::running sequence counter write section, qdisc_run_begin() => qdisc_run_end(), covers a much wider area than what is actually needed: the raw qdisc's bstats update. A u64_stats sync point was thus introduced (in previous commits) inside the bstats structure itself. A local u64_stats write section is then started and stopped for the bstats updates. Use that u64_stats sync point mechanism for the bstats read/retry loop at gnet_stats_add_basic(). For the second qdisc->running usage, a __QDISC_STATE_RUNNING bit flag, accessed with atomic bitops, is sufficient. Using a bit flag instead of a sequence counter at qdisc_run_begin/end() and qdisc_is_running() leads to the SMP barriers implicitly added through raw_read_seqcount() and write_seqcount_begin/end() getting removed. All call sites have been surveyed though, and no required ordering was identified. Now that the qdisc->running sequence counter is no longer used, remove it. Note, using u64_stats implies no sequence counter protection for 64-bit architectures. This can lead to the qdisc tc statistics "packets" vs. "bytes" values getting out of sync on rare occasions. The individual values will still be valid. Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-16 08:49:10 +00:00
if (gnet_stats_copy_basic(d, NULL, &flow->bstats, true) < 0 ||
gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0)
return -1;
return 0;
}
static int atm_tc_dump(struct Qdisc *sch, struct sk_buff *skb)
{
return 0;
}
static const struct Qdisc_class_ops atm_class_ops = {
.graft = atm_tc_graft,
.leaf = atm_tc_leaf,
.find = atm_tc_find,
.change = atm_tc_change,
.delete = atm_tc_delete,
.walk = atm_tc_walk,
.tcf_block = atm_tc_tcf_block,
.bind_tcf = atm_tc_bind_filter,
.unbind_tcf = atm_tc_put,
.dump = atm_tc_dump_class,
.dump_stats = atm_tc_dump_class_stats,
};
static struct Qdisc_ops atm_qdisc_ops __read_mostly = {
.cl_ops = &atm_class_ops,
.id = "atm",
.priv_size = sizeof(struct atm_qdisc_data),
.enqueue = atm_tc_enqueue,
.dequeue = atm_tc_dequeue,
.peek = atm_tc_peek,
.init = atm_tc_init,
.reset = atm_tc_reset,
.destroy = atm_tc_destroy,
.dump = atm_tc_dump,
.owner = THIS_MODULE,
};
static int __init atm_init(void)
{
return register_qdisc(&atm_qdisc_ops);
}
static void __exit atm_exit(void)
{
unregister_qdisc(&atm_qdisc_ops);
}
module_init(atm_init)
module_exit(atm_exit)
MODULE_LICENSE("GPL");