Merge branch 'TC-refactor-act_mirred-packets-re-injection'
Paolo Abeni says: ==================== TC: refactor act_mirred packets re-injection This series is aimed at improving the act_mirred redirect performances. Such action is used by OVS to represent TC S/W flows, and it's current largest bottle-neck is the need for a skb_clone() for each packet. The first 2 patches introduce some cleanup and safeguards to allow extending tca_result - we will use it to store RCU protected redirect information - and introduce a clear separation between user-space accessible tcfa_action values and internal values accessible only by the kernel. Then a new tcfa_action value is introduced: TC_ACT_REINJECT, similar to TC_ACT_REDIRECT, but preserving the mirred semantic. Such value is not accessible from user-space. The last patch exploits the newly introduced infrastructure in the act_mirred action, to avoid a skb_clone, when possible. Overall this the above gives a ~10% performance improvement in forwarding tput, when using the TC S/W datapath. v1 -> v2: - preserve the rcu lock in act_bpf - add and use a new action value to reinject the packets, preserving the mirred semantic v2 -> v3: - renamed to new action as TC_ACT_REINJECT - TC_ACT_REINJECT is not exposed to user-space v3 -> v4: - dropped the TC_ACT_REDIRECT patch - report failure via extack, too - rename the new action as TC_ACT_REINSERT - skip clone only if the control action don't touch tcf_result v4 -> v5: - fix a couple of build issue reported by kbuild bot - dont split messages ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
8f3f6500c7
@ -85,7 +85,7 @@ struct tc_action_ops {
|
||||
size_t size;
|
||||
struct module *owner;
|
||||
int (*act)(struct sk_buff *, const struct tc_action *,
|
||||
struct tcf_result *);
|
||||
struct tcf_result *); /* called under RCU BH lock*/
|
||||
int (*dump)(struct sk_buff *, struct tc_action *, int, int);
|
||||
void (*cleanup)(struct tc_action *);
|
||||
int (*lookup)(struct net *net, struct tc_action **a, u32 index,
|
||||
|
@ -7,6 +7,9 @@
|
||||
#include <net/sch_generic.h>
|
||||
#include <net/act_api.h>
|
||||
|
||||
/* TC action not accessible from user space */
|
||||
#define TC_ACT_REINSERT (TC_ACT_VALUE_MAX + 1)
|
||||
|
||||
/* Basic packet classifier frontend definitions. */
|
||||
|
||||
struct tcf_walker {
|
||||
|
@ -235,6 +235,12 @@ struct tcf_result {
|
||||
u32 classid;
|
||||
};
|
||||
const struct tcf_proto *goto_tp;
|
||||
|
||||
/* used by the TC_ACT_REINSERT action */
|
||||
struct {
|
||||
bool ingress;
|
||||
struct gnet_stats_queue *qstats;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
@ -285,6 +291,8 @@ struct tcf_proto {
|
||||
/* Fast access part */
|
||||
struct tcf_proto __rcu *next;
|
||||
void __rcu *root;
|
||||
|
||||
/* called under RCU BH lock*/
|
||||
int (*classify)(struct sk_buff *,
|
||||
const struct tcf_proto *,
|
||||
struct tcf_result *);
|
||||
@ -567,6 +575,15 @@ static inline void skb_reset_tc(struct sk_buff *skb)
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline bool skb_is_tc_redirected(const struct sk_buff *skb)
|
||||
{
|
||||
#ifdef CONFIG_NET_CLS_ACT
|
||||
return skb->tc_redirected;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline bool skb_at_tc_ingress(const struct sk_buff *skb)
|
||||
{
|
||||
#ifdef CONFIG_NET_CLS_ACT
|
||||
@ -1106,4 +1123,17 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
|
||||
void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
|
||||
struct mini_Qdisc __rcu **p_miniq);
|
||||
|
||||
static inline void skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res)
|
||||
{
|
||||
struct gnet_stats_queue *stats = res->qstats;
|
||||
int ret;
|
||||
|
||||
if (res->ingress)
|
||||
ret = netif_receive_skb(skb);
|
||||
else
|
||||
ret = dev_queue_xmit(skb);
|
||||
if (ret && stats)
|
||||
qstats_overlimit_inc(res->qstats);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -45,6 +45,7 @@ enum {
|
||||
* the skb and act like everything
|
||||
* is alright.
|
||||
*/
|
||||
#define TC_ACT_VALUE_MAX TC_ACT_TRAP
|
||||
|
||||
/* There is a special kind of actions called "extended actions",
|
||||
* which need a value parameter. These have a local opcode located in
|
||||
@ -55,11 +56,12 @@ enum {
|
||||
#define __TC_ACT_EXT_SHIFT 28
|
||||
#define __TC_ACT_EXT(local) ((local) << __TC_ACT_EXT_SHIFT)
|
||||
#define TC_ACT_EXT_VAL_MASK ((1 << __TC_ACT_EXT_SHIFT) - 1)
|
||||
#define TC_ACT_EXT_CMP(combined, opcode) \
|
||||
(((combined) & (~TC_ACT_EXT_VAL_MASK)) == opcode)
|
||||
#define TC_ACT_EXT_OPCODE(combined) ((combined) & (~TC_ACT_EXT_VAL_MASK))
|
||||
#define TC_ACT_EXT_CMP(combined, opcode) (TC_ACT_EXT_OPCODE(combined) == opcode)
|
||||
|
||||
#define TC_ACT_JUMP __TC_ACT_EXT(1)
|
||||
#define TC_ACT_GOTO_CHAIN __TC_ACT_EXT(2)
|
||||
#define TC_ACT_EXT_OPCODE_MAX TC_ACT_GOTO_CHAIN
|
||||
|
||||
/* Action type identifiers*/
|
||||
enum {
|
||||
|
@ -4252,7 +4252,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
|
||||
/* Reinjected packets coming from act_mirred or similar should
|
||||
* not get XDP generic processing.
|
||||
*/
|
||||
if (skb_cloned(skb))
|
||||
if (skb_cloned(skb) || skb_is_tc_redirected(skb))
|
||||
return XDP_PASS;
|
||||
|
||||
/* XDP packets must be linear and must have sufficient headroom
|
||||
@ -4602,6 +4602,10 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
|
||||
__skb_push(skb, skb->mac_len);
|
||||
skb_do_redirect(skb);
|
||||
return NULL;
|
||||
case TC_ACT_REINSERT:
|
||||
/* this does not scrub the packet, and updates stats on error */
|
||||
skb_tc_reinsert(skb, &cl_res);
|
||||
return NULL;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -786,6 +786,15 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
|
||||
return c;
|
||||
}
|
||||
|
||||
static bool tcf_action_valid(int action)
|
||||
{
|
||||
int opcode = TC_ACT_EXT_OPCODE(action);
|
||||
|
||||
if (!opcode)
|
||||
return action <= TC_ACT_VALUE_MAX;
|
||||
return opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC;
|
||||
}
|
||||
|
||||
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
|
||||
struct nlattr *nla, struct nlattr *est,
|
||||
char *name, int ovr, int bind,
|
||||
@ -895,6 +904,11 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
|
||||
}
|
||||
}
|
||||
|
||||
if (!tcf_action_valid(a->tcfa_action)) {
|
||||
NL_SET_ERR_MSG(extack, "invalid action value, using TC_ACT_UNSPEC instead");
|
||||
a->tcfa_action = TC_ACT_UNSPEC;
|
||||
}
|
||||
|
||||
return a;
|
||||
|
||||
err_mod:
|
||||
|
@ -561,15 +561,14 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
|
||||
u32 update_flags;
|
||||
int action;
|
||||
|
||||
rcu_read_lock();
|
||||
params = rcu_dereference(p->params);
|
||||
params = rcu_dereference_bh(p->params);
|
||||
|
||||
tcf_lastuse_update(&p->tcf_tm);
|
||||
bstats_cpu_update(this_cpu_ptr(p->common.cpu_bstats), skb);
|
||||
|
||||
action = READ_ONCE(p->tcf_action);
|
||||
if (unlikely(action == TC_ACT_SHOT))
|
||||
goto drop_stats;
|
||||
goto drop;
|
||||
|
||||
update_flags = params->update_flags;
|
||||
switch (tc_skb_protocol(skb)) {
|
||||
@ -583,16 +582,11 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
|
||||
break;
|
||||
}
|
||||
|
||||
unlock:
|
||||
rcu_read_unlock();
|
||||
return action;
|
||||
|
||||
drop:
|
||||
action = TC_ACT_SHOT;
|
||||
|
||||
drop_stats:
|
||||
qstats_drop_inc(this_cpu_ptr(p->common.cpu_qstats));
|
||||
goto unlock;
|
||||
return TC_ACT_SHOT;
|
||||
}
|
||||
|
||||
static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
|
||||
|
@ -820,14 +820,11 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a,
|
||||
struct tcf_ife_params *p;
|
||||
int ret;
|
||||
|
||||
rcu_read_lock();
|
||||
p = rcu_dereference(ife->params);
|
||||
p = rcu_dereference_bh(ife->params);
|
||||
if (p->flags & IFE_ENCODE) {
|
||||
ret = tcf_ife_encode(skb, a, res, p);
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
return tcf_ife_decode(skb, a, res);
|
||||
}
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include <net/net_namespace.h>
|
||||
#include <net/netlink.h>
|
||||
#include <net/pkt_sched.h>
|
||||
#include <net/pkt_cls.h>
|
||||
#include <linux/tc_act/tc_mirred.h>
|
||||
#include <net/tc_act/tc_mirred.h>
|
||||
|
||||
@ -49,6 +50,18 @@ static bool tcf_mirred_act_wants_ingress(int action)
|
||||
}
|
||||
}
|
||||
|
||||
static bool tcf_mirred_can_reinsert(int action)
|
||||
{
|
||||
switch (action) {
|
||||
case TC_ACT_SHOT:
|
||||
case TC_ACT_STOLEN:
|
||||
case TC_ACT_QUEUED:
|
||||
case TC_ACT_TRAP:
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static void tcf_mirred_release(struct tc_action *a)
|
||||
{
|
||||
struct tcf_mirred *m = to_mirred(a);
|
||||
@ -171,21 +184,23 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
|
||||
struct tcf_result *res)
|
||||
{
|
||||
struct tcf_mirred *m = to_mirred(a);
|
||||
struct sk_buff *skb2 = skb;
|
||||
bool m_mac_header_xmit;
|
||||
struct net_device *dev;
|
||||
struct sk_buff *skb2;
|
||||
int retval, err = 0;
|
||||
bool use_reinsert;
|
||||
bool want_ingress;
|
||||
bool is_redirect;
|
||||
int m_eaction;
|
||||
int mac_len;
|
||||
|
||||
tcf_lastuse_update(&m->tcf_tm);
|
||||
bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
|
||||
|
||||
rcu_read_lock();
|
||||
m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit);
|
||||
m_eaction = READ_ONCE(m->tcfm_eaction);
|
||||
retval = READ_ONCE(m->tcf_action);
|
||||
dev = rcu_dereference(m->tcfm_dev);
|
||||
dev = rcu_dereference_bh(m->tcfm_dev);
|
||||
if (unlikely(!dev)) {
|
||||
pr_notice_once("tc mirred: target device is gone\n");
|
||||
goto out;
|
||||
@ -197,16 +212,25 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
|
||||
goto out;
|
||||
}
|
||||
|
||||
skb2 = skb_clone(skb, GFP_ATOMIC);
|
||||
if (!skb2)
|
||||
goto out;
|
||||
/* we could easily avoid the clone only if called by ingress and clsact;
|
||||
* since we can't easily detect the clsact caller, skip clone only for
|
||||
* ingress - that covers the TC S/W datapath.
|
||||
*/
|
||||
is_redirect = tcf_mirred_is_act_redirect(m_eaction);
|
||||
use_reinsert = skb_at_tc_ingress(skb) && is_redirect &&
|
||||
tcf_mirred_can_reinsert(retval);
|
||||
if (!use_reinsert) {
|
||||
skb2 = skb_clone(skb, GFP_ATOMIC);
|
||||
if (!skb2)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* If action's target direction differs than filter's direction,
|
||||
* and devices expect a mac header on xmit, then mac push/pull is
|
||||
* needed.
|
||||
*/
|
||||
if (skb_at_tc_ingress(skb) != tcf_mirred_act_wants_ingress(m_eaction) &&
|
||||
m_mac_header_xmit) {
|
||||
want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
|
||||
if (skb_at_tc_ingress(skb) != want_ingress && m_mac_header_xmit) {
|
||||
if (!skb_at_tc_ingress(skb)) {
|
||||
/* caught at egress, act ingress: pull mac */
|
||||
mac_len = skb_network_header(skb) - skb_mac_header(skb);
|
||||
@ -217,15 +241,23 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
|
||||
}
|
||||
}
|
||||
|
||||
/* mirror is always swallowed */
|
||||
if (tcf_mirred_is_act_redirect(m_eaction)) {
|
||||
skb2->tc_redirected = 1;
|
||||
skb2->tc_from_ingress = skb2->tc_at_ingress;
|
||||
}
|
||||
|
||||
skb2->skb_iif = skb->dev->ifindex;
|
||||
skb2->dev = dev;
|
||||
if (!tcf_mirred_act_wants_ingress(m_eaction))
|
||||
|
||||
/* mirror is always swallowed */
|
||||
if (is_redirect) {
|
||||
skb2->tc_redirected = 1;
|
||||
skb2->tc_from_ingress = skb2->tc_at_ingress;
|
||||
|
||||
/* let's the caller reinsert the packet, if possible */
|
||||
if (use_reinsert) {
|
||||
res->ingress = want_ingress;
|
||||
res->qstats = this_cpu_ptr(m->common.cpu_qstats);
|
||||
return TC_ACT_REINSERT;
|
||||
}
|
||||
}
|
||||
|
||||
if (!want_ingress)
|
||||
err = dev_queue_xmit(skb2);
|
||||
else
|
||||
err = netif_receive_skb(skb2);
|
||||
@ -236,7 +268,6 @@ out:
|
||||
if (tcf_mirred_is_act_redirect(m_eaction))
|
||||
retval = TC_ACT_SHOT;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
@ -140,8 +140,7 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
|
||||
bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb);
|
||||
retval = READ_ONCE(s->tcf_action);
|
||||
|
||||
rcu_read_lock();
|
||||
psample_group = rcu_dereference(s->psample_group);
|
||||
psample_group = rcu_dereference_bh(s->psample_group);
|
||||
|
||||
/* randomly sample packets according to rate */
|
||||
if (psample_group && (prandom_u32() % s->rate == 0)) {
|
||||
@ -165,7 +164,6 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
|
||||
skb_pull(skb, skb->mac_len);
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
@ -43,8 +43,7 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
|
||||
tcf_lastuse_update(&d->tcf_tm);
|
||||
bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
|
||||
|
||||
rcu_read_lock();
|
||||
params = rcu_dereference(d->params);
|
||||
params = rcu_dereference_bh(d->params);
|
||||
action = READ_ONCE(d->tcf_action);
|
||||
|
||||
if (params->flags & SKBEDIT_F_PRIORITY)
|
||||
@ -77,14 +76,11 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
|
||||
}
|
||||
if (params->flags & SKBEDIT_F_PTYPE)
|
||||
skb->pkt_type = params->ptype;
|
||||
|
||||
unlock:
|
||||
rcu_read_unlock();
|
||||
return action;
|
||||
|
||||
err:
|
||||
qstats_drop_inc(this_cpu_ptr(d->common.cpu_qstats));
|
||||
action = TC_ACT_SHOT;
|
||||
goto unlock;
|
||||
return TC_ACT_SHOT;
|
||||
}
|
||||
|
||||
static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
|
||||
|
@ -41,20 +41,14 @@ static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a,
|
||||
* then MAX_EDIT_LEN needs to change appropriately
|
||||
*/
|
||||
err = skb_ensure_writable(skb, MAX_EDIT_LEN);
|
||||
if (unlikely(err)) { /* best policy is to drop on the floor */
|
||||
qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
|
||||
return TC_ACT_SHOT;
|
||||
}
|
||||
if (unlikely(err)) /* best policy is to drop on the floor */
|
||||
goto drop;
|
||||
|
||||
rcu_read_lock();
|
||||
action = READ_ONCE(d->tcf_action);
|
||||
if (unlikely(action == TC_ACT_SHOT)) {
|
||||
qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
|
||||
rcu_read_unlock();
|
||||
return action;
|
||||
}
|
||||
if (unlikely(action == TC_ACT_SHOT))
|
||||
goto drop;
|
||||
|
||||
p = rcu_dereference(d->skbmod_p);
|
||||
p = rcu_dereference_bh(d->skbmod_p);
|
||||
flags = p->flags;
|
||||
if (flags & SKBMOD_F_DMAC)
|
||||
ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst);
|
||||
@ -62,7 +56,6 @@ static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a,
|
||||
ether_addr_copy(eth_hdr(skb)->h_source, p->eth_src);
|
||||
if (flags & SKBMOD_F_ETYPE)
|
||||
eth_hdr(skb)->h_proto = p->eth_type;
|
||||
rcu_read_unlock();
|
||||
|
||||
if (flags & SKBMOD_F_SWAPMAC) {
|
||||
u16 tmpaddr[ETH_ALEN / 2]; /* ether_addr_copy() requirement */
|
||||
@ -73,6 +66,10 @@ static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a,
|
||||
}
|
||||
|
||||
return action;
|
||||
|
||||
drop:
|
||||
qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
|
||||
return TC_ACT_SHOT;
|
||||
}
|
||||
|
||||
static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
|
||||
|
@ -31,9 +31,7 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
|
||||
struct tcf_tunnel_key_params *params;
|
||||
int action;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
params = rcu_dereference(t->params);
|
||||
params = rcu_dereference_bh(t->params);
|
||||
|
||||
tcf_lastuse_update(&t->tcf_tm);
|
||||
bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb);
|
||||
@ -53,8 +51,6 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
|
||||
break;
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
|
||||
return action;
|
||||
}
|
||||
|
||||
|
@ -40,11 +40,9 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
|
||||
if (skb_at_tc_ingress(skb))
|
||||
skb_push_rcsum(skb, skb->mac_len);
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
action = READ_ONCE(v->tcf_action);
|
||||
|
||||
p = rcu_dereference(v->vlan_p);
|
||||
p = rcu_dereference_bh(v->vlan_p);
|
||||
|
||||
switch (p->tcfv_action) {
|
||||
case TCA_VLAN_ACT_POP:
|
||||
@ -61,7 +59,7 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
|
||||
case TCA_VLAN_ACT_MODIFY:
|
||||
/* No-op if no vlan tag (either hw-accel or in-payload) */
|
||||
if (!skb_vlan_tagged(skb))
|
||||
goto unlock;
|
||||
goto out;
|
||||
/* extract existing tag (and guarantee no hw-accel tag) */
|
||||
if (skb_vlan_tag_present(skb)) {
|
||||
tci = skb_vlan_tag_get(skb);
|
||||
@ -86,18 +84,15 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
|
||||
BUG();
|
||||
}
|
||||
|
||||
goto unlock;
|
||||
|
||||
drop:
|
||||
action = TC_ACT_SHOT;
|
||||
qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats));
|
||||
|
||||
unlock:
|
||||
rcu_read_unlock();
|
||||
out:
|
||||
if (skb_at_tc_ingress(skb))
|
||||
skb_pull_rcsum(skb, skb->mac_len);
|
||||
|
||||
return action;
|
||||
|
||||
drop:
|
||||
qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats));
|
||||
return TC_ACT_SHOT;
|
||||
}
|
||||
|
||||
static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
|
||||
|
Loading…
Reference in New Issue
Block a user