Merge branch 'net-sched-fix-ct-zone-matching-for-invalid-conntrack-state'

Paul Blakey says:

====================
net/sched: Fix ct zone matching for invalid conntrack state

Currently, when a packet is marked as invalid conntrack_in in act_ct,
post_ct will be set, and connection info (nf_conn) will be removed
from the skb. Later openvswitch and flower matching will parse this
as ct_state=+trk+inv. But because the connection info is missing,
there is also no zone info to match against even though the packet
is tracked.

This series fixes that, by passing the last executed zone by act_ct.
The zone info is passed along from act_ct to the ct flow dissector
(used by flower to extract zone info) and to ovs, the same way as post_ct
is passed, via qdisc layer skb cb to dissector, and via skb extension
to OVS.

Since adding any more data to qdisc skb cb, there will be no room
for BPF skb cb to extend it and stay under skb->cb size, this series
moves the tc related info from within qdisc skb cb to a tc specific cb
that also extends it.
====================

Link: https://lore.kernel.org/r/20211214172435.24207-1-paulb@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2021-12-17 18:06:39 -08:00
commit 14193d57c8
10 changed files with 50 additions and 21 deletions

View File

@ -286,6 +286,7 @@ struct nf_bridge_info {
struct tc_skb_ext {
__u32 chain;
__u16 mru;
__u16 zone;
bool post_ct;
};
#endif
@ -1380,7 +1381,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container,
u16 *ctinfo_map, size_t mapsize,
bool post_ct);
bool post_ct, u16 zone);
void
skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,

View File

@ -193,4 +193,20 @@ static inline void skb_txtime_consumed(struct sk_buff *skb)
skb->tstamp = ktime_set(0, 0);
}
struct tc_skb_cb {
struct qdisc_skb_cb qdisc_cb;
u16 mru;
bool post_ct;
u16 zone; /* Only valid if post_ct = true */
};
static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
{
struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb;
BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb));
return cb;
}
#endif

View File

@ -447,8 +447,6 @@ struct qdisc_skb_cb {
};
#define QDISC_CB_PRIV_LEN 20
unsigned char data[QDISC_CB_PRIV_LEN];
u16 mru;
bool post_ct;
};
typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);

View File

@ -3941,8 +3941,8 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
return skb;
/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
qdisc_skb_cb(skb)->mru = 0;
qdisc_skb_cb(skb)->post_ct = false;
tc_skb_cb(skb)->mru = 0;
tc_skb_cb(skb)->post_ct = false;
mini_qdisc_bstats_cpu_update(miniq, skb);
switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
@ -5103,8 +5103,8 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
}
qdisc_skb_cb(skb)->pkt_len = skb->len;
qdisc_skb_cb(skb)->mru = 0;
qdisc_skb_cb(skb)->post_ct = false;
tc_skb_cb(skb)->mru = 0;
tc_skb_cb(skb)->post_ct = false;
skb->tc_at_ingress = 1;
mini_qdisc_bstats_cpu_update(miniq, skb);

View File

@ -238,7 +238,7 @@ void
skb_flow_dissect_ct(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container, u16 *ctinfo_map,
size_t mapsize, bool post_ct)
size_t mapsize, bool post_ct, u16 zone)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
struct flow_dissector_key_ct *key;
@ -260,6 +260,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb,
if (!ct) {
key->ct_state = TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
TCA_FLOWER_KEY_CT_FLAGS_INVALID;
key->ct_zone = zone;
return;
}

View File

@ -34,6 +34,7 @@
#include <net/mpls.h>
#include <net/ndisc.h>
#include <net/nsh.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include "conntrack.h"
#include "datapath.h"
@ -860,6 +861,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
#endif
bool post_ct = false;
int res, err;
u16 zone = 0;
/* Extract metadata from packet. */
if (tun_info) {
@ -898,6 +900,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
key->recirc_id = tc_ext ? tc_ext->chain : 0;
OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0;
post_ct = tc_ext ? tc_ext->post_ct : false;
zone = post_ct ? tc_ext->zone : 0;
} else {
key->recirc_id = 0;
}
@ -906,8 +909,11 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
#endif
err = key_extract(skb, key);
if (!err)
if (!err) {
ovs_ct_fill_key(skb, key, post_ct); /* Must be after key_extract(). */
if (post_ct && !skb_get_nfct(skb))
key->ct_zone = zone;
}
return err;
}

View File

@ -690,10 +690,10 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
u8 family, u16 zone, bool *defrag)
{
enum ip_conntrack_info ctinfo;
struct qdisc_skb_cb cb;
struct nf_conn *ct;
int err = 0;
bool frag;
u16 mru;
/* Previously seen (loopback)? Ignore. */
ct = nf_ct_get(skb, &ctinfo);
@ -708,7 +708,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
return err;
skb_get(skb);
cb = *qdisc_skb_cb(skb);
mru = tc_skb_cb(skb)->mru;
if (family == NFPROTO_IPV4) {
enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
@ -722,7 +722,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
if (!err) {
*defrag = true;
cb.mru = IPCB(skb)->frag_max_size;
mru = IPCB(skb)->frag_max_size;
}
} else { /* NFPROTO_IPV6 */
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
@ -735,7 +735,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
if (!err) {
*defrag = true;
cb.mru = IP6CB(skb)->frag_max_size;
mru = IP6CB(skb)->frag_max_size;
}
#else
err = -EOPNOTSUPP;
@ -744,7 +744,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
}
if (err != -EINPROGRESS)
*qdisc_skb_cb(skb) = cb;
tc_skb_cb(skb)->mru = mru;
skb_clear_hash(skb);
skb->ignore_df = 1;
return err;
@ -963,7 +963,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
tcf_action_update_bstats(&c->common, skb);
if (clear) {
qdisc_skb_cb(skb)->post_ct = false;
tc_skb_cb(skb)->post_ct = false;
ct = nf_ct_get(skb, &ctinfo);
if (ct) {
nf_conntrack_put(&ct->ct_general);
@ -1048,7 +1048,8 @@ do_nat:
out_push:
skb_push_rcsum(skb, nh_ofs);
qdisc_skb_cb(skb)->post_ct = true;
tc_skb_cb(skb)->post_ct = true;
tc_skb_cb(skb)->zone = p->zone;
out_clear:
if (defrag)
qdisc_skb_cb(skb)->pkt_len = skb->len;

View File

@ -1617,12 +1617,15 @@ int tcf_classify(struct sk_buff *skb,
/* If we missed on some chain */
if (ret == TC_ACT_UNSPEC && last_executed_chain) {
struct tc_skb_cb *cb = tc_skb_cb(skb);
ext = tc_skb_ext_alloc(skb);
if (WARN_ON_ONCE(!ext))
return TC_ACT_SHOT;
ext->chain = last_executed_chain;
ext->mru = qdisc_skb_cb(skb)->mru;
ext->post_ct = qdisc_skb_cb(skb)->post_ct;
ext->mru = cb->mru;
ext->post_ct = cb->post_ct;
ext->zone = cb->zone;
}
return ret;

View File

@ -19,6 +19,7 @@
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/pkt_sched.h>
#include <net/ip.h>
#include <net/flow_dissector.h>
#include <net/geneve.h>
@ -309,7 +310,8 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
struct cls_fl_head *head = rcu_dereference_bh(tp->root);
bool post_ct = qdisc_skb_cb(skb)->post_ct;
bool post_ct = tc_skb_cb(skb)->post_ct;
u16 zone = tc_skb_cb(skb)->zone;
struct fl_flow_key skb_key;
struct fl_flow_mask *mask;
struct cls_fl_filter *f;
@ -327,7 +329,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
skb_flow_dissect_ct(skb, &mask->dissector, &skb_key,
fl_ct_info_to_flower_map,
ARRAY_SIZE(fl_ct_info_to_flower_map),
post_ct);
post_ct, zone);
skb_flow_dissect_hash(skb, &mask->dissector, &skb_key);
skb_flow_dissect(skb, &mask->dissector, &skb_key,
FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP);

View File

@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/dst.h>
#include <net/ip.h>
#include <net/ip6_fib.h>
@ -137,7 +138,7 @@ err:
int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb))
{
u16 mru = qdisc_skb_cb(skb)->mru;
u16 mru = tc_skb_cb(skb)->mru;
int err;
if (mru && skb->len > mru + skb->dev->hard_header_len)