mirror of
https://github.com/torvalds/linux.git
synced 2025-01-01 15:51:46 +00:00
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf
Pablo Neira Ayuso says: ==================== Netfilter fixes for net 1) Fix bogus compiler warning in nfnetlink_queue, from Florian Westphal. 2) Don't run conntrack on vrf with !dflt qdisc, from Nicolas Dichtel. 3) Fix nft_pipapo bucket load in AVX2 lookup routine for six 8-bit groups, from Stefano Brivio. 4) Break rule evaluation on malformed TCP options. 5) Use socat instead of nc in selftests/netfilter/nft_zones_many.sh, also from Florian. 6) Fix KCSAN data-race in conntrack timeout updates, from Eric Dumazet. * git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf: netfilter: conntrack: annotate data-races around ct->timeout selftests: netfilter: switch zone stress to socat netfilter: nft_exthdr: break evaluation if setting TCP option fails selftests: netfilter: Add correctness test for mac,net set type nft_set_pipapo: Fix bucket load in AVX2 lookup routine for six 8-bit groups vrf: don't run conntrack on vrf with !dflt qdisc netfilter: nfnetlink_queue: silence bogus compiler warning ==================== Link: https://lore.kernel.org/r/20211209000847.102598-1-pablo@netfilter.org Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
fd31cb0c6a
@ -770,8 +770,6 @@ static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev,
|
||||
|
||||
skb->dev = vrf_dev;
|
||||
|
||||
vrf_nf_set_untracked(skb);
|
||||
|
||||
err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk,
|
||||
skb, NULL, vrf_dev, vrf_ip6_out_direct_finish);
|
||||
|
||||
@ -792,6 +790,8 @@ static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
|
||||
if (rt6_need_strict(&ipv6_hdr(skb)->daddr))
|
||||
return skb;
|
||||
|
||||
vrf_nf_set_untracked(skb);
|
||||
|
||||
if (qdisc_tx_is_default(vrf_dev) ||
|
||||
IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED)
|
||||
return vrf_ip6_out_direct(vrf_dev, sk, skb);
|
||||
@ -1000,8 +1000,6 @@ static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev,
|
||||
|
||||
skb->dev = vrf_dev;
|
||||
|
||||
vrf_nf_set_untracked(skb);
|
||||
|
||||
err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk,
|
||||
skb, NULL, vrf_dev, vrf_ip_out_direct_finish);
|
||||
|
||||
@ -1023,6 +1021,8 @@ static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
|
||||
ipv4_is_lbcast(ip_hdr(skb)->daddr))
|
||||
return skb;
|
||||
|
||||
vrf_nf_set_untracked(skb);
|
||||
|
||||
if (qdisc_tx_is_default(vrf_dev) ||
|
||||
IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
|
||||
return vrf_ip_out_direct(vrf_dev, sk, skb);
|
||||
|
@ -276,14 +276,14 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb)
|
||||
/* jiffies until ct expires, 0 if already expired */
|
||||
static inline unsigned long nf_ct_expires(const struct nf_conn *ct)
|
||||
{
|
||||
s32 timeout = ct->timeout - nfct_time_stamp;
|
||||
s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
|
||||
|
||||
return timeout > 0 ? timeout : 0;
|
||||
}
|
||||
|
||||
static inline bool nf_ct_is_expired(const struct nf_conn *ct)
|
||||
{
|
||||
return (__s32)(ct->timeout - nfct_time_stamp) <= 0;
|
||||
return (__s32)(READ_ONCE(ct->timeout) - nfct_time_stamp) <= 0;
|
||||
}
|
||||
|
||||
/* use after obtaining a reference count */
|
||||
@ -302,7 +302,7 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
|
||||
static inline void nf_ct_offload_timeout(struct nf_conn *ct)
|
||||
{
|
||||
if (nf_ct_expires(ct) < NF_CT_DAY / 2)
|
||||
ct->timeout = nfct_time_stamp + NF_CT_DAY;
|
||||
WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
|
||||
}
|
||||
|
||||
struct kernel_param;
|
||||
|
@ -684,7 +684,7 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
|
||||
|
||||
tstamp = nf_conn_tstamp_find(ct);
|
||||
if (tstamp) {
|
||||
s32 timeout = ct->timeout - nfct_time_stamp;
|
||||
s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
|
||||
|
||||
tstamp->stop = ktime_get_real_ns();
|
||||
if (timeout < 0)
|
||||
@ -1036,7 +1036,7 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
|
||||
}
|
||||
|
||||
/* We want the clashing entry to go away real soon: 1 second timeout. */
|
||||
loser_ct->timeout = nfct_time_stamp + HZ;
|
||||
WRITE_ONCE(loser_ct->timeout, nfct_time_stamp + HZ);
|
||||
|
||||
/* IPS_NAT_CLASH removes the entry automatically on the first
|
||||
* reply. Also prevents UDP tracker from moving the entry to
|
||||
@ -1560,7 +1560,7 @@ __nf_conntrack_alloc(struct net *net,
|
||||
/* save hash for reusing when confirming */
|
||||
*(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
|
||||
ct->status = 0;
|
||||
ct->timeout = 0;
|
||||
WRITE_ONCE(ct->timeout, 0);
|
||||
write_pnet(&ct->ct_net, net);
|
||||
memset(&ct->__nfct_init_offset, 0,
|
||||
offsetof(struct nf_conn, proto) -
|
||||
|
@ -1998,7 +1998,7 @@ static int ctnetlink_change_timeout(struct nf_conn *ct,
|
||||
|
||||
if (timeout > INT_MAX)
|
||||
timeout = INT_MAX;
|
||||
ct->timeout = nfct_time_stamp + (u32)timeout;
|
||||
WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
|
||||
|
||||
if (test_bit(IPS_DYING_BIT, &ct->status))
|
||||
return -ETIME;
|
||||
|
@ -201,8 +201,8 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
|
||||
if (timeout < 0)
|
||||
timeout = 0;
|
||||
|
||||
if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
|
||||
ct->timeout = nfct_time_stamp + timeout;
|
||||
if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
|
||||
WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
|
||||
}
|
||||
|
||||
static void flow_offload_fixup_ct_state(struct nf_conn *ct)
|
||||
|
@ -387,7 +387,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
|
||||
struct net_device *indev;
|
||||
struct net_device *outdev;
|
||||
struct nf_conn *ct = NULL;
|
||||
enum ip_conntrack_info ctinfo;
|
||||
enum ip_conntrack_info ctinfo = 0;
|
||||
struct nfnl_ct_hook *nfnl_ct;
|
||||
bool csum_verify;
|
||||
char *secdata = NULL;
|
||||
|
@ -236,7 +236,7 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
|
||||
|
||||
tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
|
||||
if (!tcph)
|
||||
return;
|
||||
goto err;
|
||||
|
||||
opt = (u8 *)tcph;
|
||||
for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
|
||||
@ -251,16 +251,16 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
|
||||
continue;
|
||||
|
||||
if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
|
||||
return;
|
||||
goto err;
|
||||
|
||||
if (skb_ensure_writable(pkt->skb,
|
||||
nft_thoff(pkt) + i + priv->len))
|
||||
return;
|
||||
goto err;
|
||||
|
||||
tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
|
||||
&tcphdr_len);
|
||||
if (!tcph)
|
||||
return;
|
||||
goto err;
|
||||
|
||||
offset = i + priv->offset;
|
||||
|
||||
@ -303,6 +303,9 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
|
||||
|
||||
return;
|
||||
}
|
||||
return;
|
||||
err:
|
||||
regs->verdict.code = NFT_BREAK;
|
||||
}
|
||||
|
||||
static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
|
||||
|
@ -886,7 +886,7 @@ static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill,
|
||||
NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 4, pkt[4], bsize);
|
||||
|
||||
NFT_PIPAPO_AVX2_AND(5, 0, 1);
|
||||
NFT_PIPAPO_AVX2_BUCKET_LOAD8(6, lt, 6, pkt[5], bsize);
|
||||
NFT_PIPAPO_AVX2_BUCKET_LOAD8(6, lt, 5, pkt[5], bsize);
|
||||
NFT_PIPAPO_AVX2_AND(7, 2, 3);
|
||||
|
||||
/* Stall */
|
||||
|
@ -150,11 +150,27 @@ EOF
|
||||
# oifname is the vrf device.
|
||||
test_masquerade_vrf()
|
||||
{
|
||||
local qdisc=$1
|
||||
|
||||
if [ "$qdisc" != "default" ]; then
|
||||
tc -net $ns0 qdisc add dev tvrf root $qdisc
|
||||
fi
|
||||
|
||||
ip netns exec $ns0 conntrack -F 2>/dev/null
|
||||
|
||||
ip netns exec $ns0 nft -f - <<EOF
|
||||
flush ruleset
|
||||
table ip nat {
|
||||
chain rawout {
|
||||
type filter hook output priority raw;
|
||||
|
||||
oif tvrf ct state untracked counter
|
||||
}
|
||||
chain postrouting2 {
|
||||
type filter hook postrouting priority mangle;
|
||||
|
||||
oif tvrf ct state untracked counter
|
||||
}
|
||||
chain postrouting {
|
||||
type nat hook postrouting priority 0;
|
||||
# NB: masquerade should always be combined with 'oif(name) bla',
|
||||
@ -171,13 +187,18 @@ EOF
|
||||
fi
|
||||
|
||||
# must also check that nat table was evaluated on second (lower device) iteration.
|
||||
ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2'
|
||||
ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' &&
|
||||
ip netns exec $ns0 nft list table ip nat |grep -q 'untracked counter packets [1-9]'
|
||||
if [ $? -eq 0 ]; then
|
||||
echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device"
|
||||
echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)"
|
||||
else
|
||||
echo "FAIL: vrf masq rule has unexpected counter value"
|
||||
echo "FAIL: vrf rules have unexpected counter value"
|
||||
ret=1
|
||||
fi
|
||||
|
||||
if [ "$qdisc" != "default" ]; then
|
||||
tc -net $ns0 qdisc del dev tvrf root
|
||||
fi
|
||||
}
|
||||
|
||||
# add masq rule that gets evaluated w. outif set to veth device.
|
||||
@ -213,7 +234,8 @@ EOF
|
||||
}
|
||||
|
||||
test_ct_zone_in
|
||||
test_masquerade_vrf
|
||||
test_masquerade_vrf "default"
|
||||
test_masquerade_vrf "pfifo"
|
||||
test_masquerade_veth
|
||||
|
||||
exit $ret
|
||||
|
@ -23,8 +23,8 @@ TESTS="reported_issues correctness concurrency timeout"
|
||||
|
||||
# Set types, defined by TYPE_ variables below
|
||||
TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
|
||||
net_port_net net_mac net_mac_icmp net6_mac_icmp net6_port_net6_port
|
||||
net_port_mac_proto_net"
|
||||
net_port_net net_mac mac_net net_mac_icmp net6_mac_icmp
|
||||
net6_port_net6_port net_port_mac_proto_net"
|
||||
|
||||
# Reported bugs, also described by TYPE_ variables below
|
||||
BUGS="flush_remove_add"
|
||||
@ -277,6 +277,23 @@ perf_entries 1000
|
||||
perf_proto ipv4
|
||||
"
|
||||
|
||||
TYPE_mac_net="
|
||||
display mac,net
|
||||
type_spec ether_addr . ipv4_addr
|
||||
chain_spec ether saddr . ip saddr
|
||||
dst
|
||||
src mac addr4
|
||||
start 1
|
||||
count 5
|
||||
src_delta 2000
|
||||
tools sendip nc bash
|
||||
proto udp
|
||||
|
||||
race_repeat 0
|
||||
|
||||
perf_duration 0
|
||||
"
|
||||
|
||||
TYPE_net_mac_icmp="
|
||||
display net,mac - ICMP
|
||||
type_spec ipv4_addr . ether_addr
|
||||
@ -984,7 +1001,8 @@ format() {
|
||||
fi
|
||||
done
|
||||
for f in ${src}; do
|
||||
__expr="${__expr} . "
|
||||
[ "${__expr}" != "{ " ] && __expr="${__expr} . "
|
||||
|
||||
__start="$(eval format_"${f}" "${srcstart}")"
|
||||
__end="$(eval format_"${f}" "${srcend}")"
|
||||
|
||||
|
@ -18,11 +18,17 @@ cleanup()
|
||||
ip netns del $ns
|
||||
}
|
||||
|
||||
ip netns add $ns
|
||||
if [ $? -ne 0 ];then
|
||||
echo "SKIP: Could not create net namespace $gw"
|
||||
exit $ksft_skip
|
||||
fi
|
||||
checktool (){
|
||||
if ! $1 > /dev/null 2>&1; then
|
||||
echo "SKIP: Could not $2"
|
||||
exit $ksft_skip
|
||||
fi
|
||||
}
|
||||
|
||||
checktool "nft --version" "run test without nft tool"
|
||||
checktool "ip -Version" "run test without ip tool"
|
||||
checktool "socat -V" "run test without socat tool"
|
||||
checktool "ip netns add $ns" "create net namespace"
|
||||
|
||||
trap cleanup EXIT
|
||||
|
||||
@ -71,7 +77,8 @@ EOF
|
||||
local start=$(date +%s%3N)
|
||||
i=$((i + 10000))
|
||||
j=$((j + 1))
|
||||
dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" nc -w 1 -q 1 -u -p 12345 127.0.0.1 12345 > /dev/null
|
||||
# nft rule in output places each packet in a different zone.
|
||||
dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345
|
||||
if [ $? -ne 0 ] ;then
|
||||
ret=1
|
||||
break
|
||||
|
Loading…
Reference in New Issue
Block a user