d4589926d7
While investigating performance problems on small RPC workloads, I noticed linux TCP stack was always splitting the last TSO skb into two parts (skbs). One being a multiple of MSS, and a small one with the Push flag. This split is done even if TCP_NODELAY is set, or if no small packet is in flight. Example with request/response of 4K/4K IP A > B: . ack 68432 win 2783 <nop,nop,timestamp 6524593 6525001> IP A > B: . 65537:68433(2896) ack 69632 win 2783 <nop,nop,timestamp 6524593 6525001> IP A > B: P 68433:69633(1200) ack 69632 win 2783 <nop,nop,timestamp 6524593 6525001> IP B > A: . ack 68433 win 2768 <nop,nop,timestamp 6525001 6524593> IP B > A: . 69632:72528(2896) ack 69633 win 2768 <nop,nop,timestamp 6525001 6524593> IP B > A: P 72528:73728(1200) ack 69633 win 2768 <nop,nop,timestamp 6525001 6524593> IP A > B: . ack 72528 win 2783 <nop,nop,timestamp 6524593 6525001> IP A > B: . 69633:72529(2896) ack 73728 win 2783 <nop,nop,timestamp 6524593 6525001> IP A > B: P 72529:73729(1200) ack 73728 win 2783 <nop,nop,timestamp 6524593 6525001> We can avoid this split by including the Nagle tests at the right place. Note : If some NIC had trouble sending TSO packets with a partial last segment, we would have hit the problem in GRO/forwarding workload already. tcp_minshall_update() is moved to tcp_output.c and is updated as we might feed a TSO packet with a partial last segment. This patch tremendously improves performance, as the traffic now looks like : IP A > B: . ack 98304 win 2783 <nop,nop,timestamp |
||
---|---|---|
.. | ||
9p | ||
bluetooth | ||
caif | ||
irda | ||
iucv | ||
netfilter | ||
netns | ||
nfc | ||
phonet | ||
sctp | ||
tc_act | ||
act_api.h | ||
addrconf.h | ||
af_ieee802154.h | ||
af_rxrpc.h | ||
af_unix.h | ||
af_vsock.h | ||
ah.h | ||
arp.h | ||
atmclip.h | ||
ax25.h | ||
ax88796.h | ||
busy_poll.h | ||
cfg80211-wext.h | ||
cfg80211.h | ||
checksum.h | ||
cipso_ipv4.h | ||
cls_cgroup.h | ||
codel.h | ||
compat.h | ||
datalink.h | ||
dcbevent.h | ||
dcbnl.h | ||
dn_dev.h | ||
dn_fib.h | ||
dn_neigh.h | ||
dn_nsp.h | ||
dn_route.h | ||
dn.h | ||
dsa.h | ||
dsfield.h | ||
dst_ops.h | ||
dst.h | ||
esp.h | ||
ethoc.h | ||
fib_rules.h | ||
firewire.h | ||
flow_keys.h | ||
flow.h | ||
garp.h | ||
gen_stats.h | ||
genetlink.h | ||
gre.h | ||
gro_cells.h | ||
icmp.h | ||
ieee80211_radiotap.h | ||
ieee802154_netdev.h | ||
ieee802154.h | ||
if_inet6.h | ||
inet6_connection_sock.h | ||
inet6_hashtables.h | ||
inet_common.h | ||
inet_connection_sock.h | ||
inet_ecn.h | ||
inet_frag.h | ||
inet_hashtables.h | ||
inet_sock.h | ||
inet_timewait_sock.h | ||
inetpeer.h | ||
ip6_checksum.h | ||
ip6_fib.h | ||
ip6_route.h | ||
ip6_tunnel.h | ||
ip_fib.h | ||
ip_tunnels.h | ||
ip_vs.h | ||
ip.h | ||
ipcomp.h | ||
ipconfig.h | ||
ipv6.h | ||
ipx.h | ||
iw_handler.h | ||
lapb.h | ||
lib80211.h | ||
llc_c_ac.h | ||
llc_c_ev.h | ||
llc_c_st.h | ||
llc_conn.h | ||
llc_if.h | ||
llc_pdu.h | ||
llc_s_ac.h | ||
llc_s_ev.h | ||
llc_s_st.h | ||
llc_sap.h | ||
llc.h | ||
mac80211.h | ||
mac802154.h | ||
mip6.h | ||
mld.h | ||
mrp.h | ||
ndisc.h | ||
neighbour.h | ||
net_namespace.h | ||
net_ratelimit.h | ||
netdma.h | ||
netevent.h | ||
netlabel.h | ||
netlink.h | ||
netprio_cgroup.h | ||
netrom.h | ||
nexthop.h | ||
nl802154.h | ||
p8022.h | ||
ping.h | ||
pkt_cls.h | ||
pkt_sched.h | ||
protocol.h | ||
psnap.h | ||
raw.h | ||
rawv6.h | ||
red.h | ||
regulatory.h | ||
request_sock.h | ||
rose.h | ||
route.h | ||
rtnetlink.h | ||
sch_generic.h | ||
scm.h | ||
secure_seq.h | ||
slhc_vj.h | ||
snmp.h | ||
sock.h | ||
stp.h | ||
tcp_memcontrol.h | ||
tcp_states.h | ||
tcp.h | ||
timewait_sock.h | ||
transp_v6.h | ||
udp.h | ||
udplite.h | ||
vsock_addr.h | ||
vxlan.h | ||
wext.h | ||
wimax.h | ||
wpan-phy.h | ||
x25.h | ||
x25device.h | ||
xfrm.h |