Merge branch 'tcp_sack_rttm'
Kenneth Klette Jonassen says:

====================
tcp: SACK RTTM changes for congestion control

This patch series improves SACK RTT measurements for congestion control:

 o Picks the latest sequence SACKed for RTT, i.e. most accurate delay
   signal.

 o Calls the congestion control's pkts_acked hook with SACK RTTMs even
   when not sequentially ACKing new data.

V2: amend misleading comment
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
a89f96c927
@ -1130,7 +1130,12 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
|
|||||||
struct tcp_sacktag_state {
|
struct tcp_sacktag_state {
|
||||||
int reord;
|
int reord;
|
||||||
int fack_count;
|
int fack_count;
|
||||||
long rtt_us; /* RTT measured by SACKing never-retransmitted data */
|
/* Timestamps for earliest and latest never-retransmitted segment
|
||||||
|
* that was SACKed. RTO needs the earliest RTT to stay conservative,
|
||||||
|
* but congestion control should still get an accurate delay signal.
|
||||||
|
*/
|
||||||
|
struct skb_mstamp first_sackt;
|
||||||
|
struct skb_mstamp last_sackt;
|
||||||
int flag;
|
int flag;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -1233,14 +1238,9 @@ static u8 tcp_sacktag_one(struct sock *sk,
|
|||||||
state->reord);
|
state->reord);
|
||||||
if (!after(end_seq, tp->high_seq))
|
if (!after(end_seq, tp->high_seq))
|
||||||
state->flag |= FLAG_ORIG_SACK_ACKED;
|
state->flag |= FLAG_ORIG_SACK_ACKED;
|
||||||
/* Pick the earliest sequence sacked for RTT */
|
if (state->first_sackt.v64 == 0)
|
||||||
if (state->rtt_us < 0) {
|
state->first_sackt = *xmit_time;
|
||||||
struct skb_mstamp now;
|
state->last_sackt = *xmit_time;
|
||||||
|
|
||||||
skb_mstamp_get(&now);
|
|
||||||
state->rtt_us = skb_mstamp_us_delta(&now,
|
|
||||||
xmit_time);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sacked & TCPCB_LOST) {
|
if (sacked & TCPCB_LOST) {
|
||||||
@ -1634,7 +1634,7 @@ static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_bl
|
|||||||
|
|
||||||
static int
|
static int
|
||||||
tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
|
tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
|
||||||
u32 prior_snd_una, long *sack_rtt_us)
|
u32 prior_snd_una, struct tcp_sacktag_state *state)
|
||||||
{
|
{
|
||||||
struct tcp_sock *tp = tcp_sk(sk);
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
const unsigned char *ptr = (skb_transport_header(ack_skb) +
|
const unsigned char *ptr = (skb_transport_header(ack_skb) +
|
||||||
@ -1642,7 +1642,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
|
|||||||
struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
|
struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
|
||||||
struct tcp_sack_block sp[TCP_NUM_SACKS];
|
struct tcp_sack_block sp[TCP_NUM_SACKS];
|
||||||
struct tcp_sack_block *cache;
|
struct tcp_sack_block *cache;
|
||||||
struct tcp_sacktag_state state;
|
|
||||||
struct sk_buff *skb;
|
struct sk_buff *skb;
|
||||||
int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
|
int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
|
||||||
int used_sacks;
|
int used_sacks;
|
||||||
@ -1650,9 +1649,8 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
|
|||||||
int i, j;
|
int i, j;
|
||||||
int first_sack_index;
|
int first_sack_index;
|
||||||
|
|
||||||
state.flag = 0;
|
state->flag = 0;
|
||||||
state.reord = tp->packets_out;
|
state->reord = tp->packets_out;
|
||||||
state.rtt_us = -1L;
|
|
||||||
|
|
||||||
if (!tp->sacked_out) {
|
if (!tp->sacked_out) {
|
||||||
if (WARN_ON(tp->fackets_out))
|
if (WARN_ON(tp->fackets_out))
|
||||||
@ -1663,7 +1661,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
|
|||||||
found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
|
found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
|
||||||
num_sacks, prior_snd_una);
|
num_sacks, prior_snd_una);
|
||||||
if (found_dup_sack)
|
if (found_dup_sack)
|
||||||
state.flag |= FLAG_DSACKING_ACK;
|
state->flag |= FLAG_DSACKING_ACK;
|
||||||
|
|
||||||
/* Eliminate too old ACKs, but take into
|
/* Eliminate too old ACKs, but take into
|
||||||
* account more or less fresh ones, they can
|
* account more or less fresh ones, they can
|
||||||
@ -1728,7 +1726,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
|
|||||||
}
|
}
|
||||||
|
|
||||||
skb = tcp_write_queue_head(sk);
|
skb = tcp_write_queue_head(sk);
|
||||||
state.fack_count = 0;
|
state->fack_count = 0;
|
||||||
i = 0;
|
i = 0;
|
||||||
|
|
||||||
if (!tp->sacked_out) {
|
if (!tp->sacked_out) {
|
||||||
@ -1762,10 +1760,10 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
|
|||||||
|
|
||||||
/* Head todo? */
|
/* Head todo? */
|
||||||
if (before(start_seq, cache->start_seq)) {
|
if (before(start_seq, cache->start_seq)) {
|
||||||
skb = tcp_sacktag_skip(skb, sk, &state,
|
skb = tcp_sacktag_skip(skb, sk, state,
|
||||||
start_seq);
|
start_seq);
|
||||||
skb = tcp_sacktag_walk(skb, sk, next_dup,
|
skb = tcp_sacktag_walk(skb, sk, next_dup,
|
||||||
&state,
|
state,
|
||||||
start_seq,
|
start_seq,
|
||||||
cache->start_seq,
|
cache->start_seq,
|
||||||
dup_sack);
|
dup_sack);
|
||||||
@ -1776,7 +1774,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
|
|||||||
goto advance_sp;
|
goto advance_sp;
|
||||||
|
|
||||||
skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
|
skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
|
||||||
&state,
|
state,
|
||||||
cache->end_seq);
|
cache->end_seq);
|
||||||
|
|
||||||
/* ...tail remains todo... */
|
/* ...tail remains todo... */
|
||||||
@ -1785,12 +1783,12 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
|
|||||||
skb = tcp_highest_sack(sk);
|
skb = tcp_highest_sack(sk);
|
||||||
if (!skb)
|
if (!skb)
|
||||||
break;
|
break;
|
||||||
state.fack_count = tp->fackets_out;
|
state->fack_count = tp->fackets_out;
|
||||||
cache++;
|
cache++;
|
||||||
goto walk;
|
goto walk;
|
||||||
}
|
}
|
||||||
|
|
||||||
skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq);
|
skb = tcp_sacktag_skip(skb, sk, state, cache->end_seq);
|
||||||
/* Check overlap against next cached too (past this one already) */
|
/* Check overlap against next cached too (past this one already) */
|
||||||
cache++;
|
cache++;
|
||||||
continue;
|
continue;
|
||||||
@ -1800,12 +1798,12 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
|
|||||||
skb = tcp_highest_sack(sk);
|
skb = tcp_highest_sack(sk);
|
||||||
if (!skb)
|
if (!skb)
|
||||||
break;
|
break;
|
||||||
state.fack_count = tp->fackets_out;
|
state->fack_count = tp->fackets_out;
|
||||||
}
|
}
|
||||||
skb = tcp_sacktag_skip(skb, sk, &state, start_seq);
|
skb = tcp_sacktag_skip(skb, sk, state, start_seq);
|
||||||
|
|
||||||
walk:
|
walk:
|
||||||
skb = tcp_sacktag_walk(skb, sk, next_dup, &state,
|
skb = tcp_sacktag_walk(skb, sk, next_dup, state,
|
||||||
start_seq, end_seq, dup_sack);
|
start_seq, end_seq, dup_sack);
|
||||||
|
|
||||||
advance_sp:
|
advance_sp:
|
||||||
@ -1820,9 +1818,9 @@ advance_sp:
|
|||||||
for (j = 0; j < used_sacks; j++)
|
for (j = 0; j < used_sacks; j++)
|
||||||
tp->recv_sack_cache[i++] = sp[j];
|
tp->recv_sack_cache[i++] = sp[j];
|
||||||
|
|
||||||
if ((state.reord < tp->fackets_out) &&
|
if ((state->reord < tp->fackets_out) &&
|
||||||
((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
|
((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
|
||||||
tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
|
tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
|
||||||
|
|
||||||
tcp_mark_lost_retrans(sk);
|
tcp_mark_lost_retrans(sk);
|
||||||
tcp_verify_left_out(tp);
|
tcp_verify_left_out(tp);
|
||||||
@ -1834,8 +1832,7 @@ out:
|
|||||||
WARN_ON((int)tp->retrans_out < 0);
|
WARN_ON((int)tp->retrans_out < 0);
|
||||||
WARN_ON((int)tcp_packets_in_flight(tp) < 0);
|
WARN_ON((int)tcp_packets_in_flight(tp) < 0);
|
||||||
#endif
|
#endif
|
||||||
*sack_rtt_us = state.rtt_us;
|
return state->flag;
|
||||||
return state.flag;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Limits sacked_out so that sum with lost_out isn't ever larger than
|
/* Limits sacked_out so that sum with lost_out isn't ever larger than
|
||||||
@ -3052,7 +3049,8 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
|
|||||||
* arrived at the other end.
|
* arrived at the other end.
|
||||||
*/
|
*/
|
||||||
static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
|
static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
|
||||||
u32 prior_snd_una, long sack_rtt_us)
|
u32 prior_snd_una,
|
||||||
|
struct tcp_sacktag_state *sack)
|
||||||
{
|
{
|
||||||
const struct inet_connection_sock *icsk = inet_csk(sk);
|
const struct inet_connection_sock *icsk = inet_csk(sk);
|
||||||
struct skb_mstamp first_ackt, last_ackt, now;
|
struct skb_mstamp first_ackt, last_ackt, now;
|
||||||
@ -3060,8 +3058,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
|
|||||||
u32 prior_sacked = tp->sacked_out;
|
u32 prior_sacked = tp->sacked_out;
|
||||||
u32 reord = tp->packets_out;
|
u32 reord = tp->packets_out;
|
||||||
bool fully_acked = true;
|
bool fully_acked = true;
|
||||||
long ca_seq_rtt_us = -1L;
|
long sack_rtt_us = -1L;
|
||||||
long seq_rtt_us = -1L;
|
long seq_rtt_us = -1L;
|
||||||
|
long ca_rtt_us = -1L;
|
||||||
struct sk_buff *skb;
|
struct sk_buff *skb;
|
||||||
u32 pkts_acked = 0;
|
u32 pkts_acked = 0;
|
||||||
bool rtt_update;
|
bool rtt_update;
|
||||||
@ -3150,15 +3149,16 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
|
|||||||
skb_mstamp_get(&now);
|
skb_mstamp_get(&now);
|
||||||
if (likely(first_ackt.v64)) {
|
if (likely(first_ackt.v64)) {
|
||||||
seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
|
seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
|
||||||
ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
|
ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
|
||||||
|
}
|
||||||
|
if (sack->first_sackt.v64) {
|
||||||
|
sack_rtt_us = skb_mstamp_us_delta(&now, &sack->first_sackt);
|
||||||
|
ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt);
|
||||||
}
|
}
|
||||||
|
|
||||||
rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us);
|
rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us);
|
||||||
|
|
||||||
if (flag & FLAG_ACKED) {
|
if (flag & FLAG_ACKED) {
|
||||||
const struct tcp_congestion_ops *ca_ops
|
|
||||||
= inet_csk(sk)->icsk_ca_ops;
|
|
||||||
|
|
||||||
tcp_rearm_rto(sk);
|
tcp_rearm_rto(sk);
|
||||||
if (unlikely(icsk->icsk_mtup.probe_size &&
|
if (unlikely(icsk->icsk_mtup.probe_size &&
|
||||||
!after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
|
!after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
|
||||||
@ -3181,11 +3181,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
|
|||||||
|
|
||||||
tp->fackets_out -= min(pkts_acked, tp->fackets_out);
|
tp->fackets_out -= min(pkts_acked, tp->fackets_out);
|
||||||
|
|
||||||
if (ca_ops->pkts_acked) {
|
|
||||||
long rtt_us = min_t(ulong, ca_seq_rtt_us, sack_rtt_us);
|
|
||||||
ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
|
|
||||||
}
|
|
||||||
|
|
||||||
} else if (skb && rtt_update && sack_rtt_us >= 0 &&
|
} else if (skb && rtt_update && sack_rtt_us >= 0 &&
|
||||||
sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
|
sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
|
||||||
/* Do not re-arm RTO if the sack RTT is measured from data sent
|
/* Do not re-arm RTO if the sack RTT is measured from data sent
|
||||||
@ -3195,6 +3190,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
|
|||||||
tcp_rearm_rto(sk);
|
tcp_rearm_rto(sk);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (icsk->icsk_ca_ops->pkts_acked)
|
||||||
|
icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked, ca_rtt_us);
|
||||||
|
|
||||||
#if FASTRETRANS_DEBUG > 0
|
#if FASTRETRANS_DEBUG > 0
|
||||||
WARN_ON((int)tp->sacked_out < 0);
|
WARN_ON((int)tp->sacked_out < 0);
|
||||||
WARN_ON((int)tp->lost_out < 0);
|
WARN_ON((int)tp->lost_out < 0);
|
||||||
@ -3459,6 +3457,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
|
|||||||
{
|
{
|
||||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||||
struct tcp_sock *tp = tcp_sk(sk);
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
|
struct tcp_sacktag_state sack_state;
|
||||||
u32 prior_snd_una = tp->snd_una;
|
u32 prior_snd_una = tp->snd_una;
|
||||||
u32 ack_seq = TCP_SKB_CB(skb)->seq;
|
u32 ack_seq = TCP_SKB_CB(skb)->seq;
|
||||||
u32 ack = TCP_SKB_CB(skb)->ack_seq;
|
u32 ack = TCP_SKB_CB(skb)->ack_seq;
|
||||||
@ -3467,7 +3466,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
|
|||||||
int prior_packets = tp->packets_out;
|
int prior_packets = tp->packets_out;
|
||||||
const int prior_unsacked = tp->packets_out - tp->sacked_out;
|
const int prior_unsacked = tp->packets_out - tp->sacked_out;
|
||||||
int acked = 0; /* Number of packets newly acked */
|
int acked = 0; /* Number of packets newly acked */
|
||||||
long sack_rtt_us = -1L;
|
|
||||||
|
sack_state.first_sackt.v64 = 0;
|
||||||
|
|
||||||
/* We very likely will need to access write queue head. */
|
/* We very likely will need to access write queue head. */
|
||||||
prefetchw(sk->sk_write_queue.next);
|
prefetchw(sk->sk_write_queue.next);
|
||||||
@ -3531,7 +3531,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
|
|||||||
|
|
||||||
if (TCP_SKB_CB(skb)->sacked)
|
if (TCP_SKB_CB(skb)->sacked)
|
||||||
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
|
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
|
||||||
&sack_rtt_us);
|
&sack_state);
|
||||||
|
|
||||||
if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
|
if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
|
||||||
flag |= FLAG_ECE;
|
flag |= FLAG_ECE;
|
||||||
@ -3556,7 +3556,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
|
|||||||
/* See if we can take anything off of the retransmit queue. */
|
/* See if we can take anything off of the retransmit queue. */
|
||||||
acked = tp->packets_out;
|
acked = tp->packets_out;
|
||||||
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
|
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
|
||||||
sack_rtt_us);
|
&sack_state);
|
||||||
acked -= tp->packets_out;
|
acked -= tp->packets_out;
|
||||||
|
|
||||||
/* Advance cwnd if state allows */
|
/* Advance cwnd if state allows */
|
||||||
@ -3608,7 +3608,7 @@ old_ack:
|
|||||||
*/
|
*/
|
||||||
if (TCP_SKB_CB(skb)->sacked) {
|
if (TCP_SKB_CB(skb)->sacked) {
|
||||||
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
|
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
|
||||||
&sack_rtt_us);
|
&sack_state);
|
||||||
tcp_fastretrans_alert(sk, acked, prior_unsacked,
|
tcp_fastretrans_alert(sk, acked, prior_unsacked,
|
||||||
is_dupack, flag);
|
is_dupack, flag);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user