net_sched: sch_fq: do not assume EDT packets are ordered
The TCP stack makes sure that, for a given flow, packets are queued with monotonically increasing skb->tstamp (EDT), but we want to allow UDP packets to use EDT as well, so that QUIC servers can use in-kernel pacing.

This patch adds a per-flow rb-tree on which packets might be stored. We still try to use the linear list for the typical case where packets are queued with monotonically increasing skb->tstamp, since queueing/dequeueing packets on a standard list is O(1).

Note that the ability to store packets in arbitrary EDT order will later allow us to implement a per-TCP-socket mechanism that adds delays (possibly with jitter) and reordering, to build convenient network emulators.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
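For readers coming at EDT from the application side: a sender stamps each packet with an earliest departure time, and sch_fq simply holds the packet until that time. The sketch below shows how a UDP (e.g. QUIC) sender could hand such per-packet timestamps to the kernel. It is illustrative only and not part of this patch: it assumes a kernel and libc whose headers expose SO_TXTIME and SCM_TXTIME, and the helper names enable_txtime() and send_at() as well as the socket/address/payload arguments are placeholders.

/*
 * Illustrative userspace sketch, not part of the kernel patch.
 * The sender opts in to per-packet transmit times (SO_TXTIME), then
 * attaches an earliest departure time, in CLOCK_MONOTONIC nanoseconds,
 * to each datagram via an SCM_TXTIME control message. sch_fq reads that
 * value (skb->tstamp, copied into fq_skb_cb->time_to_send) and paces
 * the packet accordingly.
 */
#include <string.h>
#include <time.h>
#include <sys/uio.h>
#include <sys/socket.h>
#include <linux/types.h>
#include <linux/net_tstamp.h>           /* struct sock_txtime */

static int enable_txtime(int fd)
{
        struct sock_txtime cfg = {
                .clockid = CLOCK_MONOTONIC, /* fq compares against ktime_get_ns() */
                .flags   = 0,
        };

        return setsockopt(fd, SOL_SOCKET, SO_TXTIME, &cfg, sizeof(cfg));
}

static ssize_t send_at(int fd, const void *buf, size_t len,
                       const struct sockaddr *dst, socklen_t dstlen,
                       __u64 txtime_ns)
{
        char control[CMSG_SPACE(sizeof(__u64))] = {0};
        struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
        struct msghdr msg = {
                .msg_name       = (void *)dst,
                .msg_namelen    = dstlen,
                .msg_iov        = &iov,
                .msg_iovlen     = 1,
                .msg_control    = control,
                .msg_controllen = sizeof(control),
        };
        struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);

        cm->cmsg_level = SOL_SOCKET;
        cm->cmsg_type  = SCM_TXTIME;
        cm->cmsg_len   = CMSG_LEN(sizeof(__u64));
        memcpy(CMSG_DATA(cm), &txtime_ns, sizeof(txtime_ns));

        return sendmsg(fd, &msg, 0);
}

With this patch, two datagrams submitted with out-of-order txtime values no longer confuse fq: the one with the earlier departure time is simply pulled from the per-flow rb-tree first.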
commit eeb84aa0d0
parent a55a385d8c
net/sched/sch_fq.c
@@ -54,10 +54,23 @@
 #include <net/tcp_states.h>
 #include <net/tcp.h>
 
+struct fq_skb_cb {
+        u64 time_to_send;
+};
+
+static inline struct fq_skb_cb *fq_skb_cb(struct sk_buff *skb)
+{
+        qdisc_cb_private_validate(skb, sizeof(struct fq_skb_cb));
+        return (struct fq_skb_cb *)qdisc_skb_cb(skb)->data;
+}
+
 /*
- * Per flow structure, dynamically allocated
+ * Per flow structure, dynamically allocated.
+ * If packets have monotically increasing time_to_send, they are placed in O(1)
+ * in linear list (head,tail), otherwise are placed in a rbtree (t_root).
  */
 struct fq_flow {
+        struct rb_root t_root;
         struct sk_buff *head;          /* list of skbs for this flow : first skb */
         union {
                 struct sk_buff *tail;  /* last skb in the list */
@@ -298,6 +311,8 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
                 q->stat_allocation_errors++;
                 return &q->internal;
         }
+        /* f->t_root is already zeroed after kmem_cache_zalloc() */
+
         fq_flow_set_detached(f);
         f->sk = sk;
         if (skb->sk)
@@ -312,14 +327,40 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
         return f;
 }
 
+static struct sk_buff *fq_peek(struct fq_flow *flow)
+{
+        struct sk_buff *skb = skb_rb_first(&flow->t_root);
+        struct sk_buff *head = flow->head;
+
+        if (!skb)
+                return head;
+
+        if (!head)
+                return skb;
+
+        if (fq_skb_cb(skb)->time_to_send < fq_skb_cb(head)->time_to_send)
+                return skb;
+        return head;
+}
+
+static void fq_erase_head(struct Qdisc *sch, struct fq_flow *flow,
+                          struct sk_buff *skb)
+{
+        if (skb == flow->head) {
+                flow->head = skb->next;
+        } else {
+                rb_erase(&skb->rbnode, &flow->t_root);
+                skb->dev = qdisc_dev(sch);
+        }
+}
+
 /* remove one skb from head of flow queue */
 static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow)
 {
-        struct sk_buff *skb = flow->head;
+        struct sk_buff *skb = fq_peek(flow);
 
         if (skb) {
-                flow->head = skb->next;
+                fq_erase_head(sch, flow, skb);
                 skb_mark_not_on_list(skb);
                 flow->qlen--;
                 qdisc_qstats_backlog_dec(sch, skb);
@@ -330,15 +371,36 @@ static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow)
 
 static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb)
 {
-        struct sk_buff *head = flow->head;
+        struct rb_node **p, *parent;
+        struct sk_buff *head, *aux;
 
-        skb->next = NULL;
-        if (!head)
-                flow->head = skb;
-        else
-                flow->tail->next = skb;
+        fq_skb_cb(skb)->time_to_send = skb->tstamp ?: ktime_get_ns();
 
-        flow->tail = skb;
+        head = flow->head;
+        if (!head ||
+            fq_skb_cb(skb)->time_to_send >= fq_skb_cb(flow->tail)->time_to_send) {
+                if (!head)
+                        flow->head = skb;
+                else
+                        flow->tail->next = skb;
+                flow->tail = skb;
+                skb->next = NULL;
+                return;
+        }
+
+        p = &flow->t_root.rb_node;
+        parent = NULL;
+
+        while (*p) {
+                parent = *p;
+                aux = rb_to_skb(parent);
+                if (fq_skb_cb(skb)->time_to_send >= fq_skb_cb(aux)->time_to_send)
+                        p = &parent->rb_right;
+                else
+                        p = &parent->rb_left;
+        }
+        rb_link_node(&skb->rbnode, parent, p);
+        rb_insert_color(&skb->rbnode, &flow->t_root);
 }
 
 static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
@@ -450,9 +512,9 @@ begin:
                 goto begin;
         }
 
-        skb = f->head;
+        skb = fq_peek(f);
         if (skb) {
-                u64 time_next_packet = max_t(u64, ktime_to_ns(skb->tstamp),
+                u64 time_next_packet = max_t(u64, fq_skb_cb(skb)->time_to_send,
                                              f->time_next_packet);
 
                 if (now < time_next_packet) {
@@ -533,6 +595,15 @@ out:
 
 static void fq_flow_purge(struct fq_flow *flow)
 {
+        struct rb_node *p = rb_first(&flow->t_root);
+
+        while (p) {
+                struct sk_buff *skb = rb_to_skb(p);
+
+                p = rb_next(p);
+                rb_erase(&skb->rbnode, &flow->t_root);
+                rtnl_kfree_skbs(skb, skb);
+        }
         rtnl_kfree_skbs(flow->head, flow->tail);
         flow->head = NULL;
         flow->qlen = 0;