bpf: Add support for changing congestion control
Add support for changing congestion control from SOCK_OPS bpf programs through the setsockopt bpf helper function. Also add a new SOCK_OPS op, BPF_SOCK_OPS_NEEDS_ECN, which is needed for congestion controls, like dctcp, that need to enable ECN in the SYN packets. Signed-off-by: Lawrence Brakmo <brakmo@fb.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
d9925368a6
commit
91b5b21c7c
@ -1004,7 +1004,9 @@ void tcp_get_default_congestion_control(char *name);
|
||||
void tcp_get_available_congestion_control(char *buf, size_t len);
|
||||
void tcp_get_allowed_congestion_control(char *buf, size_t len);
|
||||
int tcp_set_allowed_congestion_control(char *allowed);
|
||||
int tcp_set_congestion_control(struct sock *sk, const char *name);
|
||||
int tcp_set_congestion_control(struct sock *sk, const char *name, bool load);
|
||||
void tcp_reinit_congestion_control(struct sock *sk,
|
||||
const struct tcp_congestion_ops *ca);
|
||||
u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
|
||||
void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
|
||||
|
||||
@ -2078,4 +2080,9 @@ static inline u32 tcp_rwnd_init_bpf(struct sock *sk)
|
||||
rwnd = 0;
|
||||
return rwnd;
|
||||
}
|
||||
|
||||
static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
|
||||
{
|
||||
return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1);
|
||||
}
|
||||
#endif /* _TCP_H */
|
||||
|
@ -778,6 +778,9 @@ enum {
|
||||
* passive connection is
|
||||
* established
|
||||
*/
|
||||
BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control
|
||||
* needs ECN
|
||||
*/
|
||||
};
|
||||
|
||||
#endif /* _UAPI__LINUX_BPF_H__ */
|
||||
|
@ -2719,8 +2719,24 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
|
||||
}
|
||||
} else if (level == SOL_TCP &&
|
||||
sk->sk_prot->setsockopt == tcp_setsockopt) {
|
||||
/* Place holder */
|
||||
#ifdef CONFIG_INET
|
||||
if (optname == TCP_CONGESTION) {
|
||||
char name[TCP_CA_NAME_MAX];
|
||||
|
||||
strncpy(name, optval, min_t(long, optlen,
|
||||
TCP_CA_NAME_MAX-1));
|
||||
name[TCP_CA_NAME_MAX-1] = 0;
|
||||
ret = tcp_set_congestion_control(sk, name, false);
|
||||
if (!ret && bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN)
|
||||
/* replacing an existing ca */
|
||||
tcp_reinit_congestion_control(sk,
|
||||
inet_csk(sk)->icsk_ca_ops);
|
||||
} else {
|
||||
ret = -EINVAL;
|
||||
}
|
||||
#else
|
||||
ret = -EINVAL;
|
||||
#endif
|
||||
} else {
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
@ -2481,7 +2481,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
|
||||
name[val] = 0;
|
||||
|
||||
lock_sock(sk);
|
||||
err = tcp_set_congestion_control(sk, name);
|
||||
err = tcp_set_congestion_control(sk, name, true);
|
||||
release_sock(sk);
|
||||
return err;
|
||||
}
|
||||
|
@ -189,8 +189,8 @@ void tcp_init_congestion_control(struct sock *sk)
|
||||
INET_ECN_dontxmit(sk);
|
||||
}
|
||||
|
||||
static void tcp_reinit_congestion_control(struct sock *sk,
|
||||
const struct tcp_congestion_ops *ca)
|
||||
void tcp_reinit_congestion_control(struct sock *sk,
|
||||
const struct tcp_congestion_ops *ca)
|
||||
{
|
||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||
|
||||
@ -333,8 +333,12 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Change congestion control for socket */
|
||||
int tcp_set_congestion_control(struct sock *sk, const char *name)
|
||||
/* Change congestion control for socket. If load is false, then it is the
|
||||
* responsibility of the caller to call tcp_init_congestion_control or
|
||||
* tcp_reinit_congestion_control (if the current congestion control was
|
||||
* already initialized).
|
||||
*/
|
||||
int tcp_set_congestion_control(struct sock *sk, const char *name, bool load)
|
||||
{
|
||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||
const struct tcp_congestion_ops *ca;
|
||||
@ -344,21 +348,29 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
|
||||
return -EPERM;
|
||||
|
||||
rcu_read_lock();
|
||||
ca = __tcp_ca_find_autoload(name);
|
||||
if (!load)
|
||||
ca = tcp_ca_find(name);
|
||||
else
|
||||
ca = __tcp_ca_find_autoload(name);
|
||||
/* No change asking for existing value */
|
||||
if (ca == icsk->icsk_ca_ops) {
|
||||
icsk->icsk_ca_setsockopt = 1;
|
||||
goto out;
|
||||
}
|
||||
if (!ca)
|
||||
if (!ca) {
|
||||
err = -ENOENT;
|
||||
else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
|
||||
ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)))
|
||||
} else if (!load) {
|
||||
icsk->icsk_ca_ops = ca;
|
||||
if (!try_module_get(ca->owner))
|
||||
err = -EBUSY;
|
||||
} else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
|
||||
ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) {
|
||||
err = -EPERM;
|
||||
else if (!try_module_get(ca->owner))
|
||||
} else if (!try_module_get(ca->owner)) {
|
||||
err = -EBUSY;
|
||||
else
|
||||
} else {
|
||||
tcp_reinit_congestion_control(sk, ca);
|
||||
}
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
return err;
|
||||
|
@ -6191,7 +6191,8 @@ static void tcp_ecn_create_request(struct request_sock *req,
|
||||
ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
|
||||
|
||||
if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
|
||||
(ecn_ok_dst & DST_FEATURE_ECN_CA))
|
||||
(ecn_ok_dst & DST_FEATURE_ECN_CA) ||
|
||||
tcp_bpf_ca_needs_ecn((struct sock *)req))
|
||||
inet_rsk(req)->ecn_ok = 1;
|
||||
}
|
||||
|
||||
|
@ -316,7 +316,8 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
|
||||
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
|
||||
if (!(tp->ecn_flags & TCP_ECN_OK))
|
||||
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
|
||||
else if (tcp_ca_needs_ecn(sk))
|
||||
else if (tcp_ca_needs_ecn(sk) ||
|
||||
tcp_bpf_ca_needs_ecn(sk))
|
||||
INET_ECN_xmit(sk);
|
||||
}
|
||||
|
||||
@ -324,8 +325,9 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
|
||||
static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
|
||||
bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
|
||||
tcp_ca_needs_ecn(sk);
|
||||
tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
|
||||
|
||||
if (!use_ecn) {
|
||||
const struct dst_entry *dst = __sk_dst_get(sk);
|
||||
@ -339,7 +341,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
|
||||
if (use_ecn) {
|
||||
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
|
||||
tp->ecn_flags = TCP_ECN_OK;
|
||||
if (tcp_ca_needs_ecn(sk))
|
||||
if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
|
||||
INET_ECN_xmit(sk);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user