forked from Minki/linux
ff5a0b421c
The msk can use backup subflows to transmit in-sequence data only if there are no other active subflows. In an active-backup scenario, the MPTCP connection can make forward progress only thanks to MPTCP retransmissions - rtx can pick backup subflows. This patch introduces a new flag for MPTCP subflows: if the underlying TCP connection has made no progress for a long time, and there are other less problematic subflows available, the given subflow becomes stale. Stale subflows are not considered active: if all non-backup subflows become stale, the MPTCP scheduler can pick backup subflows for plain transmissions. Stale subflows can return to the active state as soon as any reply from the peer is observed. Active-backup scenarios can now leverage the available b/w with no restriction. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/207 Signed-off-by: Paolo Abeni <pabeni@redhat.com> Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
210 lines
4.4 KiB
C
210 lines
4.4 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/* Multipath TCP
|
|
*
|
|
* Copyright (c) 2019, Tessares SA.
|
|
*/
|
|
|
|
#ifdef CONFIG_SYSCTL
|
|
#include <linux/sysctl.h>
|
|
#endif
|
|
|
|
#include <net/net_namespace.h>
|
|
#include <net/netns/generic.h>
|
|
|
|
#include "protocol.h"
|
|
|
|
#define MPTCP_SYSCTL_PATH "net/mptcp"
|
|
|
|
static int mptcp_pernet_id;
|
|
struct mptcp_pernet {
|
|
#ifdef CONFIG_SYSCTL
|
|
struct ctl_table_header *ctl_table_hdr;
|
|
#endif
|
|
|
|
unsigned int add_addr_timeout;
|
|
unsigned int stale_loss_cnt;
|
|
u8 mptcp_enabled;
|
|
u8 checksum_enabled;
|
|
u8 allow_join_initial_addr_port;
|
|
};
|
|
|
|
static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
|
|
{
|
|
return net_generic(net, mptcp_pernet_id);
|
|
}
|
|
|
|
int mptcp_is_enabled(const struct net *net)
|
|
{
|
|
return mptcp_get_pernet(net)->mptcp_enabled;
|
|
}
|
|
|
|
unsigned int mptcp_get_add_addr_timeout(const struct net *net)
|
|
{
|
|
return mptcp_get_pernet(net)->add_addr_timeout;
|
|
}
|
|
|
|
int mptcp_is_checksum_enabled(const struct net *net)
|
|
{
|
|
return mptcp_get_pernet(net)->checksum_enabled;
|
|
}
|
|
|
|
int mptcp_allow_join_id0(const struct net *net)
|
|
{
|
|
return mptcp_get_pernet(net)->allow_join_initial_addr_port;
|
|
}
|
|
|
|
unsigned int mptcp_stale_loss_cnt(const struct net *net)
|
|
{
|
|
return mptcp_get_pernet(net)->stale_loss_cnt;
|
|
}
|
|
|
|
static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
|
|
{
|
|
pernet->mptcp_enabled = 1;
|
|
pernet->add_addr_timeout = TCP_RTO_MAX;
|
|
pernet->checksum_enabled = 0;
|
|
pernet->allow_join_initial_addr_port = 1;
|
|
pernet->stale_loss_cnt = 4;
|
|
}
|
|
|
|
#ifdef CONFIG_SYSCTL
|
|
static struct ctl_table mptcp_sysctl_table[] = {
|
|
{
|
|
.procname = "enabled",
|
|
.maxlen = sizeof(u8),
|
|
.mode = 0644,
|
|
/* users with CAP_NET_ADMIN or root (not and) can change this
|
|
* value, same as other sysctl or the 'net' tree.
|
|
*/
|
|
.proc_handler = proc_dou8vec_minmax,
|
|
.extra1 = SYSCTL_ZERO,
|
|
.extra2 = SYSCTL_ONE
|
|
},
|
|
{
|
|
.procname = "add_addr_timeout",
|
|
.maxlen = sizeof(unsigned int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec_jiffies,
|
|
},
|
|
{
|
|
.procname = "checksum_enabled",
|
|
.maxlen = sizeof(u8),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dou8vec_minmax,
|
|
.extra1 = SYSCTL_ZERO,
|
|
.extra2 = SYSCTL_ONE
|
|
},
|
|
{
|
|
.procname = "allow_join_initial_addr_port",
|
|
.maxlen = sizeof(u8),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dou8vec_minmax,
|
|
.extra1 = SYSCTL_ZERO,
|
|
.extra2 = SYSCTL_ONE
|
|
},
|
|
{
|
|
.procname = "stale_loss_cnt",
|
|
.maxlen = sizeof(unsigned int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_douintvec_minmax,
|
|
},
|
|
{}
|
|
};
|
|
|
|
static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
|
|
{
|
|
struct ctl_table_header *hdr;
|
|
struct ctl_table *table;
|
|
|
|
table = mptcp_sysctl_table;
|
|
if (!net_eq(net, &init_net)) {
|
|
table = kmemdup(table, sizeof(mptcp_sysctl_table), GFP_KERNEL);
|
|
if (!table)
|
|
goto err_alloc;
|
|
}
|
|
|
|
table[0].data = &pernet->mptcp_enabled;
|
|
table[1].data = &pernet->add_addr_timeout;
|
|
table[2].data = &pernet->checksum_enabled;
|
|
table[3].data = &pernet->allow_join_initial_addr_port;
|
|
table[4].data = &pernet->stale_loss_cnt;
|
|
|
|
hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
|
|
if (!hdr)
|
|
goto err_reg;
|
|
|
|
pernet->ctl_table_hdr = hdr;
|
|
|
|
return 0;
|
|
|
|
err_reg:
|
|
if (!net_eq(net, &init_net))
|
|
kfree(table);
|
|
err_alloc:
|
|
return -ENOMEM;
|
|
}
|
|
|
|
static void mptcp_pernet_del_table(struct mptcp_pernet *pernet)
|
|
{
|
|
struct ctl_table *table = pernet->ctl_table_hdr->ctl_table_arg;
|
|
|
|
unregister_net_sysctl_table(pernet->ctl_table_hdr);
|
|
|
|
kfree(table);
|
|
}
|
|
|
|
#else
|
|
|
|
/* !CONFIG_SYSCTL stub: there is no sysctl table to register, so this
 * always reports success.
 */
static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
{
	return 0;
}
|
|
|
|
/* !CONFIG_SYSCTL stub: nothing was registered, so nothing to tear down. */
static void mptcp_pernet_del_table(struct mptcp_pernet *pernet)
{
}
|
|
|
|
#endif /* CONFIG_SYSCTL */
|
|
|
|
/* Per-netns init callback: load the default settings into the namespace's
 * MPTCP data, then register its sysctl entries.
 *
 * Return: the result of mptcp_pernet_new_table().
 */
static int __net_init mptcp_net_init(struct net *net)
{
	struct mptcp_pernet *pernet = mptcp_get_pernet(net);
	int err;

	mptcp_pernet_set_defaults(pernet);
	err = mptcp_pernet_new_table(net, pernet);

	return err;
}
|
|
|
|
/* Note: the callback will only be called per extra netns */
|
|
static void __net_exit mptcp_net_exit(struct net *net)
|
|
{
|
|
struct mptcp_pernet *pernet = mptcp_get_pernet(net);
|
|
|
|
mptcp_pernet_del_table(pernet);
|
|
}
|
|
|
|
static struct pernet_operations mptcp_pernet_ops = {
|
|
.init = mptcp_net_init,
|
|
.exit = mptcp_net_exit,
|
|
.id = &mptcp_pernet_id,
|
|
.size = sizeof(struct mptcp_pernet),
|
|
};
|
|
|
|
/* Boot-time MPTCP initialisation: set up the join cookies and the
 * protocol, then register the per-netns operations. A registration
 * failure is fatal (panic).
 */
void __init mptcp_init(void)
{
	int err;

	mptcp_join_cookie_init();
	mptcp_proto_init();

	err = register_pernet_subsys(&mptcp_pernet_ops);
	if (err < 0)
		panic("Failed to register MPTCP pernet subsystem.\n");
}
|
|
|
|
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
|
|
/* IPv6 counterpart of the MPTCP init.
 *
 * Return: the result of mptcp_proto_v6_init().
 */
int __init mptcpv6_init(void)
{
	return mptcp_proto_v6_init();
}
|
|
#endif
|