forked from Minki/linux
0227f058aa
Currently, SMC uses smc->sk.sk_{rcv|snd}buf to size the send buffer and the RMB, and those values are taken from tcp_{w|r}mem of the clcsock. Buffer sizes derived from the TCP socket do not fit SMC well: SMC-R/-D generally needs larger buffers than TCP to reach higher performance, because the underlying devices and paths are different. So this patch unbinds the buffer size from TCP and introduces two sysctl knobs to tune them independently. These knobs are per net namespace, so they also work for containers. Signed-off-by: Tony Lu <tonylu@linux.alibaba.com> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
112 lines
2.6 KiB
C
112 lines
2.6 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Shared Memory Communications over RDMA (SMC-R) and RoCE
|
|
*
|
|
* smc_sysctl.c: sysctl interface to SMC subsystem.
|
|
*
|
|
* Copyright (c) 2022, Alibaba Inc.
|
|
*
|
|
* Author: Tony Lu <tonylu@linux.alibaba.com>
|
|
*
|
|
*/
|
|
|
|
#include <linux/init.h>
|
|
#include <linux/sysctl.h>
|
|
#include <net/net_namespace.h>
|
|
|
|
#include "smc.h"
|
|
#include "smc_core.h"
|
|
#include "smc_llc.h"
|
|
#include "smc_sysctl.h"
|
|
|
|
static int min_sndbuf = SMC_BUF_MIN_SIZE;
|
|
static int min_rcvbuf = SMC_BUF_MIN_SIZE;
|
|
|
|
static struct ctl_table smc_table[] = {
|
|
{
|
|
.procname = "autocorking_size",
|
|
.data = &init_net.smc.sysctl_autocorking_size,
|
|
.maxlen = sizeof(unsigned int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_douintvec,
|
|
},
|
|
{
|
|
.procname = "smcr_buf_type",
|
|
.data = &init_net.smc.sysctl_smcr_buf_type,
|
|
.maxlen = sizeof(unsigned int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_douintvec_minmax,
|
|
.extra1 = SYSCTL_ZERO,
|
|
.extra2 = SYSCTL_TWO,
|
|
},
|
|
{
|
|
.procname = "smcr_testlink_time",
|
|
.data = &init_net.smc.sysctl_smcr_testlink_time,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec_jiffies,
|
|
},
|
|
{
|
|
.procname = "wmem",
|
|
.data = &init_net.smc.sysctl_wmem,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec_minmax,
|
|
.extra1 = &min_sndbuf,
|
|
},
|
|
{
|
|
.procname = "rmem",
|
|
.data = &init_net.smc.sysctl_rmem,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec_minmax,
|
|
.extra1 = &min_rcvbuf,
|
|
},
|
|
{ }
|
|
};
|
|
|
|
int __net_init smc_sysctl_net_init(struct net *net)
|
|
{
|
|
struct ctl_table *table;
|
|
|
|
table = smc_table;
|
|
if (!net_eq(net, &init_net)) {
|
|
int i;
|
|
|
|
table = kmemdup(table, sizeof(smc_table), GFP_KERNEL);
|
|
if (!table)
|
|
goto err_alloc;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(smc_table) - 1; i++)
|
|
table[i].data += (void *)net - (void *)&init_net;
|
|
}
|
|
|
|
net->smc.smc_hdr = register_net_sysctl(net, "net/smc", table);
|
|
if (!net->smc.smc_hdr)
|
|
goto err_reg;
|
|
|
|
net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
|
|
net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS;
|
|
net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
|
|
WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]));
|
|
WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]));
|
|
|
|
return 0;
|
|
|
|
err_reg:
|
|
if (!net_eq(net, &init_net))
|
|
kfree(table);
|
|
err_alloc:
|
|
return -ENOMEM;
|
|
}
|
|
|
|
void __net_exit smc_sysctl_net_exit(struct net *net)
|
|
{
|
|
struct ctl_table *table;
|
|
|
|
table = net->smc.smc_hdr->ctl_table_arg;
|
|
unregister_net_sysctl_table(net->smc.smc_hdr);
|
|
if (!net_eq(net, &init_net))
|
|
kfree(table);
|
|
}
|