mainline inclusion from mainline-v6.1-rc1 commit 0227f058aa29f5ab6f6ec79c3a36ae41f1e03a13 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I76JHC CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/ne...
--------------------------------
Currently, SMC uses smc->sk.sk_{rcv|snd}buf to size the send buffer and the RMB, and these values are taken from tcp_{w|r}mem of the clcsock.
The buffer size from the TCP socket doesn't fit SMC well. SMC-R/-D generally need larger buffers than TCP to achieve higher performance, because they use different underlying devices and paths.
So this patch decouples the SMC buffer sizes from TCP and introduces two sysctl knobs, wmem and rmem, to tune them independently. The knobs are per network namespace, so they also work for containers.
Signed-off-by: Tony Lu tonylu@linux.alibaba.com Signed-off-by: Paolo Abeni pabeni@redhat.com Signed-off-by: Litao Jiao jiaolitao@sangfor.com.cn --- Documentation/networking/smc-sysctl.rst | 18 ++++++++++++++++++ include/net/netns/smc.h | 2 ++ net/smc/af_smc.c | 5 +++-- net/smc/smc_core.c | 8 ++++---- net/smc/smc_sysctl.c | 21 ++++++++++++++++++++- 5 files changed, 47 insertions(+), 7 deletions(-)
diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst index 3f0187ffc2a5..e38c92ab46f3 100644 --- a/Documentation/networking/smc-sysctl.rst +++ b/Documentation/networking/smc-sysctl.rst @@ -19,3 +19,21 @@ smcr_buf_type - INTEGER - 1 - Use virtually contiguous buffers - 2 - Mixed use of the two types. Try physically contiguous buffers first. If not available, use virtually contiguous buffers then. + +wmem - INTEGER + Initial size of send buffer used by SMC sockets. + The default value inherits from net.ipv4.tcp_wmem[1]. + + The minimum value is 16KiB and there is no hard limit for max value, but + only allowed 512KiB for SMC-R and 1MiB for SMC-D. + + Default: 16K + +rmem - INTEGER + Initial size of receive buffer (RMB) used by SMC sockets. + The default value inherits from net.ipv4.tcp_rmem[1]. + + The minimum value is 16KiB and there is no hard limit for max value, but + only allowed 512KiB for SMC-R and 1MiB for SMC-D. + + Default: 128K diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h index 38396599938c..cded3f9a5081 100644 --- a/include/net/netns/smc.h +++ b/include/net/netns/smc.h @@ -7,5 +7,7 @@ struct netns_smc { struct ctl_table_header *smc_hdr; #endif unsigned int sysctl_smcr_buf_type; + int sysctl_wmem; + int sysctl_rmem; }; #endif diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 322bd6e3b932..074c57554f0b 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -249,6 +249,8 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock, sk->sk_state = SMC_INIT; sk->sk_destruct = smc_destruct; sk->sk_protocol = protocol; + WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(net->smc.sysctl_wmem)); + WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(net->smc.sysctl_rmem)); smc = smc_sk(sk); INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); INIT_WORK(&smc->connect_work, smc_connect_work); @@ -2528,8 +2530,7 @@ static int smc_create(struct net *net, struct socket *sock, int protocol, sk_common_release(sk); goto 
out; } - smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE); - smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE); +
out: return rc; diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 3722a1daac5b..525dd7543c60 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1747,10 +1747,10 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
if (is_rmb) /* use socket recv buffer size (w/o overhead) as start value */ - sk_buf_size = smc->sk.sk_rcvbuf / 2; + sk_buf_size = smc->sk.sk_rcvbuf; else /* use socket send buffer size (w/o overhead) as start value */ - sk_buf_size = smc->sk.sk_sndbuf / 2; + sk_buf_size = smc->sk.sk_sndbuf;
for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb); bufsize_short >= 0; bufsize_short--) { @@ -1800,7 +1800,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) if (is_rmb) { conn->rmb_desc = buf_desc; conn->rmbe_size_short = bufsize_short; - smc->sk.sk_rcvbuf = bufsize * 2; + smc->sk.sk_rcvbuf = bufsize; atomic_set(&conn->bytes_to_rcv, 0); conn->rmbe_update_limit = smc_rmb_wnd_update_limit(buf_desc->len); @@ -1808,7 +1808,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */ } else { conn->sndbuf_desc = buf_desc; - smc->sk.sk_sndbuf = bufsize * 2; + smc->sk.sk_sndbuf = bufsize; atomic_set(&conn->sndbuf_space, bufsize); } return 0; diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c index 81faae0d1b7b..a7cf6411d583 100644 --- a/net/smc/smc_sysctl.c +++ b/net/smc/smc_sysctl.c @@ -18,6 +18,8 @@ #include "smc_core.h"
static int two = 2; +static int min_sndbuf = SMC_BUF_MIN_SIZE; +static int min_rcvbuf = SMC_BUF_MIN_SIZE;
static struct ctl_table smc_table[] = { { @@ -29,6 +31,22 @@ static struct ctl_table smc_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = &two, }, + { + .procname = "wmem", + .data = &init_net.smc.sysctl_wmem, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &min_sndbuf, + }, + { + .procname = "rmem", + .data = &init_net.smc.sysctl_rmem, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &min_rcvbuf, + }, { } };
@@ -53,7 +71,8 @@ int __net_init smc_sysctl_net_init(struct net *net) goto err_reg;
net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS; - + WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1])); + WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1])); return 0;
err_reg: