From: Lu Wei <luwei32@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I7AO8G
CVE: NA
--------------------------------
Commit 07f4c90062f8 ("tcp/dccp: try to not exhaust ip_local_port_range in connect()") allocates even ports for connect() first while leaving odd ports for bind(), and this works well on busy servers.
But this strategy causes severe performance degradation on busy clients. When a client has used more than half of the local ports set in /proc/sys/net/ipv4/ip_local_port_range and then tries to connect to a server again, the connect() time increases rapidly, because the scan traverses all the even ports even though they are exhausted.
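To make the cost concrete, here is an illustrative userspace sketch (not part of this patch) that mirrors the scan loop in __inet_hash_connect() and counts how many candidate ports a single connect() probes before it finds a free one. The range 32768-60999 (the usual default) and "every even port already in use" are assumptions chosen to model the worst case described above:

	#include <stdio.h>
	#include <stdbool.h>

	#define LOW  32768
	#define HIGH 60999

	/* worst case from the commit message: all even ports are taken */
	static bool port_in_use(int port)
	{
		return (port & 1) == 0;
	}

	/* walk the range with the given step, as the kernel scan loop does */
	static int probes(int span_size)
	{
		int remaining = HIGH - LOW + 1;
		int count = 0;
		int port = LOW;

		for (int i = 0; i < remaining; i += span_size, port += span_size) {
			count++;
			if (!port_in_use(port))
				return count;	/* free port found */
		}
		return count;	/* even ports exhausted; parity switch follows */
	}

	int main(void)
	{
		printf("even/odd (span 2):   %d probes, then switch to odd\n",
		       probes(2));
		printf("sequential (span 1): %d probes\n", probes(1));
		return 0;
	}

On this model the even-first scan makes 14116 fruitless probes before it is allowed to flip parity, while sequential allocation hits a free odd port on its second probe, which is exactly the latency gap the patch targets.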
So this patch provides another strategy by introducing a sysctl option: local_port_allocation. On a busy client, users should set it to 1 to select sequential allocation; in other situations it should stay at 0, which is the default.
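As an illustration (not part of the original posting): because the entry is registered in ipv4_table, the knob shows up as /proc/sys/net/ipv4/local_port_allocation, so a busy client can enable the new behaviour at runtime with "sysctl -w net.ipv4.local_port_allocation=1" and restore the even/odd strategy by writing 0 back. The handler is plain proc_dointvec and the value is only tested for truth, so any non-zero value selects sequential allocation.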
Signed-off-by: Lu Wei <luwei32@huawei.com>
Signed-off-by: Liu Jian <liujian56@huawei.com>
---
 include/net/tcp.h          |  1 +
 net/ipv4/inet_hashtables.c | 11 ++++++++---
 net/ipv4/sysctl_net_ipv4.c |  8 ++++++++
 3 files changed, 17 insertions(+), 3 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9c86ff1e3ed1..1b832683ccf8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -258,6 +258,7 @@ extern long sysctl_tcp_mem[3];
 extern atomic_long_t tcp_memory_allocated;
 extern struct percpu_counter tcp_sockets_allocated;
 extern unsigned long tcp_memory_pressure;
+extern int sysctl_local_port_allocation;
 
 /* optimized version of sk_under_memory_pressure() for TCP sockets */
 static inline bool tcp_under_memory_pressure(const struct sock *sk)
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index ed8324ee4b1b..396a58d9708c 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -742,7 +742,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 	struct net *net = sock_net(sk);
 	struct inet_bind_bucket *tb;
 	u32 remaining, offset;
-	int ret, i, low, high;
+	int ret, i, low, high, span_size;
 	int l3mdev;
 	u32 index;
 
@@ -762,6 +762,11 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 		local_bh_enable();
 		return ret;
 	}
+	/* local_port_allocation 0 means even and odd port allocation strategy
+	 * will be applied, so span size is 2; otherwise sequential allocation
+	 * will be used and span size is 1. Default value is 0.
+	 */
+	span_size = sysctl_local_port_allocation ? 1 : 2;
 
 	l3mdev = inet_sk_bound_l3mdev(sk);
 
@@ -783,7 +788,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 	offset &= ~1U;
 other_parity_scan:
 	port = low + offset;
-	for (i = 0; i < remaining; i += 2, port += 2) {
+	for (i = 0; i < remaining; i += span_size, port += span_size) {
 		if (unlikely(port >= high))
 			port -= remaining;
 		if (inet_is_local_reserved_port(net, port))
@@ -824,7 +829,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 	}
 
 	offset++;
-	if ((offset & 1) && remaining > 1)
+	if ((offset & 1) && remaining > 1 && span_size == 2)
 		goto other_parity_scan;
 
 	return -EADDRNOTAVAIL;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index dee47ef05655..d82c0eceec91 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -51,6 +51,7 @@ static int comp_sack_nr_max = 255;
 static u32 u32_max_div_HZ = UINT_MAX / HZ;
 static int one_day_secs = 24 * 3600;
 
+int sysctl_local_port_allocation;
 /* obsolete */
 static int sysctl_tcp_low_latency __read_mostly;
 
@@ -575,6 +576,13 @@ static struct ctl_table ipv4_table[] = {
 		.extra2		= SYSCTL_ONE,
 	},
 #endif
+	{
+		.procname	= "local_port_allocation",
+		.data		= &sysctl_local_port_allocation,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{ }
 };
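Appendix (an illustration, not from the original posting): on a kernel carrying this patch, the effect can be eyeballed with a small test program. The loopback listener on port 5555 and the connection count are arbitrary choices; run it once with local_port_allocation=0 (local ports should be mostly even) and once with 1 (they should look sequential):

	#include <stdio.h>
	#include <unistd.h>
	#include <arpa/inet.h>
	#include <sys/socket.h>

	int main(void)
	{
		struct sockaddr_in srv = { .sin_family = AF_INET,
					   .sin_port = htons(5555) };
		int one = 1;

		inet_pton(AF_INET, "127.0.0.1", &srv.sin_addr);

		/* local listener so connect() has something to reach */
		int lfd = socket(AF_INET, SOCK_STREAM, 0);
		setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
		if (bind(lfd, (struct sockaddr *)&srv, sizeof(srv)) < 0 ||
		    listen(lfd, 64) < 0) {
			perror("listen");
			return 1;
		}

		/* connect a few times and print the local port each time */
		for (int i = 0; i < 8; i++) {
			int fd = socket(AF_INET, SOCK_STREAM, 0);
			struct sockaddr_in local;
			socklen_t len = sizeof(local);

			if (connect(fd, (struct sockaddr *)&srv, sizeof(srv)) == 0 &&
			    getsockname(fd, (struct sockaddr *)&local, &len) == 0)
				printf("local port: %u\n", ntohs(local.sin_port));
			close(fd);
		}
		close(lfd);
		return 0;
	}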