[PATCH openEuler-1.0-LTS 0/2] estimation_timer adjust

Add sysctl_run_estimation and sysctl_run_estimation_target to control execution of the est_timer callback in ipvs. sysctl_run_estimation: if disabled, the estimation is stopped and the speed estimation data is no longer updated. sysctl_run_estimation_target: if not -1, the estimation runs on the target CPU. Dust Li (1): ipvs: add sysctl_run_estimation to support disable estimation Orange9887 (1): ipvs: add sysctl_run_estimation_target to support estimation select CPU Documentation/networking/ipvs-sysctl.txt | 20 +++++++++++++ include/net/ip_vs.h | 22 ++++++++++++++ net/netfilter/ipvs/ip_vs_ctl.c | 38 ++++++++++++++++++++++++ net/netfilter/ipvs/ip_vs_est.c | 16 +++++++++- 4 files changed, 95 insertions(+), 1 deletion(-) -- 2.25.1

From: Dust Li <dust.li@linux.alibaba.com> mainline inclusion from mainline-v5.16-rc1 commit 2232642ec3fb4aad6ae4da1e109f55a0e7f2d204 category: feature bugzilla: NA CVE: NA --------------------------- estimation_timer will iterate the est_list to do estimation for each ipvs stats. When there are lots of services, the list can be very large. We found that estimation_timer() runs for more than 200ms on a machine with 104 CPUs and 50K services. yunhong-cgl jiang reported the same phenomenon before: https://www.spinics.net/lists/lvs-devel/msg05426.html In some cases (for example a large K8S cluster with many ipvs services), ipvs estimation may not be needed. So add a sysctl knob to allow users to disable this completely. Default is: 1 (enable) Cc: yunhong-cgl jiang <xintian1976@gmail.com> Signed-off-by: Dust Li <dust.li@linux.alibaba.com> Acked-by: Julian Anastasov <ja@ssi.bg> Acked-by: Simon Horman <horms@verge.net.au> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: Xiangwei Li <liwei728@huawei.com> --- Documentation/networking/ipvs-sysctl.txt | 11 +++++++++++ include/net/ip_vs.h | 11 +++++++++++ net/netfilter/ipvs/ip_vs_ctl.c | 8 ++++++++ net/netfilter/ipvs/ip_vs_est.c | 5 +++++ 4 files changed, 35 insertions(+) diff --git a/Documentation/networking/ipvs-sysctl.txt b/Documentation/networking/ipvs-sysctl.txt index 056898685d40..ec0b260fb409 100644 --- a/Documentation/networking/ipvs-sysctl.txt +++ b/Documentation/networking/ipvs-sysctl.txt @@ -292,3 +292,14 @@ sync_version - INTEGER Kernels with this sync_version entry are able to receive messages of both version 1 and version 2 of the synchronisation protocol. + +run_estimation - BOOLEAN + 0 - disabled + not 0 - enabled (default) + + If disabled, the estimation will be stop, and you can't see + any update on speed estimation data. + + You can always re-enable estimation by setting this value to 1. + But be careful, the first estimation after re-enable is not + accurate. 
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index c31e54a41b5c..4ffc08670234 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -926,6 +926,7 @@ struct netns_ipvs { int sysctl_conn_reuse_mode; int sysctl_schedule_icmp; int sysctl_ignore_tunneled; + int sysctl_run_estimation; /* ip_vs_lblc */ int sysctl_lblc_expiration; @@ -1060,6 +1061,11 @@ static inline int sysctl_cache_bypass(struct netns_ipvs *ipvs) return ipvs->sysctl_cache_bypass; } +static inline int sysctl_run_estimation(struct netns_ipvs *ipvs) +{ + return ipvs->sysctl_run_estimation; +} + #else static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) @@ -1147,6 +1153,11 @@ static inline int sysctl_cache_bypass(struct netns_ipvs *ipvs) return 0; } +static inline int sysctl_run_estimation(struct netns_ipvs *ipvs) +{ + return 1; +} + #endif /* IPVS core functions diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 6208fa09fe71..f3a745b0ef48 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1903,6 +1903,12 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "run_estimation", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -3955,6 +3961,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode; tbl[idx++].data = &ipvs->sysctl_schedule_icmp; tbl[idx++].data = &ipvs->sysctl_ignore_tunneled; + ipvs->sysctl_run_estimation = 1; + tbl[idx++].data = &ipvs->sysctl_run_estimation; ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); if (ipvs->sysctl_hdr == NULL) { diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 489055091a9b..338e04fa6fbe 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -104,6 +104,9 @@ static void 
estimation_timer(struct timer_list *t) u64 rate; struct netns_ipvs *ipvs = from_timer(ipvs, t, est_timer); + if (!sysctl_run_estimation(ipvs)) + goto skip; + spin_lock(&ipvs->est_lock); list_for_each_entry(e, &ipvs->est_list, list) { s = container_of(e, struct ip_vs_stats, est); @@ -135,6 +138,8 @@ static void estimation_timer(struct timer_list *t) spin_unlock(&s->lock); } spin_unlock(&ipvs->est_lock); + +skip: mod_timer(&ipvs->est_timer, jiffies + 2*HZ); } -- 2.25.1

From: Orange9887 <3488547067@qq.com> hulk inclusion category: feature bugzilla: NA CVE: NA ------------------------------------ By using sysctl_run_estimation_target, you can adjust the CPU on which the estimation is performed to prevent interference with business operations. Signed-off-by: Xiangwei Li <liwei728@huawei.com> --- Documentation/networking/ipvs-sysctl.txt | 9 +++++++ include/net/ip_vs.h | 11 +++++++++ net/netfilter/ipvs/ip_vs_ctl.c | 30 ++++++++++++++++++++++++ net/netfilter/ipvs/ip_vs_est.c | 11 ++++++++- 4 files changed, 60 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/ipvs-sysctl.txt b/Documentation/networking/ipvs-sysctl.txt index ec0b260fb409..ff7389172569 100644 --- a/Documentation/networking/ipvs-sysctl.txt +++ b/Documentation/networking/ipvs-sysctl.txt @@ -303,3 +303,12 @@ run_estimation - BOOLEAN You can always re-enable estimation by setting this value to 1. But be careful, the first estimation after re-enable is not accurate. + +run_estimation_target - int + -1 - default + not -1 - run_estimation on target cpu + + If not -1, the estimation running on the target CPU. + + You can adjust the CPU where estimation is located to prevent + interference with business operations. 
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 4ffc08670234..048172e0c80c 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -927,6 +927,7 @@ struct netns_ipvs { int sysctl_schedule_icmp; int sysctl_ignore_tunneled; int sysctl_run_estimation; + int sysctl_run_estimation_target; /* ip_vs_lblc */ int sysctl_lblc_expiration; @@ -1066,6 +1067,11 @@ static inline int sysctl_run_estimation(struct netns_ipvs *ipvs) return ipvs->sysctl_run_estimation; } +static inline int sysctl_run_estimation_target(struct netns_ipvs *ipvs) +{ + return ipvs->sysctl_run_estimation_target; +} + #else static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) @@ -1158,6 +1164,11 @@ static inline int sysctl_run_estimation(struct netns_ipvs *ipvs) return 1; } +static inline int sysctl_run_estimation_target(struct netns_ipvs *ipvs) +{ + return -1; +} + #endif /* IPVS core functions diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index f3a745b0ef48..624fd6beb092 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1735,6 +1735,28 @@ proc_do_sync_ports(struct ctl_table *table, int write, return rc; } +static int +proc_dointvec_target(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int rc, t; + int *target = table->data; + struct netns_ipvs *ipvs = + container_of(target, struct netns_ipvs, sysctl_run_estimation_target); + + t = *target; + + rc = proc_dointvec(table, write, buffer, lenp, ppos); + if (write && (*target >= nr_cpu_ids || *target < 0)) + *target = t; + else if (*target != t && cpu_online(*target)) { + del_timer_sync(&ipvs->est_timer); + add_timer_on(&ipvs->est_timer, *target); + } + + return rc; +} + /* * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) * Do not change order or insert new entries without @@ -1909,6 +1931,12 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = 
"run_estimation_target", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_target, + }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -3963,6 +3991,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) tbl[idx++].data = &ipvs->sysctl_ignore_tunneled; ipvs->sysctl_run_estimation = 1; tbl[idx++].data = &ipvs->sysctl_run_estimation; + ipvs->sysctl_run_estimation_target = -1; + tbl[idx++].data = &ipvs->sysctl_run_estimation_target; ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); if (ipvs->sysctl_hdr == NULL) { diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 338e04fa6fbe..ef1743f69edc 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -195,10 +195,19 @@ void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats) int __net_init ip_vs_estimator_net_init(struct netns_ipvs *ipvs) { + int target; + INIT_LIST_HEAD(&ipvs->est_list); spin_lock_init(&ipvs->est_lock); + + target = sysctl_run_estimation_target(ipvs); timer_setup(&ipvs->est_timer, estimation_timer, 0); - mod_timer(&ipvs->est_timer, jiffies + 2 * HZ); + if (target > -1 && target < nr_cpu_ids && cpu_online(target)) { + ipvs->est_timer.expires = jiffies + 2 * HZ; + add_timer_on(&ipvs->est_timer, target); + } else + mod_timer(&ipvs->est_timer, jiffies + 2 * HZ); + return 0; } -- 2.25.1

反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/17274 邮件列表地址:https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/PTT... FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/17274 Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/PTT...
participants (2)
-
patchwork bot
-
Xiangwei Li