From: Tang Jinyang tangjinyang@wxiat.com
Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56QDM
--------------------------------
This support must work together with dynamic frequency scaling, which can dynamically turning on/off cores according to global system load.
Signed-off-by: Tang Jinyang tangjinyang@wxiat.com
Signed-off-by: Gu Zitao guzitao@wxiat.com Acked-by: Xie XiuQi xiexiuqi@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/sw_64/Kconfig | 7 + arch/sw_64/include/asm/clock.h | 5 + arch/sw_64/include/asm/cputime.h | 4 +- arch/sw_64/kernel/Makefile | 5 +- arch/sw_64/kernel/clock.c | 8 + arch/sw_64/kernel/cpuautoplug.c | 496 +++++++++++++++++++++++++++++++ drivers/cpufreq/sw64_cpufreq.c | 2 + 7 files changed, 524 insertions(+), 3 deletions(-) create mode 100644 arch/sw_64/kernel/cpuautoplug.c
diff --git a/arch/sw_64/Kconfig b/arch/sw_64/Kconfig index 88ffc98c779e..45e411d7ca69 100644 --- a/arch/sw_64/Kconfig +++ b/arch/sw_64/Kconfig @@ -270,6 +270,13 @@ config SW64_CPUFREQ help Turns on the interface for SW64_CPU Frequency.
+config SW64_CPUAUTOPLUG + bool "sw64 CPU Autoplug interface" + depends on SW64_CPUFREQ + default y + help + Turns on the interface for SW64_CPU CPUAUTOPLUG. + config CPU_FREQ_GOV_ATTR_SET bool
diff --git a/arch/sw_64/include/asm/clock.h b/arch/sw_64/include/asm/clock.h index 437031a65ff1..06ad4bcd6ad3 100644 --- a/arch/sw_64/include/asm/clock.h +++ b/arch/sw_64/include/asm/clock.h @@ -13,6 +13,8 @@ struct clk;
extern struct cpufreq_frequency_table sw64_clockmod_table[];
+extern char curruent_policy[CPUFREQ_NAME_LEN]; + struct clk_ops { void (*init)(struct clk *clk); void (*enable)(struct clk *clk); @@ -45,7 +47,10 @@ int clk_init(void); int sw64_set_rate(int index, unsigned long rate);
struct clk *sw64_clk_get(struct device *dev, const char *id); + unsigned long sw64_clk_get_rate(struct clk *clk);
void sw64_update_clockevents(unsigned long cpu, u32 freq); + +void sw64_store_policy(struct cpufreq_policy *policy); #endif /* _ASM_SW64_CLOCK_H */ diff --git a/arch/sw_64/include/asm/cputime.h b/arch/sw_64/include/asm/cputime.h index bada5a01d887..cdd46b05e228 100644 --- a/arch/sw_64/include/asm/cputime.h +++ b/arch/sw_64/include/asm/cputime.h @@ -2,6 +2,8 @@ #ifndef _ASM_SW64_CPUTIME_H #define _ASM_SW64_CPUTIME_H
-#include <asm-generic/cputime.h> +typedef u64 __nocast cputime64_t; + +#define jiffies64_to_cputime64(__jif) ((__force cputime64_t)(__jif))
#endif /* _ASM_SW64_CPUTIME_H */ diff --git a/arch/sw_64/kernel/Makefile b/arch/sw_64/kernel/Makefile index 628e0c1bd44b..f6a2813b0466 100644 --- a/arch/sw_64/kernel/Makefile +++ b/arch/sw_64/kernel/Makefile @@ -15,7 +15,7 @@ endif
obj-y := entry.o traps.o process.o sys_sw64.o irq.o \ irq_sw64.o signal.o setup.o ptrace.o time.o \ - systbls.o dup_print.o tc.o platform.o \ + systbls.o dup_print.o tc.o \ insn.o early_init.o topology.o cacheinfo.o \ vdso.o vdso/
@@ -44,7 +44,8 @@ endif
# Core logic support obj-$(CONFIG_SW64) += core.o timer.o -obj-$(CONFIG_SW64_CPUFREQ) += clock.o +obj-$(CONFIG_SW64_CPUFREQ) += platform.o clock.o +obj-$(CONFIG_SW64_CPUAUTOPLUG) += cpuautoplug.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o diff --git a/arch/sw_64/kernel/clock.c b/arch/sw_64/kernel/clock.c index fd154a0a66e9..f31f596a0052 100644 --- a/arch/sw_64/kernel/clock.c +++ b/arch/sw_64/kernel/clock.c @@ -29,6 +29,8 @@ #define CORE_PLL0_CFG_SHIFT 4 #define CORE_PLL2_CFG_SHIFT 18
+char curruent_policy[CPUFREQ_NAME_LEN]; + /* Minimum CLK support */ enum { DC_0, DC_1, DC_2, DC_3, DC_4, DC_5, DC_6, DC_7, DC_8, @@ -116,6 +118,12 @@ unsigned long sw64_clk_get_rate(struct clk *clk) } EXPORT_SYMBOL(sw64_clk_get_rate);
+void sw64_store_policy(struct cpufreq_policy *policy) +{ + memcpy(curruent_policy, policy->governor->name, CPUFREQ_NAME_LEN); +} +EXPORT_SYMBOL_GPL(sw64_store_policy); + int sw64_set_rate(int index, unsigned long rate) { unsigned int i, val; diff --git a/arch/sw_64/kernel/cpuautoplug.c b/arch/sw_64/kernel/cpuautoplug.c new file mode 100644 index 000000000000..de6f77086185 --- /dev/null +++ b/arch/sw_64/kernel/cpuautoplug.c @@ -0,0 +1,496 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/sw/kernel/setup.c + * + * Copyright (C) 1995 Linus Torvalds + */ + +#include <linux/module.h> +#include <linux/cpu.h> +#include <linux/sched.h> +#include <linux/tick.h> +#include <linux/kernel_stat.h> +#include <linux/platform_device.h> + +#include <asm/clock.h> +#include <asm/cputime.h> +#include <asm/smp.h> + +int autoplug_enabled; +int autoplug_verbose; +int autoplug_adjusting; + +DEFINE_PER_CPU(int, cpu_adjusting); + +struct cpu_autoplug_info { + cputime64_t prev_idle; + cputime64_t prev_wall; + struct delayed_work work; + unsigned int sampling_rate; + int maxcpus; /* max cpus for autoplug */ + int mincpus; /* min cpus for autoplug */ + int dec_reqs; /* continuous core-decreasing requests */ + int inc_reqs; /* continuous core-increasing requests */ +}; + +struct cpu_autoplug_info ap_info; + +static ssize_t enabled_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", autoplug_enabled); +} + + +static ssize_t enabled_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + char val[5]; + int n; + + memcpy(val, buf, count); + n = kstrtol(val, 0, 0); + + if (n > 1 || n < 0) + return -EINVAL; + + autoplug_enabled = n; + + return count; +} + +static ssize_t verbose_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", autoplug_verbose); +} + +static ssize_t verbose_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + char val[5]; + int n; + + memcpy(val, buf, count); + n = kstrtol(val, 0, 0); + + if (n > 1 || n < 0) + return -EINVAL; + + autoplug_verbose = n; + + return count; +} + +static ssize_t maxcpus_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", ap_info.maxcpus); +} + +static ssize_t maxcpus_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + char val[5]; + int n; + + memcpy(val, buf, count); + n = kstrtol(val, 0, 0); + + if (n > num_possible_cpus() || n < ap_info.mincpus) + return -EINVAL; + + ap_info.maxcpus = n; + + return count; +} + +static ssize_t mincpus_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", ap_info.mincpus); +} + +static ssize_t mincpus_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + char val[5]; + int n; + + memcpy(val, buf, count); + n = kstrtol(val, 0, 0); + + if (n > ap_info.maxcpus || n < 1) + return -EINVAL; + + ap_info.mincpus = n; + + return count; +} + +static ssize_t sampling_rate_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", ap_info.sampling_rate); +} + +#define SAMPLING_RATE_MAX 1000 +#define SAMPLING_RATE_MIN 600 + +static ssize_t sampling_rate_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + char val[6]; + int n; + + memcpy(val, buf, count); + n = kstrtol(val, 0, 0); + + if (n > SAMPLING_RATE_MAX || n < SAMPLING_RATE_MIN) + return -EINVAL; + + ap_info.sampling_rate = n; + + return count; +} + +static ssize_t available_value_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "enabled: 0-1\nverbose: 0-1\nmaxcpus:" + "1-%d\nmincpus: 1-%d\nsampling_rate: %d-%d\n", + num_possible_cpus(), num_possible_cpus(), + SAMPLING_RATE_MIN, SAMPLING_RATE_MAX); +} + +static DEVICE_ATTR_RW(enabled); +static DEVICE_ATTR_RW(verbose); +static DEVICE_ATTR_RW(maxcpus); +static DEVICE_ATTR_RW(mincpus); +static DEVICE_ATTR_RW(sampling_rate); +static DEVICE_ATTR(available_value, 0644, available_value_show, NULL); + +static struct attribute *cpuclass_default_attrs[] = { + &dev_attr_enabled.attr, + &dev_attr_verbose.attr, + &dev_attr_maxcpus.attr, + &dev_attr_mincpus.attr, + &dev_attr_sampling_rate.attr, + &dev_attr_available_value.attr, + NULL +}; + +static struct attribute_group cpuclass_attr_group = { + .attrs = cpuclass_default_attrs, + .name = "cpuautoplug", +}; + +#ifndef MODULE +static int __init setup_autoplug(char *str) +{ + if (!strcmp(str, "off")) + autoplug_enabled = 0; + else if (!strcmp(str, "on")) + autoplug_enabled = 1; + else + return 0; + return 1; +} + +__setup("autoplug=", setup_autoplug); +#endif + +static cputime64_t calc_busy_time(unsigned int cpu) +{ + cputime64_t busy_time; + + busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE]; + busy_time += 1; + + return busy_time; +} + +static inline cputime64_t get_idle_time_jiffy(cputime64_t *wall) +{ + unsigned int cpu; + cputime64_t idle_time = 0; + cputime64_t cur_wall_time; + cputime64_t busy_time; + + cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); + + for_each_online_cpu(cpu) { + busy_time = calc_busy_time(cpu); + + idle_time += cur_wall_time - busy_time; + } + + if (wall) + *wall = (cputime64_t)jiffies_to_usecs(cur_wall_time); + + return (cputime64_t)jiffies_to_usecs(idle_time); +} + +static inline cputime64_t sw64_get_idle_time(cputime64_t *wall) +{ + unsigned int cpu; + u64 idle_time = 0; + + for_each_online_cpu(cpu) { + idle_time += get_cpu_idle_time_us(cpu, wall); + if (idle_time == -1ULL) + return get_idle_time_jiffy(wall); + } + + return idle_time; +} + +static cputime64_t get_min_busy_time(cputime64_t arr[], int size) +{ + int loop, min_idx; + cputime64_t min_time = arr[0]; + + for (loop = 1; loop < size; loop++) { + if (arr[loop] > 0) { + if (arr[loop] < min_time) { + min_time = arr[loop]; + min_idx = loop; + } + } + } + return min_idx; +} + +static int find_min_busy_cpu(void) +{ + int nr_all_cpus = num_possible_cpus(); + unsigned int cpus, target_cpu; + cputime64_t busy_time; + cputime64_t b_time[nr_all_cpus]; + + memset(b_time, 0, sizeof(b_time)); + for_each_online_cpu(cpus) { + busy_time = calc_busy_time(cpus); + b_time[cpus] = busy_time; + } + target_cpu = get_min_busy_time(b_time, nr_all_cpus); + pr_info("The target_cpu is %d, the cpu_num is %d\n", + target_cpu, num_online_cpus() - 1); + return target_cpu; +} + +static void increase_cores(int cur_cpus) +{ + if (cur_cpus == ap_info.maxcpus) + return; + + cur_cpus = cpumask_next_zero(0, cpu_online_mask); + + struct device *dev = get_cpu_device(cur_cpus); + + per_cpu(cpu_adjusting, dev->id) = 1; + lock_device_hotplug(); + cpu_device_up(dev); + pr_info("The target_cpu is %d, After cpu_up, the cpu_num is %d\n", + dev->id, num_online_cpus()); + get_cpu_device(dev->id)->offline = false; + unlock_device_hotplug(); + per_cpu(cpu_adjusting, dev->id) = 0; +} + +static void decrease_cores(int cur_cpus) +{ + if (cur_cpus == ap_info.mincpus) + return; + + cur_cpus = find_min_busy_cpu(); + + struct device *dev = get_cpu_device(cur_cpus); + + if (dev->id > 0) { + per_cpu(cpu_adjusting, dev->id) = -1; + lock_device_hotplug(); + cpu_device_down(dev); + get_cpu_device(dev->id)->offline = true; + unlock_device_hotplug(); + per_cpu(cpu_adjusting, dev->id) = 0; + } +} + +#define INC_THRESHOLD 80 +#define DEC_THRESHOLD 40 + +static void do_autoplug_timer(struct work_struct *work) +{ + cputime64_t cur_wall_time = 0, cur_idle_time; + unsigned long idle_time, wall_time; + int delay, load; + int nr_cur_cpus = num_online_cpus(); + int nr_all_cpus = num_possible_cpus(); + int inc_req = 1, dec_req = 2; + + ap_info.maxcpus = + setup_max_cpus > nr_cpu_ids ? nr_cpu_ids : setup_max_cpus; + ap_info.mincpus = ap_info.maxcpus / 4; + + if (strcmp(curruent_policy, "performance") == 0) { + ap_info.mincpus = ap_info.maxcpus; + } else if (strcmp(curruent_policy, "powersave") == 0) { + ap_info.maxcpus = ap_info.mincpus; + } else if (strcmp(curruent_policy, "ondemand") == 0) { + ap_info.sampling_rate = 500; + inc_req = 0; + dec_req = 2; + } else if (strcmp(curruent_policy, "conservative") == 0) { + inc_req = 1; + dec_req = 3; + ap_info.sampling_rate = 1000; /* 1s */ + } + + BUG_ON(smp_processor_id() != 0); + delay = msecs_to_jiffies(ap_info.sampling_rate); + if (!autoplug_enabled || system_state != SYSTEM_RUNNING) + goto out; + + autoplug_adjusting = 1; + + if (nr_cur_cpus > ap_info.maxcpus) { + decrease_cores(nr_cur_cpus); + autoplug_adjusting = 0; + goto out; + } + if (nr_cur_cpus < ap_info.mincpus) { + increase_cores(nr_cur_cpus); + autoplug_adjusting = 0; + goto out; + } + + cur_idle_time = sw64_get_idle_time(&cur_wall_time); + if (cur_wall_time == 0) + cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); + + wall_time = (unsigned int)(cur_wall_time - ap_info.prev_wall); + ap_info.prev_wall = cur_wall_time; + + idle_time = (unsigned int)(cur_idle_time - ap_info.prev_idle); + idle_time += wall_time * (nr_all_cpus - nr_cur_cpus); + ap_info.prev_wall = cur_idle_time; + + if (unlikely(!wall_time || wall_time * nr_all_cpus < idle_time)) { + autoplug_adjusting = 0; + goto out; + } + + load = 100 * (wall_time * nr_all_cpus - idle_time) / wall_time; + + if (load < (nr_cur_cpus - 1) * 100 - DEC_THRESHOLD) { + ap_info.inc_reqs = 0; + if (ap_info.dec_reqs < dec_req) + ap_info.dec_reqs++; + else { + ap_info.dec_reqs = 0; + decrease_cores(nr_cur_cpus); + } + } else { + ap_info.dec_reqs = 0; + if (load > (nr_cur_cpus - 1) * 100 + INC_THRESHOLD) { + if (ap_info.inc_reqs < inc_req) + ap_info.inc_reqs++; + else { + ap_info.inc_reqs = 0; + increase_cores(nr_cur_cpus); + } + } + } + + autoplug_adjusting = 0; +out: + schedule_delayed_work_on(0, &ap_info.work, delay); +} + +static struct platform_device_id platform_device_ids[] = { + { + .name = "sw64_cpuautoplug", + }, + {} +}; + +MODULE_DEVICE_TABLE(platform, platform_device_ids); + +static struct platform_driver platform_driver = { + .driver = { + .name = "sw64_cpuautoplug", + .owner = THIS_MODULE, + }, + .id_table = platform_device_ids, +}; + +static int __init cpuautoplug_init(void) +{ + int i, ret, delay; + + ret = sysfs_create_group(&cpu_subsys.dev_root->kobj, + &cpuclass_attr_group); + if (ret) + return ret; + + ret = platform_driver_register(&platform_driver); + if (ret) + return ret; + + pr_info("cpuautoplug: SW64 CPU autoplug driver.\n"); + + ap_info.maxcpus = + setup_max_cpus > nr_cpu_ids ? nr_cpu_ids : setup_max_cpus; + ap_info.mincpus = 16; + ap_info.dec_reqs = 0; + ap_info.inc_reqs = 0; + ap_info.sampling_rate = 720; /* 720ms */ + if (setup_max_cpus == 0) { /* boot with npsmp */ + ap_info.maxcpus = 1; + autoplug_enabled = 0; + } + if (setup_max_cpus > num_possible_cpus()) + ap_info.maxcpus = num_possible_cpus(); + + pr_info("mincpu = %d, maxcpu = %d, autoplug_enabled = %d, rate = %d\n", + ap_info.mincpus, ap_info.maxcpus, autoplug_enabled, + ap_info.sampling_rate); + + for_each_possible_cpu(i) + per_cpu(cpu_adjusting, i) = 0; +#ifndef MODULE + delay = msecs_to_jiffies(ap_info.sampling_rate * 24); +#else + delay = msecs_to_jiffies(ap_info.sampling_rate * 8); +#endif + INIT_DEFERRABLE_WORK(&ap_info.work, do_autoplug_timer); + schedule_delayed_work_on(0, &ap_info.work, delay); + + if (!autoplug_enabled) + cancel_delayed_work_sync(&ap_info.work); + + return ret; +} + +static void __exit cpuautoplug_exit(void) +{ + cancel_delayed_work_sync(&ap_info.work); + platform_driver_unregister(&platform_driver); + sysfs_remove_group(&cpu_subsys.dev_root->kobj, &cpuclass_attr_group); +} + +late_initcall(cpuautoplug_init); +module_exit(cpuautoplug_exit); + +MODULE_DESCRIPTION("cpuautoplug driver for SW64"); diff --git a/drivers/cpufreq/sw64_cpufreq.c b/drivers/cpufreq/sw64_cpufreq.c index ba180e389c9e..5f49b5175d34 100644 --- a/drivers/cpufreq/sw64_cpufreq.c +++ b/drivers/cpufreq/sw64_cpufreq.c @@ -72,6 +72,8 @@ static int sw64_cpufreq_target(struct cpufreq_policy *policy,
freq = (get_cpu_freq() / 1000) * index / 48;
+ sw64_store_policy(policy); + /* setting the cpu frequency */ sw64_set_rate(-1, freq * 1000);