[PATCH openEuler-5.10 71/90] sw64: add dynamic turning on/off cores support

7 Jun 2022

From: Tang Jinyang <tangjinyang@wxiat.com>

Sunway inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I56QDM

--------------------------------

This support must work together with dynamic frequency scaling, which
can dynamically turning on/off cores according to global system load.

Signed-off-by: Tang Jinyang <tangjinyang@wxiat.com>

Signed-off-by: Gu Zitao <guzitao@wxiat.com>
Acked-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
---
 arch/sw_64/Kconfig               |   7 +
 arch/sw_64/include/asm/clock.h   |   5 +
 arch/sw_64/include/asm/cputime.h |   4 +-
 arch/sw_64/kernel/Makefile       |   5 +-
 arch/sw_64/kernel/clock.c        |   8 +
 arch/sw_64/kernel/cpuautoplug.c  | 496 +++++++++++++++++++++++++++++++
 drivers/cpufreq/sw64_cpufreq.c   |   2 +
 7 files changed, 524 insertions(+), 3 deletions(-)
 create mode 100644 arch/sw_64/kernel/cpuautoplug.c

diff --git a/arch/sw_64/Kconfig b/arch/sw_64/Kconfig
index 88ffc98c779e..45e411d7ca69 100644
--- a/arch/sw_64/Kconfig
+++ b/arch/sw_64/Kconfig
@@ -270,6 +270,13 @@ config SW64_CPUFREQ
         help
           Turns on the interface for SW64_CPU Frequency.
 
+config SW64_CPUAUTOPLUG
+	bool "sw64 CPU Autoplug interface"
+	depends on SW64_CPUFREQ
+	default y
+	help
+          Turns on the interface for SW64_CPU CPUAUTOPLUG.
+
 config CPU_FREQ_GOV_ATTR_SET
 	bool
 
diff --git a/arch/sw_64/include/asm/clock.h b/arch/sw_64/include/asm/clock.h
index 437031a65ff1..06ad4bcd6ad3 100644
--- a/arch/sw_64/include/asm/clock.h
+++ b/arch/sw_64/include/asm/clock.h
@@ -13,6 +13,8 @@ struct clk;
 
 extern struct cpufreq_frequency_table sw64_clockmod_table[];
 
+extern char curruent_policy[CPUFREQ_NAME_LEN];
+
 struct clk_ops {
 	void (*init)(struct clk *clk);
 	void (*enable)(struct clk *clk);
@@ -45,7 +47,10 @@ int clk_init(void);
 int sw64_set_rate(int index, unsigned long rate);
 
 struct clk *sw64_clk_get(struct device *dev, const char *id);
+
 unsigned long sw64_clk_get_rate(struct clk *clk);
 
 void sw64_update_clockevents(unsigned long cpu, u32 freq);
+
+void sw64_store_policy(struct cpufreq_policy *policy);
 #endif				/* _ASM_SW64_CLOCK_H */
diff --git a/arch/sw_64/include/asm/cputime.h b/arch/sw_64/include/asm/cputime.h
index bada5a01d887..cdd46b05e228 100644
--- a/arch/sw_64/include/asm/cputime.h
+++ b/arch/sw_64/include/asm/cputime.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_SW64_CPUTIME_H
 #define _ASM_SW64_CPUTIME_H
 
-#include <asm-generic/cputime.h>
+typedef u64 __nocast cputime64_t;
+
+#define jiffies64_to_cputime64(__jif)  ((__force cputime64_t)(__jif))
 
 #endif /* _ASM_SW64_CPUTIME_H */
diff --git a/arch/sw_64/kernel/Makefile b/arch/sw_64/kernel/Makefile
index 628e0c1bd44b..f6a2813b0466 100644
--- a/arch/sw_64/kernel/Makefile
+++ b/arch/sw_64/kernel/Makefile
@@ -15,7 +15,7 @@ endif
 
 obj-y    := entry.o traps.o process.o sys_sw64.o irq.o \
 	    irq_sw64.o signal.o setup.o ptrace.o time.o \
-	    systbls.o dup_print.o tc.o platform.o \
+	    systbls.o dup_print.o tc.o \
 	    insn.o early_init.o topology.o cacheinfo.o \
 	    vdso.o vdso/
 
@@ -44,7 +44,8 @@ endif
 
 # Core logic support
 obj-$(CONFIG_SW64)	+= core.o timer.o
-obj-$(CONFIG_SW64_CPUFREQ) += clock.o
+obj-$(CONFIG_SW64_CPUFREQ) += platform.o clock.o
+obj-$(CONFIG_SW64_CPUAUTOPLUG) += cpuautoplug.o
 
 obj-$(CONFIG_CRASH_DUMP)    += crash_dump.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
diff --git a/arch/sw_64/kernel/clock.c b/arch/sw_64/kernel/clock.c
index fd154a0a66e9..f31f596a0052 100644
--- a/arch/sw_64/kernel/clock.c
+++ b/arch/sw_64/kernel/clock.c
@@ -29,6 +29,8 @@
 #define CORE_PLL0_CFG_SHIFT     4
 #define CORE_PLL2_CFG_SHIFT     18
 
+char   curruent_policy[CPUFREQ_NAME_LEN];
+
 /* Minimum CLK support */
 enum {
 	DC_0, DC_1, DC_2, DC_3, DC_4, DC_5, DC_6, DC_7, DC_8,
@@ -116,6 +118,12 @@ unsigned long sw64_clk_get_rate(struct clk *clk)
 }
 EXPORT_SYMBOL(sw64_clk_get_rate);
 
+void sw64_store_policy(struct cpufreq_policy *policy)
+{
+	memcpy(curruent_policy, policy->governor->name, CPUFREQ_NAME_LEN);
+}
+EXPORT_SYMBOL_GPL(sw64_store_policy);
+
 int sw64_set_rate(int index, unsigned long rate)
 {
 	unsigned int i, val;
diff --git a/arch/sw_64/kernel/cpuautoplug.c b/arch/sw_64/kernel/cpuautoplug.c
new file mode 100644
index 000000000000..de6f77086185
--- /dev/null
+++ b/arch/sw_64/kernel/cpuautoplug.c
@@ -0,0 +1,496 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  linux/arch/sw/kernel/setup.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ */
+
+#include <linux/module.h>
+#include <linux/cpu.h>
+#include <linux/sched.h>
+#include <linux/tick.h>
+#include <linux/kernel_stat.h>
+#include <linux/platform_device.h>
+
+#include <asm/clock.h>
+#include <asm/cputime.h>
+#include <asm/smp.h>
+
+int autoplug_enabled;
+int autoplug_verbose;
+int autoplug_adjusting;
+
+DEFINE_PER_CPU(int, cpu_adjusting);
+
+struct cpu_autoplug_info {
+	cputime64_t prev_idle;
+	cputime64_t prev_wall;
+	struct delayed_work work;
+	unsigned int sampling_rate;
+	int maxcpus;   /* max cpus for autoplug */
+	int mincpus;   /* min cpus for autoplug */
+	int dec_reqs;  /* continuous core-decreasing requests */
+	int inc_reqs;  /* continuous core-increasing requests */
+};
+
+struct cpu_autoplug_info ap_info;
+
+static ssize_t enabled_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", autoplug_enabled);
+}
+
+
+static ssize_t enabled_store(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	char val[5];
+	int n;
+
+	memcpy(val, buf, count);
+	n = kstrtol(val, 0, 0);
+
+	if (n > 1 || n < 0)
+		return -EINVAL;
+
+	autoplug_enabled = n;
+
+	return count;
+}
+
+static ssize_t verbose_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", autoplug_verbose);
+}
+
+static ssize_t verbose_store(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	char val[5];
+	int n;
+
+	memcpy(val, buf, count);
+	n = kstrtol(val, 0, 0);
+
+	if (n > 1 || n < 0)
+		return -EINVAL;
+
+	autoplug_verbose = n;
+
+	return count;
+}
+
+static ssize_t maxcpus_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", ap_info.maxcpus);
+}
+
+static ssize_t maxcpus_store(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	char val[5];
+	int n;
+
+	memcpy(val, buf, count);
+	n = kstrtol(val, 0, 0);
+
+	if (n > num_possible_cpus() || n < ap_info.mincpus)
+		return -EINVAL;
+
+	ap_info.maxcpus = n;
+
+	return count;
+}
+
+static ssize_t mincpus_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", ap_info.mincpus);
+}
+
+static ssize_t mincpus_store(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	char val[5];
+	int n;
+
+	memcpy(val, buf, count);
+	n = kstrtol(val, 0, 0);
+
+	if (n > ap_info.maxcpus || n < 1)
+		return -EINVAL;
+
+	ap_info.mincpus = n;
+
+	return count;
+}
+
+static ssize_t sampling_rate_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", ap_info.sampling_rate);
+}
+
+#define SAMPLING_RATE_MAX 1000
+#define SAMPLING_RATE_MIN 600
+
+static ssize_t sampling_rate_store(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	char val[6];
+	int n;
+
+	memcpy(val, buf, count);
+	n = kstrtol(val, 0, 0);
+
+	if (n > SAMPLING_RATE_MAX || n < SAMPLING_RATE_MIN)
+		return -EINVAL;
+
+	ap_info.sampling_rate = n;
+
+	return count;
+}
+
+static ssize_t available_value_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "enabled: 0-1\nverbose: 0-1\nmaxcpus:"
+			"1-%d\nmincpus: 1-%d\nsampling_rate: %d-%d\n",
+			num_possible_cpus(), num_possible_cpus(),
+			SAMPLING_RATE_MIN, SAMPLING_RATE_MAX);
+}
+
+static DEVICE_ATTR_RW(enabled);
+static DEVICE_ATTR_RW(verbose);
+static DEVICE_ATTR_RW(maxcpus);
+static DEVICE_ATTR_RW(mincpus);
+static DEVICE_ATTR_RW(sampling_rate);
+static DEVICE_ATTR(available_value, 0644, available_value_show, NULL);
+
+static struct attribute *cpuclass_default_attrs[] = {
+	&dev_attr_enabled.attr,
+	&dev_attr_verbose.attr,
+	&dev_attr_maxcpus.attr,
+	&dev_attr_mincpus.attr,
+	&dev_attr_sampling_rate.attr,
+	&dev_attr_available_value.attr,
+	NULL
+};
+
+static struct attribute_group cpuclass_attr_group = {
+	.attrs = cpuclass_default_attrs,
+	.name = "cpuautoplug",
+};
+
+#ifndef MODULE
+static int __init setup_autoplug(char *str)
+{
+	if (!strcmp(str, "off"))
+		autoplug_enabled = 0;
+	else if (!strcmp(str, "on"))
+		autoplug_enabled = 1;
+	else
+		return 0;
+	return 1;
+}
+
+__setup("autoplug=", setup_autoplug);
+#endif
+
+static cputime64_t calc_busy_time(unsigned int cpu)
+{
+	cputime64_t busy_time;
+
+	busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
+	busy_time += 1;
+
+	return busy_time;
+}
+
+static inline cputime64_t get_idle_time_jiffy(cputime64_t *wall)
+{
+	unsigned int cpu;
+	cputime64_t idle_time = 0;
+	cputime64_t cur_wall_time;
+	cputime64_t busy_time;
+
+	cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
+
+	for_each_online_cpu(cpu) {
+		busy_time = calc_busy_time(cpu);
+
+		idle_time += cur_wall_time - busy_time;
+	}
+
+	if (wall)
+		*wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);
+
+	return (cputime64_t)jiffies_to_usecs(idle_time);
+}
+
+static inline cputime64_t sw64_get_idle_time(cputime64_t *wall)
+{
+	unsigned int cpu;
+	u64 idle_time = 0;
+
+	for_each_online_cpu(cpu) {
+		idle_time += get_cpu_idle_time_us(cpu, wall);
+		if (idle_time == -1ULL)
+			return get_idle_time_jiffy(wall);
+	}
+
+	return idle_time;
+}
+
+static cputime64_t get_min_busy_time(cputime64_t arr[], int size)
+{
+	int loop, min_idx;
+	cputime64_t min_time = arr[0];
+
+	for (loop = 1; loop < size; loop++) {
+		if (arr[loop] > 0) {
+			if (arr[loop] < min_time) {
+				min_time = arr[loop];
+				min_idx = loop;
+			}
+		}
+	}
+	return min_idx;
+}
+
+static int find_min_busy_cpu(void)
+{
+	int nr_all_cpus = num_possible_cpus();
+	unsigned int cpus, target_cpu;
+	cputime64_t busy_time;
+	cputime64_t b_time[nr_all_cpus];
+
+	memset(b_time, 0, sizeof(b_time));
+	for_each_online_cpu(cpus) {
+		busy_time = calc_busy_time(cpus);
+		b_time[cpus] = busy_time;
+	}
+	target_cpu = get_min_busy_time(b_time, nr_all_cpus);
+	pr_info("The target_cpu is %d, the cpu_num is %d\n",
+			target_cpu, num_online_cpus() - 1);
+	return target_cpu;
+}
+
+static void increase_cores(int cur_cpus)
+{
+	if (cur_cpus == ap_info.maxcpus)
+		return;
+
+	cur_cpus = cpumask_next_zero(0, cpu_online_mask);
+
+	struct device *dev = get_cpu_device(cur_cpus);
+
+	per_cpu(cpu_adjusting, dev->id) = 1;
+	lock_device_hotplug();
+	cpu_device_up(dev);
+	pr_info("The target_cpu is %d, After cpu_up, the cpu_num is %d\n",
+			dev->id, num_online_cpus());
+	get_cpu_device(dev->id)->offline = false;
+	unlock_device_hotplug();
+	per_cpu(cpu_adjusting, dev->id) = 0;
+}
+
+static void decrease_cores(int cur_cpus)
+{
+	if (cur_cpus == ap_info.mincpus)
+		return;
+
+	cur_cpus = find_min_busy_cpu();
+
+	struct device *dev = get_cpu_device(cur_cpus);
+
+	if (dev->id > 0) {
+		per_cpu(cpu_adjusting, dev->id) = -1;
+		lock_device_hotplug();
+		cpu_device_down(dev);
+		get_cpu_device(dev->id)->offline = true;
+		unlock_device_hotplug();
+		per_cpu(cpu_adjusting, dev->id) = 0;
+	}
+}
+
+#define INC_THRESHOLD 80
+#define DEC_THRESHOLD 40
+
+static void do_autoplug_timer(struct work_struct *work)
+{
+	cputime64_t cur_wall_time = 0, cur_idle_time;
+	unsigned long idle_time, wall_time;
+	int delay, load;
+	int nr_cur_cpus = num_online_cpus();
+	int nr_all_cpus = num_possible_cpus();
+	int inc_req = 1, dec_req = 2;
+
+	ap_info.maxcpus =
+		setup_max_cpus > nr_cpu_ids ? nr_cpu_ids : setup_max_cpus;
+	ap_info.mincpus = ap_info.maxcpus / 4;
+
+	if (strcmp(curruent_policy, "performance") == 0) {
+		ap_info.mincpus = ap_info.maxcpus;
+	} else if (strcmp(curruent_policy, "powersave") == 0) {
+		ap_info.maxcpus = ap_info.mincpus;
+	} else if (strcmp(curruent_policy, "ondemand") == 0) {
+		ap_info.sampling_rate = 500;
+		inc_req = 0;
+		dec_req = 2;
+	} else if (strcmp(curruent_policy, "conservative") == 0) {
+		inc_req = 1;
+		dec_req = 3;
+		ap_info.sampling_rate = 1000;  /* 1s */
+	}
+
+	BUG_ON(smp_processor_id() != 0);
+	delay = msecs_to_jiffies(ap_info.sampling_rate);
+	if (!autoplug_enabled || system_state != SYSTEM_RUNNING)
+		goto out;
+
+	autoplug_adjusting = 1;
+
+	if (nr_cur_cpus > ap_info.maxcpus) {
+		decrease_cores(nr_cur_cpus);
+		autoplug_adjusting = 0;
+		goto out;
+	}
+	if (nr_cur_cpus < ap_info.mincpus) {
+		increase_cores(nr_cur_cpus);
+		autoplug_adjusting = 0;
+		goto out;
+	}
+
+	cur_idle_time = sw64_get_idle_time(&cur_wall_time);
+	if (cur_wall_time == 0)
+		cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
+
+	wall_time = (unsigned int)(cur_wall_time - ap_info.prev_wall);
+	ap_info.prev_wall = cur_wall_time;
+
+	idle_time = (unsigned int)(cur_idle_time - ap_info.prev_idle);
+	idle_time += wall_time * (nr_all_cpus - nr_cur_cpus);
+	ap_info.prev_wall = cur_idle_time;
+
+	if (unlikely(!wall_time || wall_time * nr_all_cpus < idle_time)) {
+		autoplug_adjusting = 0;
+		goto out;
+	}
+
+	load = 100 * (wall_time * nr_all_cpus - idle_time) / wall_time;
+
+	if (load < (nr_cur_cpus - 1) * 100 - DEC_THRESHOLD) {
+		ap_info.inc_reqs = 0;
+		if (ap_info.dec_reqs < dec_req)
+			ap_info.dec_reqs++;
+		else {
+			ap_info.dec_reqs = 0;
+			decrease_cores(nr_cur_cpus);
+		}
+	} else {
+		ap_info.dec_reqs = 0;
+		if (load > (nr_cur_cpus - 1) * 100 + INC_THRESHOLD) {
+			if (ap_info.inc_reqs < inc_req)
+				ap_info.inc_reqs++;
+			else {
+				ap_info.inc_reqs = 0;
+				increase_cores(nr_cur_cpus);
+			}
+		}
+	}
+
+	autoplug_adjusting = 0;
+out:
+	schedule_delayed_work_on(0, &ap_info.work, delay);
+}
+
+static struct platform_device_id platform_device_ids[] = {
+	{
+		.name = "sw64_cpuautoplug",
+	},
+	{}
+};
+
+MODULE_DEVICE_TABLE(platform, platform_device_ids);
+
+static struct platform_driver platform_driver = {
+	.driver = {
+		.name = "sw64_cpuautoplug",
+		.owner = THIS_MODULE,
+	},
+	.id_table = platform_device_ids,
+};
+
+static int __init cpuautoplug_init(void)
+{
+	int i, ret, delay;
+
+	ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
+					&cpuclass_attr_group);
+	if (ret)
+		return ret;
+
+	ret = platform_driver_register(&platform_driver);
+	if (ret)
+		return ret;
+
+	pr_info("cpuautoplug: SW64 CPU autoplug driver.\n");
+
+	ap_info.maxcpus =
+		setup_max_cpus > nr_cpu_ids ? nr_cpu_ids : setup_max_cpus;
+	ap_info.mincpus = 16;
+	ap_info.dec_reqs = 0;
+	ap_info.inc_reqs = 0;
+	ap_info.sampling_rate = 720;  /* 720ms */
+	if (setup_max_cpus == 0) {    /* boot with npsmp */
+		ap_info.maxcpus = 1;
+		autoplug_enabled = 0;
+	}
+	if (setup_max_cpus > num_possible_cpus())
+		ap_info.maxcpus = num_possible_cpus();
+
+	pr_info("mincpu = %d, maxcpu = %d, autoplug_enabled = %d, rate = %d\n",
+			ap_info.mincpus, ap_info.maxcpus, autoplug_enabled,
+			ap_info.sampling_rate);
+
+	for_each_possible_cpu(i)
+		per_cpu(cpu_adjusting, i) = 0;
+#ifndef MODULE
+	delay = msecs_to_jiffies(ap_info.sampling_rate * 24);
+#else
+	delay = msecs_to_jiffies(ap_info.sampling_rate * 8);
+#endif
+	INIT_DEFERRABLE_WORK(&ap_info.work, do_autoplug_timer);
+	schedule_delayed_work_on(0, &ap_info.work, delay);
+
+	if (!autoplug_enabled)
+		cancel_delayed_work_sync(&ap_info.work);
+
+	return ret;
+}
+
+static void __exit cpuautoplug_exit(void)
+{
+	cancel_delayed_work_sync(&ap_info.work);
+	platform_driver_unregister(&platform_driver);
+	sysfs_remove_group(&cpu_subsys.dev_root->kobj, &cpuclass_attr_group);
+}
+
+late_initcall(cpuautoplug_init);
+module_exit(cpuautoplug_exit);
+
+MODULE_DESCRIPTION("cpuautoplug driver for SW64");
diff --git a/drivers/cpufreq/sw64_cpufreq.c b/drivers/cpufreq/sw64_cpufreq.c
index ba180e389c9e..5f49b5175d34 100644
--- a/drivers/cpufreq/sw64_cpufreq.c
+++ b/drivers/cpufreq/sw64_cpufreq.c
@@ -72,6 +72,8 @@ static int sw64_cpufreq_target(struct cpufreq_policy *policy,
 
 	freq = (get_cpu_freq() / 1000) * index / 48;
 
+	sw64_store_policy(policy);
+
 	/* setting the cpu frequency */
 	sw64_set_rate(-1, freq * 1000);
 
-- 
2.20.1