hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9R2TB
--------------------------------
After a 'brk' instruction has been placed at the entry of an old function, every call of that function traps into the exception handler and jumps to the new function. So we can check the calltrace of every task, without using stop_machine, to make sure none of them is running in the old function, and then directly replace the old function with instructions that jump to the new function.
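The retry logic can be illustrated with a small userspace sketch (hypothetical names and a simulated task list; the real implementation below walks the task list and per-CPU idle tasks under tasklist_lock and uses IPIs for tasks running on other CPUs):

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

#define RETRY_COUNT     5       /* stands in for KLP_RETRY_COUNT */
#define RETRY_INTERVAL  100000  /* retry interval in microseconds */

struct task { bool in_old_func; };  /* stand-in for task_struct */

/* stand-in for arch_klp_check_task_calltrace(): is the task inside the old function? */
static bool task_in_old_func(const struct task *t) { return t->in_old_func; }

static bool all_tasks_clear(const struct task *tasks, int n)
{
        for (int i = 0; i < n; i++)
                if (task_in_old_func(&tasks[i]))
                        return false;
        return true;
}

int main(void)
{
        struct task tasks[3] = { { true }, { false }, { false } };

        for (int i = 0; i < RETRY_COUNT; i++) {
                if (all_tasks_clear(tasks, 3)) {
                        puts("no task runs the old function: patch directly");
                        return 0;
                }
                puts("some task is still in the old function, retrying");
                tasks[0].in_old_func = false;  /* simulate the task leaving the old function */
                usleep(RETRY_INTERVAL);
        }
        puts("retries exhausted: roll back the breakpoints");
        return 1;
}

The kernel code differs in that running tasks are re-checked via IPI, and the breakpoints are rolled back only after KLP_RETRY_COUNT failed rounds.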
Signed-off-by: Zheng Yejian <zhengyejian1@huawei.com>
---
 include/linux/livepatch.h |   4 +
 kernel/livepatch/Kconfig  |  11 +++
 kernel/livepatch/core.c   | 202 ++++++++++++++++++++++++++++++++++++--
 3 files changed, 208 insertions(+), 9 deletions(-)
diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
index b4cf90c03d29..7146989b5fbc 100644
--- a/include/linux/livepatch.h
+++ b/include/linux/livepatch.h
@@ -321,7 +321,11 @@ static inline int klp_module_coming(struct module *mod) { return 0; }
 static inline void klp_module_going(struct module *mod) {}
 static inline bool klp_patch_pending(struct task_struct *task) { return false; }
 static inline void klp_update_patch_state(struct task_struct *task) {}
+#ifdef CONFIG_LIVEPATCH_BREAKPOINT_NO_STOP_MACHINE
+void klp_copy_process(struct task_struct *child);
+#else
 static inline void klp_copy_process(struct task_struct *child) {}
+#endif
 static inline bool klp_have_reliable_stack(void) { return true; }
 
 #ifndef klp_smp_isb
diff --git a/kernel/livepatch/Kconfig b/kernel/livepatch/Kconfig
index 297ca41c695e..a59cbb6506cb 100644
--- a/kernel/livepatch/Kconfig
+++ b/kernel/livepatch/Kconfig
@@ -98,5 +98,16 @@ config LIVEPATCH_RESTRICT_KPROBE
          We should not patch for the functions where registered with kprobe,
          and vice versa. Say Y here if you want to check those.
+
+config LIVEPATCH_BREAKPOINT_NO_STOP_MACHINE
+        bool "No stop_machine in breakpoint optimization mode"
+        depends on LIVEPATCH_WO_FTRACE
+        default n
+        help
+          In breakpoint optimization mode, check the calltrace of
+          tasks in batches without using stop_machine, so as to
+          reduce service downtime.
+          Say N if you are unsure.
+
 
 endmenu
 endif
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 345b57b67fbf..3bf53800868e 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -2051,18 +2051,185 @@ static bool klp_use_breakpoint(struct klp_patch *patch)
         return true;
 }
-static int klp_breakpoint_optimize(struct klp_patch *patch)
+#ifdef CONFIG_LIVEPATCH_BREAKPOINT_NO_STOP_MACHINE
+#include <linux/sched/task.h>
+#include "../sched/sched.h"
+
+int __weak arch_klp_check_task_calltrace(struct task_struct *t,
+                                         bool (*fn)(void *, int *, unsigned long),
+                                         void *data)
 {
-        int ret;
+        return -EINVAL;
+}
+
+/* Called from copy_process() during fork */
+void klp_copy_process(struct task_struct *child)
+{
+        child->patch_state = current->patch_state;
+}
+
+static void set_tasks_patch_state(int patch_state)
+{
+        unsigned int cpu;
+        struct task_struct *g, *task;
+
+        read_lock(&tasklist_lock);
+        for_each_process_thread(g, task) {
+                task->patch_state = patch_state;
+        }
+        read_unlock(&tasklist_lock);
+
+        get_online_cpus();
+        for_each_possible_cpu(cpu) {
+                task = idle_task(cpu);
+                task->patch_state = patch_state;
+        }
+        put_online_cpus();
+}
+
+static void update_patch_state(struct task_struct *task, struct klp_func_list *func_list)
+{
+        struct rq *rq;
+        struct rq_flags flags;
+
+        if (task->patch_state == KLP_PATCHED)
+                return;
+        WARN_ON_ONCE(task->patch_state != KLP_UNPATCHED);
+        rq = task_rq_lock(task, &flags);
+        if (task_running(rq, task) && task != current)
+                goto done;
+        if (arch_klp_check_task_calltrace(task, check_func_list, (void *)func_list))
+                goto done;
+        task->patch_state = KLP_PATCHED;
+done:
+        task_rq_unlock(rq, task, &flags);
+}
+
+#ifdef CONFIG_SMP
+static void check_task_calltrace_ipi(void *func_list)
+{
+        if (current->patch_state == KLP_PATCHED)
+                return;
+        if (arch_klp_check_task_calltrace(current, check_func_list, func_list))
+                return;
+        current->patch_state = KLP_PATCHED;
+}
+
+static void update_patch_state_ipi(struct klp_func_list *func_list)
+{
+        unsigned int cpu;
+        unsigned int curr_cpu;
+
+        preempt_disable();
+        curr_cpu = smp_processor_id();
+        for_each_online_cpu(cpu) {
+                if (cpu == curr_cpu)
+                        continue;
+                smp_call_function_single(cpu, check_task_calltrace_ipi, func_list, 1);
+        }
+        preempt_enable();
+}
+#endif
+
+static void update_tasks_patch_state(struct klp_func_list *func_list)
+{
+        unsigned int cpu;
+        struct task_struct *g, *task;
+
+        read_lock(&tasklist_lock);
+        for_each_process_thread(g, task)
+                update_patch_state(task, func_list);
+        read_unlock(&tasklist_lock);
+
+        get_online_cpus();
+        for_each_possible_cpu(cpu) {
+                task = idle_task(cpu);
+                if (cpu_online(cpu)) {
+                        update_patch_state(task, func_list);
+                } else if (task->patch_state != KLP_PATCHED) {
+                        /* offline idle tasks can be directly updated */
+                        task->patch_state = KLP_PATCHED;
+                }
+        }
+        put_online_cpus();
+#ifdef CONFIG_SMP
+        update_patch_state_ipi(func_list);
+#endif
+}
+
+static bool is_patchable(void)
+{
+        unsigned int cpu;
+        struct task_struct *g, *task;
+        int patchable = true;
+
+        get_online_cpus();
+        for_each_possible_cpu(cpu) {
+                task = idle_task(cpu);
+                WARN_ON_ONCE(task->patch_state == KLP_UNDEFINED);
+                if (task->patch_state != KLP_PATCHED) {
+                        put_online_cpus();
+                        return false;
+                }
+        }
+        put_online_cpus();
+        read_lock(&tasklist_lock);
+        for_each_process_thread(g, task) {
+                WARN_ON_ONCE(task->patch_state == KLP_UNDEFINED);
+                if (task->patch_state != KLP_PATCHED) {
+                        patchable = false;
+                        goto out_unlock;
+                }
+        }
+out_unlock:
+        read_unlock(&tasklist_lock);
+        return patchable;
+}
+
+static int klp_breakpoint_enable_patch(struct klp_patch *patch, int *cnt)
+{
+        struct klp_func_list *func_list = NULL;
+        int ret = -EINVAL;
         int i;
-        int cnt = 0;
+        int retry_cnt = 0;
 
-        ret = klp_add_breakpoint(patch);
+        ret = arch_klp_check_activeness_func(patch, true, add_func_to_list, &func_list);
         if (ret) {
-                pr_err("failed to add breakpoints, ret=%d\n", ret);
-                return ret;
+                pr_err("break optimize collecting active functions failed, ret=%d\n", ret);
+                goto out;
         }
 
+        set_tasks_patch_state(KLP_UNPATCHED);
+
+        for (i = 0; i < KLP_RETRY_COUNT; i++) {
+                retry_cnt++;
+
+                update_tasks_patch_state(func_list);
+                if (is_patchable()) {
+                        arch_klp_code_modify_prepare();
+                        ret = enable_patch(patch, true);
+                        arch_klp_code_modify_post_process();
+                        break;
+                }
+                ret = -EAGAIN;
+                pr_notice("try again in %d ms\n", KLP_RETRY_INTERVAL);
+                msleep(KLP_RETRY_INTERVAL);
+        }
+        set_tasks_patch_state(KLP_UNDEFINED);
+out:
+        free_func_list(&func_list);
+        *cnt = retry_cnt;
+        return ret;
+}
+
+#else /* !CONFIG_LIVEPATCH_BREAKPOINT_NO_STOP_MACHINE */
+
+static int klp_breakpoint_enable_patch(struct klp_patch *patch, int *cnt)
+{
+        int ret = -EINVAL;
+        int i;
+        int retry_cnt = 0;
+
         for (i = 0; i < KLP_RETRY_COUNT; i++) {
                 struct patch_data patch_data = {
                         .patch = patch,
@@ -2073,20 +2240,37 @@ static int klp_breakpoint_optimize(struct klp_patch *patch)
                 if (i == KLP_RETRY_COUNT - 1)
                         patch_data.rollback = true;
 
-                cnt++;
+                retry_cnt++;
 
                 ret = klp_stop_machine(klp_try_enable_patch, &patch_data, cpu_online_mask);
                 if (!ret || ret != -EAGAIN)
                         break;
 
- pr_notice("try again in %d ms.\n", KLP_RETRY_INTERVAL); + pr_notice("try again in %d ms\n", KLP_RETRY_INTERVAL);
                 msleep(KLP_RETRY_INTERVAL);
         }
+        *cnt = retry_cnt;
+        return ret;
+}
+#endif /* CONFIG_LIVEPATCH_BREAKPOINT_NO_STOP_MACHINE */
+
+static int klp_breakpoint_optimize(struct klp_patch *patch)
+{
+        int ret;
+        int cnt = 0;
+
+        ret = klp_add_breakpoint(patch);
+        if (ret) {
+                pr_err("failed to add breakpoints, ret=%d\n", ret);
+                return ret;
+        }
+
+        ret = klp_breakpoint_enable_patch(patch, &cnt);
+
         pr_notice("patching %s, tried %d times, ret=%d.\n",
                   ret ? "failed" : "success", cnt, ret);
-
         /*
          * If the patch is enabled successfully, the breakpoint instruction
          * has been replaced with the jump instruction. However, if the patch