From: Li Huafei <lihuafei1@huawei.com>

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5CJ7X
--------------------------------
Commit 86e35fae15bb ("livepatch: checks only if the replaced instruction is on the stack") optimizes the stack check. However, for extremely hot functions the replaced instruction may still be found on the stack, so there is room for further optimization.
By inserting a breakpoint exception instruction at the entry of the old function being patched, we can divert callers of the old function to the new function. In this way, during the stack check, only tasks that entered the old function before the breakpoint was inserted need to be considered, which increases the probability of passing the stack check.
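To illustrate the idea (this handler is not part of this patch; the per-arch breakpoint handlers are added separately, and instruction_pointer()/instruction_pointer_set() below are only placeholders for the arch's real register accessors), an architecture's breakpoint handler could divert a trapped task roughly like this:

/*
 * Illustrative sketch only: divert a task that hit the breakpoint at the
 * old function entry to the new function registered for it.
 */
static int klp_brk_handler(struct pt_regs *regs)
{
	void *new_func = klp_get_brk_func((void *)instruction_pointer(regs));

	if (!new_func)
		return 0;	/* not a livepatch breakpoint, keep searching */

	/* Resume execution at the entry of the new function. */
	instruction_pointer_set(regs, (unsigned long)new_func);
	return 1;
}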
If the stack check fails, we sleep for a while and try again, giving the tasks that entered the old function a chance to run out of the instruction replacement area.
We first try to enable the patch through the normal process, that is, without inserting breakpoints. If that first attempt fails and the force flag KLP_STACK_OPTIMIZE is set for every function of the patch, we then use the breakpoint exception optimization.
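The resulting enable flow in __klp_enable_patch() can be summarized as follows (condensed sketch; the helper name klp_enable_flow is hypothetical and error handling, locking and memory recycling are omitted):

/*
 * Condensed view of the enable flow: try the normal stop_machine path
 * first, and only fall back to breakpoints when every function of the
 * patch allows it.
 */
static int klp_enable_flow(struct klp_patch *patch, struct patch_data *pd)
{
	int ret;

	/* 1. Normal attempt: no breakpoints inserted. */
	ret = stop_machine(klp_try_enable_patch, pd, cpu_online_mask);
	if (!ret || ret != -EAGAIN)
		return ret;

	/* 2. Stack check failed (-EAGAIN): check the per-function force flags. */
	if (!klp_use_breakpoint(patch))
		return ret;

	/* 3. Insert breakpoints and retry up to KLP_RETRY_COUNT times. */
	return klp_breakpoint_optimize(patch);
}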
Signed-off-by: Li Huafei <lihuafei1@huawei.com>
Reviewed-by: Xu Kuohai <xukuohai@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
---
 arch/arm/kernel/livepatch.c        |   2 +-
 arch/arm64/kernel/livepatch.c      |   2 +-
 arch/powerpc/kernel/livepatch_32.c |   2 +-
 arch/powerpc/kernel/livepatch_64.c |   2 +-
 arch/x86/kernel/livepatch.c        |   2 +-
 include/linux/livepatch.h          |  14 +-
 kernel/livepatch/core.c            | 275 +++++++++++++++++++++++++++--
 kernel/livepatch/core.h            |  14 ++
 kernel/livepatch/patch.c           |  26 ++-
 kernel/livepatch/patch.h           |   4 +
 10 files changed, 322 insertions(+), 21 deletions(-)

diff --git a/arch/arm/kernel/livepatch.c b/arch/arm/kernel/livepatch.c
index 3162de4aec70..338222846b81 100644
--- a/arch/arm/kernel/livepatch.c
+++ b/arch/arm/kernel/livepatch.c
@@ -137,7 +137,7 @@ static int klp_check_activeness_func(struct klp_patch *patch, int enable,
 	for (obj = patch->objs; obj->funcs; obj++) {
 		for (func = obj->funcs; func->old_name; func++) {
 			if (enable) {
-				if (func->force == KLP_ENFORCEMENT)
+				if (func->patched || func->force == KLP_ENFORCEMENT)
 					continue;
 				/*
 				 * When enable, checking the currently
diff --git a/arch/arm64/kernel/livepatch.c b/arch/arm64/kernel/livepatch.c
index d629ad409721..3b1b4db58d52 100644
--- a/arch/arm64/kernel/livepatch.c
+++ b/arch/arm64/kernel/livepatch.c
@@ -131,7 +131,7 @@ static int klp_check_activeness_func(struct klp_patch *patch, int enable,
 	for (obj = patch->objs; obj->funcs; obj++) {
 		for (func = obj->funcs; func->old_name; func++) {
 			if (enable) {
-				if (func->force == KLP_ENFORCEMENT)
+				if (func->patched || func->force == KLP_ENFORCEMENT)
 					continue;
 				/*
 				 * When enable, checking the currently
diff --git a/arch/powerpc/kernel/livepatch_32.c b/arch/powerpc/kernel/livepatch_32.c
index 4ce4bd07eaaf..8478d496a991 100644
--- a/arch/powerpc/kernel/livepatch_32.c
+++ b/arch/powerpc/kernel/livepatch_32.c
@@ -134,7 +134,7 @@ static int klp_check_activeness_func(struct klp_patch *patch, int enable,
 	for (obj = patch->objs; obj->funcs; obj++) {
 		for (func = obj->funcs; func->old_name; func++) {
 			if (enable) {
-				if (func->force == KLP_ENFORCEMENT)
+				if (func->patched || func->force == KLP_ENFORCEMENT)
 					continue;
 				/*
 				 * When enable, checking the currently
diff --git a/arch/powerpc/kernel/livepatch_64.c b/arch/powerpc/kernel/livepatch_64.c
index acc6d94a5c91..b313917242ee 100644
--- a/arch/powerpc/kernel/livepatch_64.c
+++ b/arch/powerpc/kernel/livepatch_64.c
@@ -143,7 +143,7 @@ static int klp_check_activeness_func(struct klp_patch *patch, int enable,
 
 			/* Check func address in stack */
 			if (enable) {
-				if (func->force == KLP_ENFORCEMENT)
+				if (func->patched || func->force == KLP_ENFORCEMENT)
 					continue;
 				/*
 				 * When enable, checking the currently
diff --git a/arch/x86/kernel/livepatch.c b/arch/x86/kernel/livepatch.c
index 824b538d2861..5763876457d1 100644
--- a/arch/x86/kernel/livepatch.c
+++ b/arch/x86/kernel/livepatch.c
@@ -126,7 +126,7 @@ static int klp_check_activeness_func(struct klp_patch *patch, int enable,
 
 			/* Check func address in stack */
 			if (enable) {
-				if (func->force == KLP_ENFORCEMENT)
+				if (func->patched || func->force == KLP_ENFORCEMENT)
 					continue;
 				/*
 				 * When enable, checking the currently
diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
index c12781f7397b..602e944dfc9e 100644
--- a/include/linux/livepatch.h
+++ b/include/linux/livepatch.h
@@ -229,19 +229,29 @@ struct klp_func_node {
 	struct list_head func_stack;
 	void *old_func;
 	struct arch_klp_data arch_data;
+	/*
+	 * Used by the breakpoint exception handling functions.
+	 * If 'brk_func' is NULL, no breakpoint is inserted at the entry of
+	 * the old function.
+	 * If it is not NULL, its value is the new function to jump to when
+	 * the breakpoint exception is triggered.
+	 */
+	void *brk_func;
 };
 
 struct klp_func_node *klp_find_func_node(const void *old_func);
 void klp_add_func_node(struct klp_func_node *func_node);
 void klp_del_func_node(struct klp_func_node *func_node);
+void *klp_get_brk_func(void *addr);
 
 static inline int klp_compare_address(unsigned long pc, unsigned long func_addr,
 				      const char *func_name, unsigned long check_size)
 {
 	if (pc >= func_addr && pc < func_addr + check_size) {
-		pr_err("func %s is in use!\n", func_name);
-		return -EBUSY;
+		pr_warn("func %s is in use!\n", func_name);
+		/* Return -EAGAIN for next retry */
+		return -EAGAIN;
 	}
 	return 0;
 }
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 47d8661ee5e4..a682a8638e01 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -31,6 +31,7 @@
 #include "state.h"
 #include "transition.h"
 #elif defined(CONFIG_LIVEPATCH_STOP_MACHINE_CONSISTENCY)
+#include <linux/delay.h>
 #include <linux/stop_machine.h>
 #endif
 
@@ -57,6 +58,7 @@ static struct kobject *klp_root_kobj;
 struct patch_data {
 	struct klp_patch *patch;
 	atomic_t cpu_count;
+	bool rollback;
 };
 #endif
 
@@ -1300,6 +1302,37 @@ void klp_del_func_node(struct klp_func_node *func_node)
 	list_del_rcu(&func_node->node);
 }
 
+/*
+ * Called from the breakpoint exception handler function.
+ */
+void *klp_get_brk_func(void *addr)
+{
+	struct klp_func_node *func_node;
+	void *brk_func = NULL;
+
+	if (!addr)
+		return NULL;
+
+	rcu_read_lock();
+
+	func_node = klp_find_func_node(addr);
+	if (!func_node)
+		goto unlock;
+
+	/*
+	 * Corresponds to smp_wmb() in {add, remove}_breakpoint(). If the
+	 * current breakpoint exception belongs to us, we have observed the
+	 * breakpoint instruction, so brk_func must be observed.
+	 */
+	smp_rmb();
+
+	brk_func = func_node->brk_func;
+
+unlock:
+	rcu_read_unlock();
+	return brk_func;
+}
+
 /*
  * This function is called from stop_machine() context.
  */
@@ -1370,6 +1403,25 @@ long __weak arch_klp_save_old_code(struct arch_klp_data *arch_data, void *old_fu
 	return -ENOSYS;
 }
 
+int __weak arch_klp_check_breakpoint(struct arch_klp_data *arch_data, void *old_func)
+{
+	return 0;
+}
+
+int __weak arch_klp_add_breakpoint(struct arch_klp_data *arch_data, void *old_func)
+{
+	return -ENOTSUPP;
+}
+
+void __weak arch_klp_remove_breakpoint(struct arch_klp_data *arch_data, void *old_func)
+{
+}
+
+void __weak arch_klp_set_brk_func(struct klp_func_node *func_node, void *new_func)
+{
+	func_node->brk_func = new_func;
+}
+
 static struct klp_func_node *func_node_alloc(struct klp_func *func)
 {
 	long ret;
@@ -1444,6 +1496,110 @@ static int klp_mem_prepare(struct klp_patch *patch)
 	return 0;
 }
 
+static void remove_breakpoint(struct klp_func *func, bool restore)
+{
+
+	struct klp_func_node *func_node = klp_find_func_node(func->old_func);
+	struct arch_klp_data *arch_data = &func_node->arch_data;
+
+	if (!func_node->brk_func)
+		return;
+
+	if (restore)
+		arch_klp_remove_breakpoint(arch_data, func->old_func);
+
+	/* Wait for all breakpoint exception handler functions to exit. */
+	synchronize_rcu();
+
+	/* 'brk_func' cannot be set to NULL before the breakpoint is removed. */
+	smp_wmb();
+
+	arch_klp_set_brk_func(func_node, NULL);
+}
+
+static void __klp_breakpoint_post_process(struct klp_patch *patch, bool restore)
+{
+	struct klp_object *obj;
+	struct klp_func *func;
+
+	klp_for_each_object(patch, obj) {
+		klp_for_each_func(obj, func) {
+			remove_breakpoint(func, restore);
+		}
+	}
+}
+
+static int add_breakpoint(struct klp_func *func)
+{
+	struct klp_func_node *func_node = klp_find_func_node(func->old_func);
+	struct arch_klp_data *arch_data = &func_node->arch_data;
+	int ret;
+
+	if (WARN_ON_ONCE(func_node->brk_func))
+		return -EINVAL;
+
+	ret = arch_klp_check_breakpoint(arch_data, func->old_func);
+	if (ret)
+		return ret;
+
+	arch_klp_set_brk_func(func_node, func->new_func);
+
+	/*
+	 * When entering an exception, we must see 'brk_func' or the kernel
+	 * will not be able to handle the breakpoint exception we are about
+	 * to insert.
+	 */
+	smp_wmb();
+
+	ret = arch_klp_add_breakpoint(arch_data, func->old_func);
+	if (ret)
+		arch_klp_set_brk_func(func_node, NULL);
+
+	return ret;
+}
+
+static int klp_add_breakpoint(struct klp_patch *patch)
+{
+	struct klp_object *obj;
+	struct klp_func *func;
+	int ret;
+
+	/*
+	 * Ensure that the module is not unloaded before the breakpoint is
+	 * removed. After the breakpoint is removed, it is guaranteed that
+	 * no task can jump to the new function through the breakpoint
+	 * handler.
+	 */
+	if (!try_module_get(patch->mod))
+		return -ENODEV;
+
+	arch_klp_code_modify_prepare();
+
+	klp_for_each_object(patch, obj) {
+		klp_for_each_func(obj, func) {
+			ret = add_breakpoint(func);
+			if (ret) {
+				__klp_breakpoint_post_process(patch, true);
+				arch_klp_code_modify_post_process();
+				module_put(patch->mod);
+				return ret;
+			}
+		}
+	}
+
+	arch_klp_code_modify_post_process();
+
+	return 0;
+}
+
+static void klp_breakpoint_post_process(struct klp_patch *patch, bool restore)
+{
+	arch_klp_code_modify_prepare();
+	__klp_breakpoint_post_process(patch, restore);
+	arch_klp_code_modify_post_process();
+	module_put(patch->mod);
+}
+
 static int __klp_disable_patch(struct klp_patch *patch)
 {
 	int ret;
@@ -1614,7 +1770,7 @@ EXPORT_SYMBOL_GPL(klp_enable_patch);
 /*
  * This function is called from stop_machine() context.
  */
-static int enable_patch(struct klp_patch *patch)
+static int enable_patch(struct klp_patch *patch, bool rollback)
 {
 	struct klp_object *obj;
 	int ret;
@@ -1622,19 +1778,21 @@ static int enable_patch(struct klp_patch *patch)
 	pr_notice_once("tainting kernel with TAINT_LIVEPATCH\n");
 	add_taint(TAINT_LIVEPATCH, LOCKDEP_STILL_OK);
 
-	if (!try_module_get(patch->mod))
-		return -ENODEV;
+	if (!patch->enabled) {
+		if (!try_module_get(patch->mod))
+			return -ENODEV;
 
-	patch->enabled = true;
+		patch->enabled = true;
 
-	pr_notice("enabling patch '%s'\n", patch->mod->name);
+		pr_notice("enabling patch '%s'\n", patch->mod->name);
+	}
 
 	klp_for_each_object(patch, obj) {
 		if (!klp_is_object_loaded(obj))
 			continue;
 
-		ret = klp_patch_object(obj);
-		if (ret) {
+		ret = klp_patch_object(obj, rollback);
+		if (ret && klp_need_rollback(ret, rollback)) {
 			pr_warn("failed to patch object '%s'\n",
 				klp_is_module(obj) ? obj->name : "vmlinux");
 			goto disable;
@@ -1666,7 +1824,7 @@ int klp_try_enable_patch(void *data)
 			atomic_inc(&pd->cpu_count);
 			return ret;
 		}
-		ret = enable_patch(patch);
+		ret = enable_patch(patch, pd->rollback);
 		if (ret) {
 			atomic_inc(&pd->cpu_count);
 			return ret;
@@ -1682,12 +1840,89 @@ int klp_try_enable_patch(void *data)
 	return ret;
 }
 
+/*
+ * When stop_machine() is used to enable the patch, if the patch fails to be
+ * enabled because the stack check fails, a certain number of retries are
+ * allowed. The maximum number of retries is KLP_RETRY_COUNT.
+ *
+ * Sleeps for KLP_RETRY_INTERVAL milliseconds before each retry to give tasks
+ * that fail the stack check a chance to run out of the instruction replacement
+ * area.
+ */
+#define KLP_RETRY_COUNT 5
+#define KLP_RETRY_INTERVAL 100
+
+static bool klp_use_breakpoint(struct klp_patch *patch)
+{
+	struct klp_object *obj;
+	struct klp_func *func;
+
+	klp_for_each_object(patch, obj) {
+		klp_for_each_func(obj, func) {
+			if (func->force != KLP_STACK_OPTIMIZE)
+				return false;
+		}
+	}
+
+	return true;
+}
+
+static int klp_breakpoint_optimize(struct klp_patch *patch)
+{
+	int ret;
+	int i;
+	int cnt = 0;
+
+	ret = klp_add_breakpoint(patch);
+	if (ret) {
+		pr_err("failed to add breakpoints, ret=%d\n", ret);
+		return ret;
+	}
+
+	for (i = 0; i < KLP_RETRY_COUNT; i++) {
+		struct patch_data patch_data = {
+			.patch = patch,
+			.cpu_count = ATOMIC_INIT(0),
+			.rollback = false,
+		};
+
+		if (i == KLP_RETRY_COUNT - 1)
+			patch_data.rollback = true;
+
+		cnt++;
+
+		arch_klp_code_modify_prepare();
+		ret = stop_machine(klp_try_enable_patch, &patch_data,
+				   cpu_online_mask);
+		arch_klp_code_modify_post_process();
+		if (!ret || ret != -EAGAIN)
+			break;
+
+		pr_notice("try again in %d ms.\n", KLP_RETRY_INTERVAL);
+
+		msleep(KLP_RETRY_INTERVAL);
+	}
+	pr_notice("patching %s, tried %d times, ret=%d.\n",
+		  ret ? "failed" : "success", cnt, ret);
+
+	/*
+	 * If the patch is enabled successfully, the breakpoint instruction
+	 * has been replaced with the jump instruction. However, if the patch
+	 * fails to be enabled, we need to delete the previously inserted
+	 * breakpoints to restore the instructions at the old function entries.
+	 */
+	klp_breakpoint_post_process(patch, !!ret);
+
+	return ret;
+}
+
 static int __klp_enable_patch(struct klp_patch *patch)
 {
 	int ret;
 	struct patch_data patch_data = {
 		.patch = patch,
 		.cpu_count = ATOMIC_INIT(0),
+		.rollback = true,
 	};
 
 	if (WARN_ON(patch->enabled))
@@ -1705,14 +1940,26 @@ static int __klp_enable_patch(struct klp_patch *patch)
 	ret = klp_mem_prepare(patch);
 	if (ret)
 		return ret;
+
 	arch_klp_code_modify_prepare();
-	ret = stop_machine(klp_try_enable_patch, &patch_data, cpu_online_mask);
+	ret = stop_machine(klp_try_enable_patch, &patch_data,
+			   cpu_online_mask);
 	arch_klp_code_modify_post_process();
-	if (ret) {
-		klp_mem_recycle(patch);
-		return ret;
+	if (!ret)
+		goto move_patch_to_tail;
+	if (ret != -EAGAIN)
+		goto err_out;
+
+	if (!klp_use_breakpoint(patch)) {
+		pr_debug("breakpoint exception optimization is not used.\n");
+		goto err_out;
 	}
 
+	ret = klp_breakpoint_optimize(patch);
+	if (ret)
+		goto err_out;
+
+move_patch_to_tail:
 #ifndef CONFIG_LIVEPATCH_STACK
 	/* move the enabled patch to the list tail */
 	list_del(&patch->list);
@@ -1720,6 +1967,10 @@ static int __klp_enable_patch(struct klp_patch *patch)
 #endif
 
 	return 0;
+
+err_out:
+	klp_mem_recycle(patch);
+	return ret;
 }
 
 /**
diff --git a/kernel/livepatch/core.h b/kernel/livepatch/core.h
index 9bcd139eb7d6..911b6452e5be 100644
--- a/kernel/livepatch/core.h
+++ b/kernel/livepatch/core.h
@@ -57,4 +57,18 @@ static inline void klp_post_unpatch_callback(struct klp_object *obj)
 	obj->callbacks.post_unpatch_enabled = false;
 }
 #endif /* CONFIG_LIVEPATCH_PER_TASK_CONSISTENCY */
+
+#ifdef CONFIG_LIVEPATCH_STOP_MACHINE_CONSISTENCY
+/*
+ * In the enable_patch() process, we do not need to roll back the patch
+ * immediately if it fails to be enabled. In this way, functions that have
+ * already been patched successfully need not be patched again during a retry.
+ * However, on the last retry (rollback == true), or when the failure is not a
+ * stack check failure (patch_err != -EAGAIN), an immediate rollback is required.
+ */
+static inline bool klp_need_rollback(int patch_err, bool rollback)
+{
+	return patch_err != -EAGAIN || rollback;
+}
+#endif /* CONFIG_LIVEPATCH_STOP_MACHINE_CONSISTENCY */
 #endif /* _LIVEPATCH_CORE_H */
diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c
index 6515b8e99829..bea6c5d0af94 100644
--- a/kernel/livepatch/patch.c
+++ b/kernel/livepatch/patch.c
@@ -269,10 +269,10 @@ static inline int klp_patch_func(struct klp_func *func)
 {
 	int ret = 0;
 
+	if (func->patched)
+		return 0;
 	if (WARN_ON(!func->old_func))
 		return -EINVAL;
-	if (WARN_ON(func->patched))
-		return -EINVAL;
 	if (WARN_ON(!func->func_node))
 		return -EINVAL;
 
@@ -306,6 +306,27 @@ void klp_unpatch_object(struct klp_object *obj)
 	__klp_unpatch_object(obj, false);
 }
 
+#ifdef CONFIG_LIVEPATCH_STOP_MACHINE_CONSISTENCY
+int klp_patch_object(struct klp_object *obj, bool rollback)
+{
+	struct klp_func *func;
+	int ret;
+
+	if (obj->patched)
+		return 0;
+
+	klp_for_each_func(obj, func) {
+		ret = klp_patch_func(func);
+		if (ret && klp_need_rollback(ret, rollback)) {
+			klp_unpatch_object(obj);
+			return ret;
+		}
+	}
+	obj->patched = true;
+
+	return 0;
+}
+#else
 int klp_patch_object(struct klp_object *obj)
 {
 	struct klp_func *func;
@@ -325,6 +346,7 @@ int klp_patch_object(struct klp_object *obj)
 
 	return 0;
 }
+#endif
 
 static void __klp_unpatch_objects(struct klp_patch *patch, bool nops_only)
 {
diff --git a/kernel/livepatch/patch.h b/kernel/livepatch/patch.h
index c9cde47f7e97..9566681660e4 100644
--- a/kernel/livepatch/patch.h
+++ b/kernel/livepatch/patch.h
@@ -29,7 +29,11 @@ struct klp_ops {
 
 struct klp_ops *klp_find_ops(void *old_func);
 
+#ifdef CONFIG_LIVEPATCH_STOP_MACHINE_CONSISTENCY
+int klp_patch_object(struct klp_object *obj, bool rollback);
+#else
 int klp_patch_object(struct klp_object *obj);
+#endif
 void klp_unpatch_object(struct klp_object *obj);
 void klp_unpatch_objects(struct klp_patch *patch);
 void klp_unpatch_objects_dynamic(struct klp_patch *patch);