hulk inclusion
category: feature
bugzilla: https://atomgit.com/openeuler/kernel/issues/8489

--------------------------------

Add sysctl interface to control GHES triggered soft-offline page handling:
- BIT0: Enable/disable driver notification (for BMC reporting, default 0)
- BIT1: Enable/disable soft-offline for base pages (default 1)
- BIT2: Enable/disable soft-offline for HugeTLB pages (default 0)

Only BIT0 changes trigger notifier chain for driver notification.
BIT1-2 control whether to perform soft-offline without notifications.
Default policy (0x2) performs soft-offline on base pages only.

Signed-off-by: Qi Xi <xiqi2@huawei.com>
---
 drivers/acpi/apei/ghes.c | 114 +++++++++++++++++++++++++++++++++++++++
 include/linux/mm.h       |  10 ++++
 kernel/sysctl.c          |  12 +++++
 3 files changed, 136 insertions(+)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 73a04c0b19eb..5ab67e8898d0 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -46,6 +46,7 @@
 #include <linux/sched/clock.h>
 #include <linux/uuid.h>
 #include <linux/ras.h>
+#include <linux/mm.h>
 
 #include <acpi/actbl1.h>
 #include <acpi/ghes.h>
@@ -400,6 +401,116 @@ static void ghes_clear_estatus(struct ghes *ghes)
 	ghes->flags &= ~GHES_TO_CLEAR;
 }
 
+#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
+/*
+ * Policy bits for /proc/sys/vm/apei_page_offline_policy:
+ *   NOTIFY          - flipping it via sysctl calls the notifier chain
+ *   ALLOW_BASE_PAGE - when clear, non-HugeTLB pages are not soft-offlined
+ *   ALLOW_HUGETLB   - when clear, HugeTLB pages are not soft-offlined
+ */
+#define APEI_PAGE_OFFLINE_NOTIFY		BIT(0)
+#define APEI_PAGE_OFFLINE_ALLOW_BASE_PAGE	BIT(1)
+#define APEI_PAGE_OFFLINE_ALLOW_HUGETLB		BIT(2)
+
+/* Default policy: only ALLOW_BASE_PAGE is set. */
+int sysctl_apei_page_offline_policy __read_mostly =
+	APEI_PAGE_OFFLINE_ALLOW_BASE_PAGE;
+EXPORT_SYMBOL(sysctl_apei_page_offline_policy);
+
+static ATOMIC_NOTIFIER_HEAD(apei_page_offline_notifier_chain);
+
+/**
+ * register_apei_page_offline_notifier - add a policy-change subscriber
+ * @nb: notifier block to add to the page-offline notifier chain
+ *
+ * Return: the result of atomic_notifier_chain_register().
+ */
+int register_apei_page_offline_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(
+			&apei_page_offline_notifier_chain, nb);
+}
+EXPORT_SYMBOL(register_apei_page_offline_notifier);
+
+/**
+ * unregister_apei_page_offline_notifier - remove a policy-change subscriber
+ * @nb: notifier block to remove from the page-offline notifier chain
+ *
+ * Return: the result of atomic_notifier_chain_unregister().
+ */
+int unregister_apei_page_offline_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(
+			&apei_page_offline_notifier_chain, nb);
+}
+EXPORT_SYMBOL(unregister_apei_page_offline_notifier);
+
+/**
+ * apei_page_offline_policy_handler - sysctl handler for the offline policy
+ * @table: sysctl table entry, passed through to proc_dointvec_minmax()
+ * @write: non-zero for a write access
+ * @buffer: user-space buffer, passed through
+ * @lenp: buffer length pointer, passed through
+ * @ppos: file position pointer, passed through
+ *
+ * Parsing and range checking are delegated to proc_dointvec_minmax().
+ * After a successful write that flips APEI_PAGE_OFFLINE_NOTIFY, the
+ * notifier chain is called with action 0 and a pointer to the policy word.
+ * Writes that leave that bit unchanged do not call the chain.
+ *
+ * Return: the value returned by proc_dointvec_minmax().
+ */
+int apei_page_offline_policy_handler(struct ctl_table *table,
+		int write, void __user *buffer,
+		size_t *lenp, loff_t *ppos)
+{
+	int notify_before, notify_after;
+	int ret;
+
+	notify_before = READ_ONCE(sysctl_apei_page_offline_policy) &
+			APEI_PAGE_OFFLINE_NOTIFY;
+
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	if (!write || ret)
+		return ret;
+
+	notify_after = READ_ONCE(sysctl_apei_page_offline_policy) &
+		       APEI_PAGE_OFFLINE_NOTIFY;
+	if (notify_before != notify_after)
+		atomic_notifier_call_chain(&apei_page_offline_notifier_chain,
+					   0, &sysctl_apei_page_offline_policy);
+
+	return ret;
+}
+
+/*
+ * Decide whether GHES may soft-offline the page at @pfn under the current
+ * policy. Returns false when @pfn has no online page, or when the policy
+ * bit matching the page type (HugeTLB vs. everything else) is clear.
+ */
+static bool apei_page_should_offline(unsigned long pfn)
+{
+	int policy = READ_ONCE(sysctl_apei_page_offline_policy);
+	struct page *page = pfn_to_online_page(pfn);
+
+	if (!page)
+		return false;
+
+	if (PageHuge(page)) {
+		if (policy & APEI_PAGE_OFFLINE_ALLOW_HUGETLB)
+			return true;
+		pr_info_once("disabled for HugeTLB pages by /proc/sys/vm/apei_page_offline_policy\n");
+		return false;
+	}
+
+	if (policy & APEI_PAGE_OFFLINE_ALLOW_BASE_PAGE)
+		return true;
+	pr_info_once("disabled for normal pages by /proc/sys/vm/apei_page_offline_policy\n");
+	return false;
+}
+
+#endif
+
 static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev)
 {
 #ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
@@ -426,6 +537,9 @@ static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int
 	if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
 		flags = 0;
 
+	if (flags == MF_SOFT_OFFLINE && !apei_page_should_offline(pfn))
+		return;
+
 	if (flags != -1)
 		memory_failure_queue(pfn, flags);
 #endif
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0274a82144e4..2a91d2dd2452 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2874,6 +2874,16 @@ extern int get_hwpoison_page(struct page *page);
 extern int sysctl_memory_failure_early_kill;
 extern int sysctl_memory_failure_recovery;
 extern int sysctl_enable_soft_offline;
+#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
+/* GHES soft-offline policy; defined in drivers/acpi/apei/ghes.c */
+struct notifier_block;
+extern int sysctl_apei_page_offline_policy;
+extern int apei_page_offline_policy_handler(struct ctl_table *table,
+		int write, void __user *buffer,
+		size_t *lenp, loff_t *ppos);
+extern int register_apei_page_offline_notifier(struct notifier_block *nb);
+extern int unregister_apei_page_offline_notifier(struct notifier_block *nb);
+#endif
 extern void shake_page(struct page *p, int access);
 extern atomic_long_t num_poisoned_pages __read_mostly;
 extern int soft_offline_page(struct page *page, int flags);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index f35a1990456e..4186b6a795e3 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -130,6 +130,7 @@ static int __maybe_unused two = 2;
 static int __maybe_unused three = 3;
 static int __maybe_unused four = 4;
 static int __maybe_unused five = 5;
+static int __maybe_unused seven = 7;
 static int __maybe_unused uce_kernel_recovery_max = 31;
 static int int_max = INT_MAX;
 static unsigned long zero_ul;
@@ -1833,6 +1834,17 @@ static struct ctl_table vm_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+#endif
+#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
+	{
+		.procname	= "apei_page_offline_policy",
+		.data		= &sysctl_apei_page_offline_policy,
+		.maxlen		= sizeof(sysctl_apei_page_offline_policy),
+		.mode		= 0644,
+		.proc_handler	= apei_page_offline_policy_handler,
+		.extra1		= &zero,
+		.extra2		= &seven,
+	},
 #endif
 	{
 		.procname	= "user_reserve_kbytes",
-- 
2.33.0