From: Ma Wupeng mawupeng1@huawei.com
Page ejection is achieved by soft_offline_page and unpoison_memory. A linked list is maintained to record the pfns which are offlined by this module. Only the pfns present in the list are allowed to go online.
The following sysfs files are introduced to online/offline pages via this module. Since there is an internal linked list that records all offlined pages, remove_page is introduced to remove a page from the list.
- /sys/kernel/page_eject/online_page - /sys/kernel/page_eject/offline_page - /sys/kernel/page_eject/remove_page
Ma Wupeng (4): mm/hwpoison: Export symbol soft_offline_page mm/memory-failure: introduce soft_online_page mm: page_eject: Introuduce page ejection config: update defconfig for PAGE_EJECT
arch/arm64/configs/openeuler_defconfig | 1 + drivers/ras/Kconfig | 10 + drivers/ras/Makefile | 1 + drivers/ras/page_eject.c | 249 +++++++++++++++++++++++++ include/linux/mm.h | 1 + mm/memory-failure.c | 40 ++-- 6 files changed, 288 insertions(+), 14 deletions(-) create mode 100644 drivers/ras/page_eject.c
From: Ma Wupeng mawupeng1@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I90COS
--------------------------------
Export the symbol soft_offline_page so that other modules can use it.
The function soft_offline_page is used to isolate pages. During page isolation, migration is issued if the page is mapped by a user task, and the page is finally removed from the buddy system.
Signed-off-by: Ma Wupeng mawupeng1@huawei.com --- mm/memory-failure.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 0b59cf8c544a8..367e303ba565e 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2801,3 +2801,4 @@ int soft_offline_page(unsigned long pfn, int flags)
return ret; } +EXPORT_SYMBOL_GPL(soft_offline_page);
From: Ma Wupeng mawupeng1@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I90COS
--------------------------------
Commit 67f22ba7750f ("mm/memory-failure: disable unpoison once hw error happens") disables unpoison_memory once a real memory failure happens, since the kpte will be cleared on x86 and this will lead to a kernel panic after unpoison.
This problem does not exist on arm64, so bypass this check by introducing soft_online_page. This is only used to bypass this check; real failure pages in x86 should call this to online.
Signed-off-by: Ma Wupeng mawupeng1@huawei.com --- include/linux/mm.h | 1 + mm/memory-failure.c | 39 +++++++++++++++++++++++++-------------- 2 files changed, 26 insertions(+), 14 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h index 0ed628b136a05..67a5205b02c67 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3880,6 +3880,7 @@ int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index, extern int memory_failure(unsigned long pfn, int flags); extern void memory_failure_queue_kick(int cpu); extern int unpoison_memory(unsigned long pfn); +extern int soft_online_page(unsigned long pfn); extern void shake_page(struct page *p); extern atomic_long_t num_poisoned_pages __read_mostly; extern int soft_offline_page(unsigned long pfn, int flags); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 367e303ba565e..d67cc40693678 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2496,19 +2496,7 @@ core_initcall(memory_failure_init); pr_info(fmt, pfn); \ })
-/** - * unpoison_memory - Unpoison a previously poisoned page - * @pfn: Page number of the to be unpoisoned page - * - * Software-unpoison a page that has been poisoned by - * memory_failure() earlier. - * - * This is only done on the software-level, so it only works - * for linux injected failures, not real hardware failures - * - * Returns 0 for success, otherwise -errno. - */ -int unpoison_memory(unsigned long pfn) +static int __unpoison_memory(unsigned long pfn, bool hw_mf_check) { struct folio *folio; struct page *p; @@ -2526,7 +2514,7 @@ int unpoison_memory(unsigned long pfn)
mutex_lock(&mf_mutex);
- if (hw_memory_failure) { + if (hw_mf_check && hw_memory_failure) { unpoison_pr_info("Unpoison: Disabled after HW memory failure %#lx\n", pfn, &unpoison_rs); ret = -EOPNOTSUPP; @@ -2609,8 +2597,31 @@ int unpoison_memory(unsigned long pfn) } return ret; } + +/** + * unpoison_memory - Unpoison a previously poisoned page + * @pfn: Page number of the to be unpoisoned page + * + * Software-unpoison a page that has been poisoned by + * memory_failure() earlier. + * + * This is only done on the software-level, so it only works + * for linux injected failures, not real hardware failures + * + * Returns 0 for success, otherwise -errno. + */ +int unpoison_memory(unsigned long pfn) +{ + return __unpoison_memory(pfn, true); +} EXPORT_SYMBOL(unpoison_memory);
+int soft_online_page(unsigned long pfn) +{ + return __unpoison_memory(pfn, false); +} +EXPORT_SYMBOL_GPL(soft_online_page); + static bool isolate_page(struct page *page, struct list_head *pagelist) { bool isolated = false;
From: Ma Wupeng mawupeng1@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I90COS
--------------------------------
Page ejection is achieved by soft_offline_page and unpoison_memory. A linked list is maintained to record the pfns which are offlined by this module. Only the pfns present in the list are allowed to go online.
The following sysfs files are introduced to online/offline pages via this module. Since there is an internal linked list that records all offlined pages, remove_page is introduced for when a user no longer wishes to online a particular page. To reduce the length of the internal list, this function removes the specified page from the list.
- /sys/kernel/page_eject/online_page - /sys/kernel/page_eject/offline_page - /sys/kernel/page_eject/remove_page
Signed-off-by: Ma Wupeng mawupeng1@huawei.com --- drivers/ras/Kconfig | 10 ++ drivers/ras/Makefile | 1 + drivers/ras/page_eject.c | 249 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 260 insertions(+) create mode 100644 drivers/ras/page_eject.c
diff --git a/drivers/ras/Kconfig b/drivers/ras/Kconfig index c2a236f2e8460..bbe9dc12c07f0 100644 --- a/drivers/ras/Kconfig +++ b/drivers/ras/Kconfig @@ -29,6 +29,16 @@ menuconfig RAS so have ideal availability, but may be unreliable, with frequent data corruption.
+config PAGE_EJECT + tristate "page eject" + default m + depends on ARM64 && MEMORY_FAILURE + help + Used to eject page, which is achieved by soft_offline_page and + unpoison_memory. A linked list is maintained to log the pfns + which are offlined by this module. Only the pfns present in the + list is allowed to go online. + if RAS
source "arch/x86/ras/Kconfig" diff --git a/drivers/ras/Makefile b/drivers/ras/Makefile index 6f0404f501071..ba551a2403c32 100644 --- a/drivers/ras/Makefile +++ b/drivers/ras/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_RAS) += ras.o obj-$(CONFIG_DEBUG_FS) += debugfs.o obj-$(CONFIG_RAS_CEC) += cec.o +obj-$(CONFIG_PAGE_EJECT) += page_eject.o diff --git a/drivers/ras/page_eject.c b/drivers/ras/page_eject.c new file mode 100644 index 0000000000000..310f27f4d7a16 --- /dev/null +++ b/drivers/ras/page_eject.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) Huawei Technologies Co., Ltd. 2023. All rights reserved. + */ + +#define pr_fmt(fmt) "page eject: " fmt + +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/slab.h> + +static struct list_head eject_page_list = LIST_HEAD_INIT(eject_page_list); +static DEFINE_MUTEX(eject_page_mutex); +static struct kobject *eject_page_kobj; + +struct ejected_pfn { + struct list_head list; + unsigned long pfn; +}; + +static struct ejected_pfn *page_eject_remove_pfn_locked(unsigned long pfn) +{ + struct ejected_pfn *item, *next, *ret = NULL; + + mutex_lock(&eject_page_mutex); + list_for_each_entry_safe(item, next, &eject_page_list, list) { + if (pfn == item->pfn) { + list_del(&item->list); + ret = item; + break; + } + } + mutex_unlock(&eject_page_mutex); + + return ret; +} + +static void page_eject_add_pfn_locked(struct ejected_pfn *item) +{ + mutex_lock(&eject_page_mutex); + list_add_tail(&item->list, &eject_page_list); + mutex_unlock(&eject_page_mutex); +} + +static void page_eject_clear_list_locked(void) +{ + struct ejected_pfn *item, *next; + + mutex_lock(&eject_page_mutex); + list_for_each_entry_safe(item, next, &eject_page_list, list) { + list_del(&item->list); + kfree(item); + } + mutex_unlock(&eject_page_mutex); +} + +static int page_eject_offline_page(unsigned long pfn) +{ + struct ejected_pfn *item; + struct page *page; + int ret; + + page = pfn_to_online_page(pfn); + if (!page) + return 
-EINVAL; + + if (PageHWPoison(page)) { + pr_err("page fail to be offlined, page is already offlined, pfn: %#lx\n", pfn); + return -EINVAL; + } + + item = kzalloc(sizeof(struct ejected_pfn), GFP_KERNEL); + if (!item) + return -ENOMEM; + + /* + * if soft_offline_page return 0 because PageHWPoison, this pfn + * will add to list and this add will be removed during online + * since it is poisoned. + */ + ret = soft_offline_page(pfn, 0); + if (ret) { + pr_err("page fail to be offlined, soft_offline_page failed(%d), pfn=%#lx\n", + ret, pfn); + kfree(item); + return ret; + } + + item->pfn = pfn; + + page_eject_add_pfn_locked(item); + + return 0; +} + +static int page_eject_online_page(unsigned long pfn) +{ + struct ejected_pfn *item; + struct page *page; + int ret; + + page = pfn_to_online_page(pfn); + if (!page) + return -EINVAL; + + item = page_eject_remove_pfn_locked(pfn); + if (!item) { + pr_err("page failed to be onlined, pfn: %#lx\n", pfn); + return -EINVAL; + } + + ret = soft_online_page(pfn); + if (!ret) { + kfree(item); + return ret; + } + + /* re-add pfn to list if unpoison failed */ + page_eject_add_pfn_locked(item); + pr_err("page failed to be onlined, online error(%d), pfn: %#lx\n", + ret, pfn); + return ret; +} + +static int page_eject_remove_page(unsigned long pfn) +{ + struct ejected_pfn *item; + + item = page_eject_remove_pfn_locked(pfn); + if (!item) { + pr_info("page fail to be removed, pfn: %#lx\n", pfn); + return -EINVAL; + } + + kfree(item); + + return 0; +} + +static ssize_t offline_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + u64 paddr; + int res; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (kstrtoull(buf, 16, &paddr)) + return -EINVAL; + + res = page_eject_offline_page(paddr >> PAGE_SHIFT); + if (res) + return res; + + return count; +} + +static ssize_t online_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + u64 paddr; + int res; + + if 
(!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (kstrtoull(buf, 16, &paddr)) + return -EINVAL; + + res = page_eject_online_page(paddr >> PAGE_SHIFT); + if (res) + return res; + + return count; +} + +static ssize_t remove_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + u64 paddr; + int res; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (kstrtoull(buf, 16, &paddr)) + return -EINVAL; + + res = page_eject_remove_page(paddr >> PAGE_SHIFT); + if (res) + return res; + + return count; +} + +static struct kobj_attribute online_attr = + __ATTR(online_page, 0200, NULL, online_store); +static struct kobj_attribute offline_attr = + __ATTR(offline_page, 0200, NULL, offline_store); +static struct kobj_attribute remove_attr = + __ATTR(remove_page, 0200, NULL, remove_store); + +static struct attribute *eject_page_attrs[] = { + &offline_attr.attr, + &online_attr.attr, + &remove_attr.attr, + NULL, +}; + +static struct attribute_group eject_page_attr_group = { + .attrs = eject_page_attrs, +}; + +static int __init page_eject_init(void) +{ + int ret = -ENOMEM; + + eject_page_kobj = kobject_create_and_add("page_eject", kernel_kobj); + if (!eject_page_kobj) + return ret; + + ret = sysfs_create_group(eject_page_kobj, &eject_page_attr_group); + if (ret) { + kobject_put(eject_page_kobj); + return ret; + } + + mutex_init(&eject_page_mutex); + + pr_info("init page eject succeed\n"); + return ret; +} + +static void __exit page_eject_exit(void) +{ + page_eject_clear_list_locked(); + + kobject_put(eject_page_kobj); + + pr_info("exit page eject succeed\n"); +} + +module_init(page_eject_init); +module_exit(page_eject_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ma Wupeng mawupeng1@huawei.com"); +MODULE_DESCRIPTION("page eject");
From: Ma Wupeng mawupeng1@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I90COS
--------------------------------
Enable PAGE_EJECT for arm64 by default.
Signed-off-by: Ma Wupeng mawupeng1@huawei.com --- arch/arm64/configs/openeuler_defconfig | 1 + 1 file changed, 1 insertion(+)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 3987545e76768..9d2a3c9bebc11 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -6733,6 +6733,7 @@ CONFIG_HNS3_PMU=m # end of Performance monitor support
CONFIG_RAS=y +CONFIG_PAGE_EJECT=m CONFIG_USB4=m # CONFIG_USB4_DEBUGFS_WRITE is not set # CONFIG_USB4_DMA_TEST is not set
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/4439 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/E...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/4439 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/E...