From: Ma Wupeng mawupeng1@huawei.com
Page ejection is achieved by soft_offline_page and unpoison_memory. A linked list is maintained to log the pfns which are offlined by this module. Only pfns present in the list are allowed to go online.
The following sysfs files are introduced to online/offline pages via this module. Since there is an internal linked list that records all offlined pages, remove_page is introduced to remove a page from this list.
- /sys/kernel/page_eject/online_page - /sys/kernel/page_eject/offline_page - /sys/kernel/page_eject/remove_page
Besides page ejection, machine check safety is enhanced as follows:
During soft offline of a page, the page is copied to a new page in the kernel. If the original page has a UCE, this will lead to a kernel panic.
In order to solve this problem, use machine check safe to catch this error, which can be achieved by using copy_mc_to_kernel to replace copy_page. Signal SIGBUS will be sent to the user task if the UCE is consumed in this situation, to avoid a kernel panic.
Ma Wupeng (3): mm/hwpoison: Export symbol soft_offline_page mm: page_eject: Introduce page ejection config: update defconfig for PAGE_EJECT
arch/arm64/configs/openeuler_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + drivers/ras/Kconfig | 10 + drivers/ras/Makefile | 1 + drivers/ras/page_eject.c | 248 +++++++++++++++++++++++++ mm/memory-failure.c | 1 + 6 files changed, 262 insertions(+) create mode 100644 drivers/ras/page_eject.c
From: Ma Wupeng mawupeng1@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8K5CO
--------------------------------
Export symbol soft_offline_page so that other modules can use it.
Function soft_offline_page is used to isolate pages. During page isolation, a migration will be issued if the current page is mapped by a user task, and the page will finally be removed from the buddy system.
Signed-off-by: Ma Wupeng mawupeng1@huawei.com --- mm/memory-failure.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 406895b98422..b87ba13fd56b 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2184,3 +2184,4 @@ int soft_offline_page(unsigned long pfn, int flags)
return ret; } +EXPORT_SYMBOL_GPL(soft_offline_page);
From: Ma Wupeng mawupeng1@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8K5CO
--------------------------------
Page ejection is achieved by soft_offline_page and unpoison_memory. A linked list is maintained to log the pfns which are offlined by this module. Only pfns present in the list are allowed to go online.
The following sysfs files are introduced to online/offline pages via this module. Since there is an internal linked list that records all offlined pages, remove_page is introduced to remove a page from this list.
- /sys/kernel/page_eject/online_page - /sys/kernel/page_eject/offline_page - /sys/kernel/page_eject/remove_page
Signed-off-by: Ma Wupeng mawupeng1@huawei.com --- drivers/ras/Kconfig | 10 ++ drivers/ras/Makefile | 1 + drivers/ras/page_eject.c | 248 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 259 insertions(+) create mode 100644 drivers/ras/page_eject.c
diff --git a/drivers/ras/Kconfig b/drivers/ras/Kconfig index c2a236f2e846..0a98685b5075 100644 --- a/drivers/ras/Kconfig +++ b/drivers/ras/Kconfig @@ -29,6 +29,16 @@ menuconfig RAS so have ideal availability, but may be unreliable, with frequent data corruption.
+config PAGE_EJECT + tristate "page eject" + default m + depends on MEMORY_FAILURE + help + Used to eject page, which is achieved by soft_offline_page and + unpoison_memory. A linked list is maintained to log the pfns + which are offlined by this module. Only the pfns present in the + list is allowed to go online. + if RAS
source "arch/x86/ras/Kconfig"
diff --git a/drivers/ras/Makefile b/drivers/ras/Makefile
index 6f0404f50107..ba551a2403c3 100644
--- a/drivers/ras/Makefile
+++ b/drivers/ras/Makefile
@@ -2,3 +2,4 @@
 obj-$(CONFIG_RAS) += ras.o
 obj-$(CONFIG_DEBUG_FS) += debugfs.o
 obj-$(CONFIG_RAS_CEC) += cec.o
+obj-$(CONFIG_PAGE_EJECT) += page_eject.o
diff --git a/drivers/ras/page_eject.c b/drivers/ras/page_eject.c
new file mode 100644
index 000000000000..e9f6af6d100f
--- /dev/null
+++ b/drivers/ras/page_eject.c
@@ -0,0 +1,295 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+ */
+
+#define pr_fmt(fmt) "page eject: " fmt
+
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+/* pfns offlined through this module; protected by eject_page_mutex */
+static struct list_head eject_page_list = LIST_HEAD_INIT(eject_page_list);
+static DEFINE_MUTEX(eject_page_mutex);
+static struct kobject *eject_page_kobj;
+
+struct ejected_pfn {
+	struct list_head list;
+	unsigned long pfn;
+};
+
+/*
+ * Unlink and return the list entry recording @pfn, or NULL if @pfn is not
+ * on the list. Takes eject_page_mutex itself; the caller owns the returned
+ * entry and must either free it or add it back.
+ */
+static struct ejected_pfn *page_eject_remove_pfn_locked(unsigned long pfn)
+{
+	struct ejected_pfn *item, *found = NULL;
+
+	mutex_lock(&eject_page_mutex);
+	/*
+	 * The loop cursor is never NULL after the walk: on a miss it points
+	 * at the container of the list head. Track the match separately so
+	 * that a miss returns NULL and never unlinks the head.
+	 */
+	list_for_each_entry(item, &eject_page_list, list) {
+		if (item->pfn == pfn) {
+			found = item;
+			list_del(&found->list);
+			break;
+		}
+	}
+	mutex_unlock(&eject_page_mutex);
+
+	return found;
+}
+
+/* Append @item to the list of ejected pfns under eject_page_mutex. */
+static void page_eject_add_pfn_locked(struct ejected_pfn *item)
+{
+	mutex_lock(&eject_page_mutex);
+	list_add_tail(&item->list, &eject_page_list);
+	mutex_unlock(&eject_page_mutex);
+}
+
+/* Unlink and free every entry on the list under eject_page_mutex. */
+static void page_eject_clear_list_locked(void)
+{
+	struct ejected_pfn *item, *next;
+
+	mutex_lock(&eject_page_mutex);
+	list_for_each_entry_safe(item, next, &eject_page_list, list) {
+		list_del(&item->list);
+		kfree(item);
+	}
+	mutex_unlock(&eject_page_mutex);
+}
+
+/*
+ * Soft-offline @pfn and record it on the list.
+ *
+ * Returns 0 on success, -EINVAL if @pfn has no online page or the page is
+ * already HWPoison, -ENOMEM if the list entry cannot be allocated, or the
+ * error returned by soft_offline_page().
+ */
+static int page_eject_offline_page(unsigned long pfn)
+{
+	struct ejected_pfn *item;
+	struct page *page;
+	int ret;
+
+	page = pfn_to_online_page(pfn);
+	if (!page)
+		return -EINVAL;
+
+	if (PageHWPoison(page)) {
+		pr_err("page fail to be offlined, page is already offlined, pfn: %#lx\n", pfn);
+		return -EINVAL;
+	}
+
+	item = kzalloc(sizeof(*item), GFP_KERNEL);
+	if (!item)
+		return -ENOMEM;
+
+	/*
+	 * If soft_offline_page() returns 0 because of PageHWPoison, this pfn
+	 * is still added to the list; the entry is removed again during
+	 * online since the page is poisoned.
+	 */
+	ret = soft_offline_page(pfn, 0);
+	if (ret) {
+		pr_err("page fail to be offlined, soft_offline_page failed(%d), pfn=%#lx\n",
+		       ret, pfn);
+		kfree(item);
+		return ret;
+	}
+
+	item->pfn = pfn;
+
+	page_eject_add_pfn_locked(item);
+
+	return 0;
+}
+
+/*
+ * Unpoison @pfn, but only if it was offlined through this module.
+ *
+ * Returns 0 on success, -EINVAL if @pfn has no online page or is not on
+ * the list, or the error returned by unpoison_memory(), in which case the
+ * pfn is put back on the list.
+ */
+static int page_eject_online_page(unsigned long pfn)
+{
+	struct ejected_pfn *item;
+	struct page *page;
+	int ret;
+
+	page = pfn_to_online_page(pfn);
+	if (!page)
+		return -EINVAL;
+
+	item = page_eject_remove_pfn_locked(pfn);
+	if (!item) {
+		pr_err("page failed to be onlined, pfn: %#lx\n", pfn);
+		return -EINVAL;
+	}
+
+	ret = unpoison_memory(pfn);
+	if (!ret) {
+		kfree(item);
+		return ret;
+	}
+
+	/* re-add pfn to list if unpoison failed */
+	page_eject_add_pfn_locked(item);
+	pr_err("page failed to be onlined, unpoison_memory error(%d), pfn: %#lx\n",
+	       ret, pfn);
+	return ret;
+}
+
+/*
+ * Drop @pfn from the list without touching the page itself.
+ *
+ * Returns 0 on success or -EINVAL if @pfn is not on the list.
+ */
+static int page_eject_remove_page(unsigned long pfn)
+{
+	struct ejected_pfn *item;
+
+	item = page_eject_remove_pfn_locked(pfn);
+	if (!item) {
+		pr_info("page fail to be removed, pfn: %#lx\n", pfn);
+		return -EINVAL;
+	}
+
+	kfree(item);
+
+	return 0;
+}
+
+/*
+ * sysfs store handlers. Each one requires CAP_SYS_ADMIN, parses @buf as a
+ * hexadecimal physical address and acts on the pfn containing it. They
+ * return @count on success or a negative errno.
+ */
+static ssize_t offline_store(struct kobject *kobj, struct kobj_attribute *attr,
+			     const char *buf, size_t count)
+{
+	u64 paddr;
+	int res;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (kstrtoull(buf, 16, &paddr))
+		return -EINVAL;
+
+	res = page_eject_offline_page(paddr >> PAGE_SHIFT);
+	if (res)
+		return res;
+
+	return count;
+}
+
+static ssize_t online_store(struct kobject *kobj, struct kobj_attribute *attr,
+			    const char *buf, size_t count)
+{
+	u64 paddr;
+	int res;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (kstrtoull(buf, 16, &paddr))
+		return -EINVAL;
+
+	res = page_eject_online_page(paddr >> PAGE_SHIFT);
+	if (res)
+		return res;
+
+	return count;
+}
+
+static ssize_t remove_store(struct kobject *kobj, struct kobj_attribute *attr,
+			    const char *buf, size_t count)
+{
+	u64 paddr;
+	int res;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (kstrtoull(buf, 16, &paddr))
+		return -EINVAL;
+
+	res = page_eject_remove_page(paddr >> PAGE_SHIFT);
+	if (res)
+		return res;
+
+	return count;
+}
+
+static struct kobj_attribute online_attr =
+	__ATTR(online_page, 0200, NULL, online_store);
+static struct kobj_attribute offline_attr =
+	__ATTR(offline_page, 0200, NULL, offline_store);
+static struct kobj_attribute remove_attr =
+	__ATTR(remove_page, 0200, NULL, remove_store);
+
+static struct attribute *eject_page_attrs[] = {
+	&offline_attr.attr,
+	&online_attr.attr,
+	&remove_attr.attr,
+	NULL,
+};
+
+static struct attribute_group eject_page_attr_group = {
+	.attrs = eject_page_attrs,
+};
+
+/* Create /sys/kernel/page_eject and its attribute files. */
+static int __init page_eject_init(void)
+{
+	int ret = -ENOMEM;
+
+	eject_page_kobj = kobject_create_and_add("page_eject", kernel_kobj);
+	if (!eject_page_kobj)
+		return ret;
+
+	/*
+	 * eject_page_mutex is already initialised by DEFINE_MUTEX(). Do not
+	 * call mutex_init() on it here: once the group exists a store
+	 * handler may hold the lock, and re-initialising would reset it.
+	 */
+	ret = sysfs_create_group(eject_page_kobj, &eject_page_attr_group);
+	if (ret) {
+		kobject_put(eject_page_kobj);
+		return ret;
+	}
+
+	pr_info("init page eject succeed\n");
+	return ret;
+}
+
+/* Tear down the sysfs interface and free the recorded pfns. */
+static void __exit page_eject_exit(void)
+{
+	/*
+	 * Release the kobject before emptying the list so that a store
+	 * handler cannot add an entry after the list has been cleared.
+	 */
+	kobject_put(eject_page_kobj);
+
+	page_eject_clear_list_locked();
+
+	pr_info("exit page eject succeed\n");
+}
+
+module_init(page_eject_init);
+module_exit(page_eject_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ma Wupeng mawupeng1@huawei.com");
+MODULE_DESCRIPTION("page eject");
From: Ma Wupeng mawupeng1@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8K5CO
--------------------------------
Enable PAGE_EJECT for arm64 by default and disable it for x86, since it has not been fully tested on x86.
Signed-off-by: Ma Wupeng mawupeng1@huawei.com --- arch/arm64/configs/openeuler_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + 2 files changed, 2 insertions(+)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index b04770986bc7..5b79dccc7278 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -6173,6 +6173,7 @@ CONFIG_HISI_LPDDRC_PMU=m # end of Performance monitor support
CONFIG_RAS=y +CONFIG_PAGE_EJECT=m CONFIG_USB4=m # CONFIG_USB4_DEBUGFS_WRITE is not set
diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index 32236e4fa96d..d445ec58a0d3 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -7242,6 +7242,7 @@ CONFIG_INTEL_RAPL=m # end of Performance monitor support
CONFIG_RAS=y +# CONFIG_PAGE_EJECT is not set # CONFIG_RAS_CEC is not set CONFIG_USB4=m # CONFIG_USB4_DEBUGFS_WRITE is not set
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/3055 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/U...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/3055 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/U...