Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8CCP5
----------------------------------
Hbm memory device add support for acls hot repair. The patch add two methods for userpace: - query a paddr if it support acls repair - repair a paddr in hbm memory device
The feature of ACLS hot repair can help to fix a memory error from userspace by passing through the error physical address to HBM hardware.
Signed-off-by: Zhang Zekun zhangzekun11@huawei.com --- drivers/soc/hisilicon/Kconfig | 10 +++ drivers/soc/hisilicon/hisi_hbmdev.c | 134 ++++++++++++++++++++++++++++ 2 files changed, 144 insertions(+)
diff --git a/drivers/soc/hisilicon/Kconfig b/drivers/soc/hisilicon/Kconfig index 383375f20cac..45754528873e 100644 --- a/drivers/soc/hisilicon/Kconfig +++ b/drivers/soc/hisilicon/Kconfig @@ -44,4 +44,14 @@ config HISI_HBMCACHE To compile the driver as a module, choose M here: the module will be called hisi_hbmcache.
+config HISI_HBMDEV_ACLS + bool "Add support for HISI ACLS repair" + depends on HISI_HBMDEV + help + Add ACLS support for hbm device, which can be used to query and + repair hardware error in HBM devices. This feature need to work with + hardware firmwares. + + If not sure say no. + endmenu diff --git a/drivers/soc/hisilicon/hisi_hbmdev.c b/drivers/soc/hisilicon/hisi_hbmdev.c index 5b6b1618148c..5cde7a123033 100644 --- a/drivers/soc/hisilicon/hisi_hbmdev.c +++ b/drivers/soc/hisilicon/hisi_hbmdev.c @@ -26,6 +26,9 @@ struct memory_dev { struct kobject *memdev_kobj; struct kobject *topo_kobj; struct cdev_node cdev_list; +#ifdef CONFIG_HISI_HBMDEV_ACLS + struct kobject *acls_kobj; +#endif nodemask_t cluster_cpumask[MAX_NUMNODES]; };
@@ -85,6 +88,131 @@ static void memory_topo_init(void) kobject_put(mdev->topo_kobj); }
+#ifdef CONFIG_HISI_HBMDEV_ACLS +static struct acpi_device* paddr_to_acpi_device(u64 paddr) +{ + int nid; + u64 pfn; + + pfn = __phys_to_pfn(paddr); + if (!pfn_valid(pfn)) + return NULL; + + nid = pfn_to_nid(pfn); + if (nid < 0 && nid >= MAX_NUMNODES) + return NULL; + + return hotplug_mdev[nid]; +} + +static ssize_t acls_query_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct acpi_object_list arg_list; + struct acpi_device *adev; + union acpi_object obj; + acpi_status status; + u64 paddr, res; + + if (kstrtoull(buf, 16, &paddr)) + return -EINVAL; + + adev = paddr_to_acpi_device(paddr); + if (!adev) + return -EINVAL; + + obj.type = ACPI_TYPE_INTEGER; + obj.integer.value = paddr; + arg_list.count = 1; + arg_list.pointer = &obj; + + status = acpi_evaluate_integer(adev->handle, "AQRY", &arg_list, &res); + if (ACPI_FAILURE(status)) + return -ENODEV; + + if (IS_ERR_VALUE(-res)) + return -res; + else if (res) + return -ENODEV; + + return count; +} + +static struct kobj_attribute acls_query_store_attribute = + __ATTR(acls_query, 0200, NULL, acls_query_store); + +static ssize_t acls_repair_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct acpi_object_list arg_list; + struct acpi_device *adev; + union acpi_object obj; + acpi_status status; + u64 paddr, res; + + if (kstrtoull(buf, 16, &paddr)) + return -EINVAL; + + adev = paddr_to_acpi_device(paddr); + if (!adev) + return -EINVAL; + + obj.type = ACPI_TYPE_INTEGER; + obj.integer.value = paddr; + arg_list.count = 1; + arg_list.pointer = &obj; + + status = acpi_evaluate_integer(adev->handle, "AREP", &arg_list, &res); + if (ACPI_FAILURE(status)) + return -ENODEV; + + if (IS_ERR_VALUE(-res)) + return -res; + else if (res) + return -ENODEV; + + return count; +} +static struct kobj_attribute acls_repair_store_attribute = + __ATTR(acls_repair, 0200, NULL, acls_repair_store); + +static struct attribute *acls_attrs[] = { + &acls_query_store_attribute.attr, + &acls_repair_store_attribute.attr, + NULL, +}; + +static struct attribute_group acls_attr_group = { + .attrs = acls_attrs, +}; + +static void acls_init(void) +{ + int ret = -ENOMEM; + + mdev->acls_kobj = kobject_create_and_add("acls", mdev->memdev_kobj); + if (!mdev->acls_kobj) + goto out; + + ret = sysfs_create_group(mdev->acls_kobj, &acls_attr_group); + if (ret) + kobject_put(mdev->acls_kobj); + +out: + if (ret) + pr_err("ACLS hot repair is not enabled\n"); + + return; +} + +static void acls_remove(void) +{ + kobject_put(mdev->acls_kobj); +} +#endif + static int get_pxm(struct acpi_device *acpi_device, void *arg) { acpi_handle handle = acpi_device->handle; @@ -284,6 +412,9 @@ static int __init mdev_init(void) }
memory_topo_init(); +#ifdef CONFIG_HISI_HBMDEV_ACLS + acls_init(); +#endif return ret; } module_init(mdev_init); @@ -293,6 +424,9 @@ static void __exit mdev_exit(void) container_remove(); kobject_put(mdev->memdev_kobj); kobject_put(mdev->topo_kobj); +#ifdef CONFIG_HISI_HBMDEV_ACLS + acls_remove(); +#endif kfree(mdev); } module_exit(mdev_exit);
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/2738 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/C...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/2738 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/C...