这个config移到上面
On 2023/12/14 15:46, Zhang Zekun wrote:
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8CCP5
CVE: NA
Add support for ACLS hot repair to the HBM memory device. This patch adds two methods for userspace:
- query whether a paddr supports ACLS repair
- repair a paddr in the HBM memory device
ACLS hot repair lets userspace fix a memory error by passing the faulty physical address through to the HBM hardware.
Signed-off-by: Zhang Zekun <zhangzekun11@huawei.com>
drivers/soc/hisilicon/Kconfig | 10 +++ drivers/soc/hisilicon/hisi_hbmdev.c | 134 ++++++++++++++++++++++++++++ 2 files changed, 144 insertions(+)
diff --git a/drivers/soc/hisilicon/Kconfig b/drivers/soc/hisilicon/Kconfig index 6d3067440c54..0a07d0e266a5 100644 --- a/drivers/soc/hisilicon/Kconfig +++ b/drivers/soc/hisilicon/Kconfig @@ -29,6 +29,16 @@ config HISI_HBMCACHE To compile the driver as a module, choose M here: the module will be called hisi_hbmcache.
# Optional ACLS query/repair interface; only selectable with HISI_HBMDEV.
config HISI_HBMDEV_ACLS
	bool "Add support for HISI ACLS repair"
	depends on HISI_HBMDEV
	help
	  Add ACLS support for hbm device, which can be used to query and
	  repair hardware error in HBM devices. This feature need to work with
	  hardware firmwares.

	  If not sure say no.
- config KUNPENG_HCCS tristate "HCCS driver on Kunpeng SoC" depends on ACPI
diff --git a/drivers/soc/hisilicon/hisi_hbmdev.c b/drivers/soc/hisilicon/hisi_hbmdev.c index 5b6b1618148c..a9cc78bde81b 100644 --- a/drivers/soc/hisilicon/hisi_hbmdev.c +++ b/drivers/soc/hisilicon/hisi_hbmdev.c @@ -11,6 +11,7 @@ #include <linux/node.h> #include <linux/arch_topology.h> #include <linux/memory_hotplug.h> +#include <linux/mm.h>
#include "hisi_internal.h"
@@ -25,6 +26,9 @@ struct cdev_node { struct memory_dev { struct kobject *memdev_kobj; struct kobject *topo_kobj; +#ifdef CONFIG_HISI_HBMDEV_ACLS
- struct kobject *acls_kobj;
+#endif struct cdev_node cdev_list; nodemask_t cluster_cpumask[MAX_NUMNODES]; }; @@ -85,6 +89,134 @@ static void memory_topo_init(void) kobject_put(mdev->topo_kobj); }
+#ifdef CONFIG_HISI_HBMDEV_ACLS +static struct acpi_device *paddr_to_acpi_device(u64 paddr) +{
- unsigned long pfn;
- int nid;
- pfn = __phys_to_pfn(paddr);
- if (!pfn_valid(pfn))
return NULL;
- nid = pfn_to_nid(pfn);
- if (nid < 0 && nid >= MAX_NUMNODES)
return NULL;
- return hotplug_mdev[nid];
+}
+static ssize_t acls_query_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
+{
- struct acpi_object_list arg_list;
- struct acpi_device *adev;
- union acpi_object obj;
- acpi_status status;
- u64 paddr, res;
- if (kstrtoull(buf, 16, &paddr))
return -EINVAL;
- adev = paddr_to_acpi_device(paddr);
- if (!adev)
return -EINVAL;
- obj.type = ACPI_TYPE_INTEGER;
- obj.integer.value = paddr;
- arg_list.count = 1;
- arg_list.pointer = &obj;
- status = acpi_evaluate_integer(adev->handle, "AQRY", &arg_list, &res);
- if (ACPI_FAILURE(status))
return -ENODEV;
- /* AQRY will return a positive error code to represent error status */
- if (IS_ERR_VALUE(-res))
return -res;
- else if (res)
return -ENODEV;
- return count;
+}
+static struct kobj_attribute acls_query_store_attribute =
- __ATTR(acls_query, 0200, NULL, acls_query_store);
+static ssize_t acls_repair_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
+{
- struct acpi_object_list arg_list;
- struct acpi_device *adev;
- union acpi_object obj;
- acpi_status status;
- u64 paddr, res;
- if (kstrtoull(buf, 16, &paddr))
return -EINVAL;
- adev = paddr_to_acpi_device(paddr);
- if (!adev)
return -EINVAL;
- obj.type = ACPI_TYPE_INTEGER;
- obj.integer.value = paddr;
- arg_list.count = 1;
- arg_list.pointer = &obj;
- status = acpi_evaluate_integer(adev->handle, "AREP", &arg_list, &res);
- if (ACPI_FAILURE(status))
return -ENODEV;
- /* AREP will return a positive error code to represent error status */
- if (IS_ERR_VALUE(-res))
return -res;
- else if (res)
return -ENODEV;
- return count;
+} +static struct kobj_attribute acls_repair_store_attribute =
- __ATTR(acls_repair, 0200, NULL, acls_repair_store);
+static struct attribute *acls_attrs[] = {
- &acls_query_store_attribute.attr,
- &acls_repair_store_attribute.attr,
- NULL,
+};
+static struct attribute_group acls_attr_group = {
- .attrs = acls_attrs,
+};
+static void acls_init(void) +{
- int ret = -ENOMEM;
- mdev->acls_kobj = kobject_create_and_add("acls", mdev->memdev_kobj);
- if (!mdev->acls_kobj)
goto out;
- ret = sysfs_create_group(mdev->acls_kobj, &acls_attr_group);
- if (ret)
kobject_put(mdev->acls_kobj);
+out:
- if (ret)
pr_err("ACLS hot repair is not enabled\n");
+}
+static void acls_remove(void) +{
- kobject_put(mdev->acls_kobj);
+} +#else +static void acls_init(void) {} +static void acls_remove(void) {} +#endif
- static int get_pxm(struct acpi_device *acpi_device, void *arg) { acpi_handle handle = acpi_device->handle;
@@ -284,6 +416,7 @@ static int __init mdev_init(void) }
memory_topo_init();
- acls_init(); return ret; } module_init(mdev_init);
@@ -293,6 +426,7 @@ static void __exit mdev_exit(void) container_remove(); kobject_put(mdev->memdev_kobj); kobject_put(mdev->topo_kobj);
- acls_remove(); kfree(mdev); } module_exit(mdev_exit);