hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I67QNJ CVE: NA
------------------------------------------------------------------
Platform devices which supports power control are often required to be power off/on together with the devices in the same power domain. However, there isn't a generic driver that support the power control logic of these devices.
ACPI container seems to be a good place to hold these control logic. Add platform devices in the same power domain in a ACPI container, we can easily get the locality information about these devices and can moniter the power of these devices in the same power domain together.
This patch provide three userspace control interface to control the power of devices together in the container: - state: Echo online to state to power up the devices in the container and then online these devices which will be triggered by BIOS. Echo offline to the state to offline and eject the child devices in the container which are ejectable. - pxms: show the pxms of devices which are present in the container.
In our scenario, we need to control the power of HBM memory devices which can be power consuming and will only be used in some specialized scenarios, such as HPC. HBM memory devices in a socket are in the same power domain, and should be power off/on together. We have come up with an idea that put these power control logic in a specialized driver, but ACPI container seems to be a more generic place to hold these control logic.
Signed-off-by: Zhang Zekun zhangzekun11@huawei.com --- v2->v3: - replace module license "GPL" with "GPL V2"
drivers/base/container.c | 3 + drivers/soc/hisilicon/Kconfig | 12 ++ drivers/soc/hisilicon/Makefile | 2 + drivers/soc/hisilicon/hisi_hbmdev.c | 240 ++++++++++++++++++++++++++ drivers/soc/hisilicon/hisi_internal.h | 31 ++++ 5 files changed, 288 insertions(+) create mode 100644 drivers/soc/hisilicon/hisi_hbmdev.c create mode 100644 drivers/soc/hisilicon/hisi_internal.h
diff --git a/drivers/base/container.c b/drivers/base/container.c index 1ba42d2d3532..055dfa8daff1 100644 --- a/drivers/base/container.c +++ b/drivers/base/container.c @@ -30,6 +30,9 @@ struct bus_type container_subsys = { .online = trivial_online, .offline = container_offline, }; +#if IS_ENABLED(CONFIG_HISI_HBMDEV) +EXPORT_SYMBOL_GPL(container_subsys); +#endif
void __init container_dev_init(void) { diff --git a/drivers/soc/hisilicon/Kconfig b/drivers/soc/hisilicon/Kconfig index 0ab688af308f..3e5cf16be223 100644 --- a/drivers/soc/hisilicon/Kconfig +++ b/drivers/soc/hisilicon/Kconfig @@ -3,6 +3,18 @@ menu "Hisilicon SoC drivers" depends on ARCH_HISI || COMPILE_TEST
+config HISI_HBMDEV + tristate "add extra support for hbm memory device" + depends on ACPI_HOTPLUG_MEMORY + select ACPI_CONTAINER + help + This driver add extra supports for memory devices. The driver + provides methods for userpace to control the power of memory + devices in a container. + + To compile this driver as a module, choose M here: + the module will be called hisi_hbmdev. + config KUNPENG_HCCS tristate "HCCS driver on Kunpeng SoC" depends on ACPI diff --git a/drivers/soc/hisilicon/Makefile b/drivers/soc/hisilicon/Makefile index 226e747e70d6..4d5794b150b3 100644 --- a/drivers/soc/hisilicon/Makefile +++ b/drivers/soc/hisilicon/Makefile @@ -1,2 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_KUNPENG_HCCS) += kunpeng_hccs.o + +obj-$(CONFIG_HISI_HBMDEV) += hisi_hbmdev.o diff --git a/drivers/soc/hisilicon/hisi_hbmdev.c b/drivers/soc/hisilicon/hisi_hbmdev.c new file mode 100644 index 000000000000..0a0977956042 --- /dev/null +++ b/drivers/soc/hisilicon/hisi_hbmdev.c @@ -0,0 +1,240 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) Huawei Technologies Co., Ltd. 2023. All rights reserved. + */ + +#include <linux/kobject.h> +#include <linux/module.h> +#include <linux/nodemask.h> +#include <linux/acpi.h> +#include <linux/container.h> + +#include "hisi_internal.h" + +#define ACPI_MEMORY_DEVICE_HID "PNP0C80" +#define ACPI_GENERIC_CONTAINER_DEVICE_HID "PNP0A06" + +struct cdev_node { + struct device *dev; + struct list_head clist; +}; + +struct memory_dev { + struct kobject *memdev_kobj; + struct cdev_node cdev_list; +}; + +static struct memory_dev *mdev; + +static int get_pxm(struct acpi_device *acpi_device, void *arg) +{ + acpi_handle handle = acpi_device->handle; + nodemask_t *mask = arg; + unsigned long long sta; + acpi_status status; + int nid; + + status = acpi_evaluate_integer(handle, "_STA", NULL, &sta); + if (ACPI_SUCCESS(status) && (sta & ACPI_STA_DEVICE_ENABLED)) { + nid = acpi_get_node(handle); + if (nid >= 0) + node_set(nid, *mask); + } + + return 0; +} + +static ssize_t pxms_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct acpi_device *adev = ACPI_COMPANION(dev); + nodemask_t mask; + + nodes_clear(mask); + acpi_dev_for_each_child(adev, get_pxm, &mask); + + return sysfs_emit(buf, "%*pbl\n", + nodemask_pr_args(&mask)); +} +static DEVICE_ATTR_RO(pxms); + +static int memdev_power_on(struct acpi_device *adev) +{ + acpi_handle handle = adev->handle; + acpi_status status; + + status = acpi_evaluate_object(handle, "_ON", NULL, NULL); + if (ACPI_FAILURE(status)) { + acpi_handle_warn(handle, "Power on failed (0x%x)\n", status); + return -ENODEV; + } + + return 0; +} + +static int eject_device(struct acpi_device *acpi_device, void *not_used) +{ + acpi_object_type unused; + acpi_status status; + + status = acpi_get_type(acpi_device->handle, &unused); + if (ACPI_FAILURE(status) || !acpi_device->flags.ejectable) + return -ENODEV; + + get_device(&acpi_device->dev); + status = acpi_hotplug_schedule(acpi_device, ACPI_OST_EC_OSPM_EJECT); + if (ACPI_SUCCESS(status)) + return 0; + + put_device(&acpi_device->dev); + acpi_evaluate_ost(acpi_device->handle, ACPI_OST_EC_OSPM_EJECT, + ACPI_OST_SC_NON_SPECIFIC_FAILURE, NULL); + + return status == AE_NO_MEMORY ? -ENOMEM : -EAGAIN; +} + +static int memdev_power_off(struct acpi_device *adev) +{ + return acpi_dev_for_each_child(adev, eject_device, NULL); +} + +static ssize_t state_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct acpi_device *adev = ACPI_COMPANION(dev); + const int type = online_type_from_str(buf); + int ret = -EINVAL; + + switch (type) { + case STATE_ONLINE: + ret = memdev_power_on(adev); + break; + case STATE_OFFLINE: + ret = memdev_power_off(adev); + break; + default: + break; + } + + if (ret) + return ret; + + return count; +} +static DEVICE_ATTR_WO(state); + +static int hbmdev_find(struct acpi_device *adev, void *arg) +{ + const char *hid = acpi_device_hid(adev); + bool *found = arg; + + if (!strcmp(hid, ACPI_MEMORY_DEVICE_HID)) { + *found = true; + return -1; + } + + return 0; +} + +static bool has_hbmdev(struct device *dev) +{ + struct acpi_device *adev = ACPI_COMPANION(dev); + const char *hid = acpi_device_hid(adev); + bool found = false; + + if (strcmp(hid, ACPI_GENERIC_CONTAINER_DEVICE_HID)) + return found; + + acpi_dev_for_each_child(adev, hbmdev_find, &found); + + return found; +} + +static int container_add(struct device *dev, void *data) +{ + struct cdev_node *cnode; + + if (!has_hbmdev(dev)) + return 0; + + cnode = kmalloc(sizeof(struct cdev_node), GFP_KERNEL); + if (!cnode) + return -ENOMEM; + + cnode->dev = dev; + list_add_tail(&cnode->clist, &mdev->cdev_list.clist); + + return 0; +} + +static void container_remove(void) +{ + struct cdev_node *cnode, *tmp; + + list_for_each_entry_safe(cnode, tmp, &mdev->cdev_list.clist, clist) { + device_remove_file(cnode->dev, &dev_attr_state); + device_remove_file(cnode->dev, &dev_attr_pxms); + list_del(&cnode->clist); + kfree(cnode); + } +} + +static int container_init(void) +{ + struct cdev_node *cnode; + + INIT_LIST_HEAD(&mdev->cdev_list.clist); + + if (bus_for_each_dev(&container_subsys, NULL, NULL, container_add)) { + container_remove(); + return -ENOMEM; + } + + if (list_empty(&mdev->cdev_list.clist)) + return -ENODEV; + + list_for_each_entry(cnode, &mdev->cdev_list.clist, clist) { + device_create_file(cnode->dev, &dev_attr_state); + device_create_file(cnode->dev, &dev_attr_pxms); + } + + return 0; +} + + +static int __init mdev_init(void) +{ + int ret; + + mdev = kzalloc(sizeof(struct memory_dev), GFP_KERNEL); + if (!mdev) + return -ENOMEM; + + ret = container_init(); + if (ret) { + kfree(mdev); + return ret; + } + + mdev->memdev_kobj = kobject_create_and_add("hbm_memory", kernel_kobj); + if (!mdev->memdev_kobj) { + container_remove(); + kfree(mdev); + return -ENOMEM; + } + + return ret; +} +module_init(mdev_init); + +static void __exit mdev_exit(void) +{ + container_remove(); + kobject_put(mdev->memdev_kobj); + kfree(mdev); +} +module_exit(mdev_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Zhang Zekun zhangzekun11@huawei.com"); diff --git a/drivers/soc/hisilicon/hisi_internal.h b/drivers/soc/hisilicon/hisi_internal.h new file mode 100644 index 000000000000..5345174f6b84 --- /dev/null +++ b/drivers/soc/hisilicon/hisi_internal.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) Huawei Technologies Co., Ltd. 2023. All rights reserved. + */ + +#ifndef _HISI_INTERNAL_H +#define _HISI_INTERNAL_H + +enum { + STATE_ONLINE, + STATE_OFFLINE, +}; + +static const char *const online_type_to_str[] = { + [STATE_ONLINE] = "online", + [STATE_OFFLINE] = "offline", +}; + +static inline int online_type_from_str(const char *str) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(online_type_to_str); i++) { + if (sysfs_streq(str, online_type_to_str[i])) + return i; + } + + return -EINVAL; +} + +#endif