[PATCH v2 openEuler-26.09 0/3] Introduce xsched dmem
Introduce xsched dmem Alexander Pavlenko (3): xsched/dmem: init xsched dmem region xsched/dmem: introduce xsched_dmem_alloc() xsched/dmem: introduce xsched_dmem_free() include/linux/xsched.h | 16 ++++ include/uapi/linux/xcu_vstream.h | 8 ++ kernel/xsched/Makefile | 1 + kernel/xsched/core.c | 4 + kernel/xsched/dmem.c | 135 +++++++++++++++++++++++++++++++ kernel/xsched/vstream.c | 42 ++++++++++ 6 files changed, 206 insertions(+) create mode 100644 kernel/xsched/dmem.c -- 2.34.1
From: Alexander Pavlenko <pavlenko.alexander@huawei.com> hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8422 ---------------------------------------- Add support to initialize the xsched device memory (dmem) region during XPU device setup. Signed-off-by: Alexander Pavlenko <pavlenko.alexander@huawei.com> Signed-off-by: Liu Kai <liukai284@huawei.com> --- include/linux/xsched.h | 5 ++++ kernel/xsched/Makefile | 1 + kernel/xsched/core.c | 4 +++ kernel/xsched/dmem.c | 61 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 71 insertions(+) create mode 100644 kernel/xsched/dmem.c diff --git a/include/linux/xsched.h b/include/linux/xsched.h index 60cb43b4631f..8809ab22a50c 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -474,4 +474,9 @@ void xsched_quota_refill(struct work_struct *work); #endif +#ifdef CONFIG_CGROUP_DMEM +/* Dmem interface */ +int xsched_dmem_init(void); +#endif /* CONFIG_CGROUP_DMEM */ + #endif /* !__LINUX_XSCHED_H__ */ diff --git a/kernel/xsched/Makefile b/kernel/xsched/Makefile index a6081a7aaf14..3e23012ea298 100644 --- a/kernel/xsched/Makefile +++ b/kernel/xsched/Makefile @@ -6,4 +6,5 @@ obj-y += core.o obj-$(CONFIG_XCU_SCHED_RT) += rt.o obj-$(CONFIG_XCU_SCHED_CFS) += cfs.o cfs_quota.o obj-$(CONFIG_CGROUP_XCU) += cgroup.o +obj-$(CONFIG_CGROUP_DMEM) += dmem.o endif diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c index b23f2ca7820b..c6ec746448ef 100644 --- a/kernel/xsched/core.c +++ b/kernel/xsched/core.c @@ -530,6 +530,10 @@ __init int xsched_sched_init(void) xcu_cg_subsys_init(); #endif +#ifdef CONFIG_CGROUP_DMEM + xsched_dmem_init(); +#endif + return 0; } late_initcall(xsched_sched_init); diff --git a/kernel/xsched/dmem.c b/kernel/xsched/dmem.c new file mode 100644 index 000000000000..27e0c4b1a506 --- /dev/null +++ b/kernel/xsched/dmem.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Core kernel scheduler code for XPU device + * + * Copyright (C) 2025 Huawei 
Technologies Co., Ltd + * + * Author: Alexander Pavlenko <pavlenko.alexander@huawei.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ +#include <linux/err.h> +#include <linux/list.h> +#include <linux/xsched.h> +#include <linux/types.h> +#include <linux/cgroup_dmem.h> + +static struct dmem_cgroup_region *hbm_regions[XSCHED_NR_CUS]; + +struct xsched_dmem_pool { + uint64_t id; + struct dmem_cgroup_pool_state *pool; + struct list_head pool_node; +}; + +int xsched_dmem_init(void) +{ + // TODO: get max memory capacity from driver info or CONFIG + const size_t hbm_size_gb = 32; + const size_t hardcoded_hbm_size = hbm_size_gb * SZ_1G; + int dev_id, retval; + + // register HBM region for each device + for (dev_id = 0; dev_id < XSCHED_NR_CUS; dev_id++) { + hbm_regions[dev_id] = dmem_cgroup_register_region( + hardcoded_hbm_size, "HBM%d", dev_id); + + if (IS_ERR_OR_NULL(hbm_regions[dev_id])) { + XSCHED_ERR("Fail to register HBM region for xcu %d\n", dev_id); + retval = PTR_ERR(hbm_regions[dev_id]); + goto err_out; + } + XSCHED_INFO("register HBM%d %zuGB region(s) in dmem\n", dev_id, hbm_size_gb); + } + + return 0; + +err_out: + for (dev_id--; dev_id >= 0; dev_id--) { + dmem_cgroup_unregister_region(hbm_regions[dev_id]); + hbm_regions[dev_id] = NULL; + } + return retval; +} -- 2.34.1
From: Alexander Pavlenko <pavlenko.alexander@huawei.com> hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8422 ---------------------------------------- This commit changes the XPU device memory allocation flow: the xsched subsystem must first successfully register the intended device memory (dmem) region before any physical memory is allocated from the XPU. Signed-off-by: Alexander Pavlenko <pavlenko.alexander@huawei.com> Signed-off-by: Liu Kai <liukai284@huawei.com> --- include/linux/xsched.h | 8 ++++++ include/uapi/linux/xcu_vstream.h | 7 +++++ kernel/xsched/dmem.c | 47 ++++++++++++++++++++++++++++++++ kernel/xsched/vstream.c | 23 ++++++++++++++++ 4 files changed, 85 insertions(+) diff --git a/include/linux/xsched.h b/include/linux/xsched.h index 8809ab22a50c..baf90237dc14 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -348,6 +348,10 @@ struct xsched_context { struct list_head vstream_list; struct list_head ctx_node; +#ifdef CONFIG_CGROUP_DMEM + struct list_head pool_list; +#endif + struct xsched_entity xse; spinlock_t ctx_lock; @@ -477,6 +481,10 @@ void xsched_quota_refill(struct work_struct *work); #ifdef CONFIG_CGROUP_DMEM /* Dmem interface */ int xsched_dmem_init(void); +int xsched_dmem_alloc(struct xsched_context *ctx, struct vstream_args *args); +#else +static inline int xsched_dmem_alloc( + struct xsched_context *ctx, struct vstream_args *args) { return 0; } #endif /* CONFIG_CGROUP_DMEM */ #endif /* !__LINUX_XSCHED_H__ */ diff --git a/include/uapi/linux/xcu_vstream.h b/include/uapi/linux/xcu_vstream.h index b60c0e0e15f5..d076498e75af 100644 --- a/include/uapi/linux/xcu_vstream.h +++ b/include/uapi/linux/xcu_vstream.h @@ -22,6 +22,7 @@ typedef enum VSTREAM_COMMAND { VSTREAM_ALLOC = 0, VSTREAM_FREE, VSTREAM_KICK, + VSTREAM_ALLOC_HBM, MAX_COMMAND } vstream_command_t; @@ -51,6 +52,11 @@ typedef struct vstream_kick_args { KABI_RESERVE_BYTES(2, 8); } vstream_kick_args_t; +typedef struct vstream_hbm_args 
{ + __u64 size; + __u64 pool_id; +} vstream_hbm_args_t; + typedef struct vstream_args { __u32 channel_id; __u32 fd; @@ -64,6 +70,7 @@ typedef struct vstream_args { vstream_alloc_args_t va_args; vstream_free_args_t vf_args; vstream_kick_args_t vk_args; + vstream_hbm_args_t vh_args; }; __u32 payload_size; diff --git a/kernel/xsched/dmem.c b/kernel/xsched/dmem.c index 27e0c4b1a506..c600e3df2821 100644 --- a/kernel/xsched/dmem.c +++ b/kernel/xsched/dmem.c @@ -59,3 +59,50 @@ int xsched_dmem_init(void) } return retval; } + +int xsched_dmem_alloc(struct xsched_context *ctx, struct vstream_args *args) +{ + struct dmem_cgroup_pool_state *ret_pool, *ret_limit_pool; + struct xsched_dmem_pool *new_pool; + int ret = -EAGAIN; + static uint64_t cur_id; + struct dmem_cgroup_region *hbm_region; + + hbm_region = hbm_regions[args->dev_id]; + if (!hbm_region) { + XSCHED_ERR("Try to charge memory when region is not registered (region HBM%u)\n", + args->dev_id); + goto error_out; + } + + ret = dmem_cgroup_try_charge(hbm_region, args->vh_args.size, &ret_pool, &ret_limit_pool); + if (ret != 0) { + XSCHED_ERR("Fail to charge a new allocation to a HBM region\n"); + goto error_out; + } + + new_pool = kzalloc(sizeof(*new_pool), GFP_KERNEL); + if (!new_pool) { + XSCHED_ERR("Fail to alloc xsched dmem alloc @ %s\n", __func__); + ret = -ENOMEM; + goto error_out; + } + + new_pool->id = cur_id++; + new_pool->pool = ret_pool; + + /* protect list using ctx_lock */ + spin_lock(&ctx->ctx_lock); + list_add_tail(&new_pool->pool_node, &ctx->pool_list); + spin_unlock(&ctx->ctx_lock); + + args->vh_args.pool_id = new_pool->id; + XSCHED_DEBUG("charged %llu bytes, new_alloc = %p with id %llu", + args->vh_args.size, new_pool, new_pool->id); + + return 0; + +error_out: + args->vh_args.pool_id = ULLONG_MAX; + return ret; +} diff --git a/kernel/xsched/vstream.c b/kernel/xsched/vstream.c index d0815e33e081..5d052a33f8e3 100644 --- a/kernel/xsched/vstream.c +++ b/kernel/xsched/vstream.c @@ -158,6 +158,10 @@ static 
void init_xsched_ctx(struct xsched_context *ctx, INIT_LIST_HEAD(&ctx->vstream_list); INIT_LIST_HEAD(&ctx->ctx_node); +#ifdef CONFIG_CGROUP_DMEM + INIT_LIST_HEAD(&ctx->pool_list); +#endif + spin_lock_init(&ctx->ctx_lock); mutex_init(&ctx->ctx_mutex); } @@ -617,6 +621,24 @@ int vstream_kick(struct vstream_args *arg) return err; } +static int vstream_hbm_alloc(struct vstream_args *arg) +{ + struct xsched_cu *xcu_found; + struct xsched_context *ctx; + + xcu_found = xcu_find(XCU_TYPE_XPU, arg->dev_id, arg->channel_id); + if (!xcu_found) + return -EINVAL; + + ctx = ctx_find_by_tgid_and_xcu(current->tgid, xcu_found); + if (!ctx) { + XSCHED_ERR("Failed to find a context for HBM alloc"); + return -EINVAL; + } + + return xsched_dmem_alloc(ctx, arg); +} + /* * vstream_manage_cmd table */ @@ -624,6 +646,7 @@ static vstream_manage_t(*vstream_command_table[MAX_COMMAND + 1]) = { vstream_alloc, // VSTREAM_ALLOC vstream_free, // VSTREAM_FREE vstream_kick, // VSTREAM_KICK + vstream_hbm_alloc, // VSTREAM_HBM_ALLOC NULL // MAX_COMMAND }; -- 2.34.1
From: Alexander Pavlenko <pavlenko.alexander@huawei.com> hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8422 ---------------------------------------- This commit enforces a strict teardown order when releasing memory associated with an XPU device: the device memory (dmem) region must first be unregistered from the xsched subsystem before the underlying physical memory is deallocated. Signed-off-by: Alexander Pavlenko <pavlenko.alexander@huawei.com> Signed-off-by: Liu Kai <liukai284@huawei.com> --- include/linux/xsched.h | 3 +++ include/uapi/linux/xcu_vstream.h | 1 + kernel/xsched/dmem.c | 27 +++++++++++++++++++++++++++ kernel/xsched/vstream.c | 19 +++++++++++++++++++ 4 files changed, 50 insertions(+) diff --git a/include/linux/xsched.h b/include/linux/xsched.h index baf90237dc14..8cd1295fbefb 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -482,9 +482,12 @@ void xsched_quota_refill(struct work_struct *work); /* Dmem interface */ int xsched_dmem_init(void); int xsched_dmem_alloc(struct xsched_context *ctx, struct vstream_args *args); +int xsched_dmem_free(struct xsched_context *ctx, struct vstream_args *args); #else static inline int xsched_dmem_alloc( struct xsched_context *ctx, struct vstream_args *args) { return 0; } +static inline int xsched_dmem_free( + struct xsched_context *ctx, struct vstream_args *args) { return 0; } #endif /* CONFIG_CGROUP_DMEM */ #endif /* !__LINUX_XSCHED_H__ */ diff --git a/include/uapi/linux/xcu_vstream.h b/include/uapi/linux/xcu_vstream.h index d076498e75af..14552fae2159 100644 --- a/include/uapi/linux/xcu_vstream.h +++ b/include/uapi/linux/xcu_vstream.h @@ -23,6 +23,7 @@ typedef enum VSTREAM_COMMAND { VSTREAM_FREE, VSTREAM_KICK, VSTREAM_ALLOC_HBM, + VSTREAM_HBM_FREE, MAX_COMMAND } vstream_command_t; diff --git a/kernel/xsched/dmem.c b/kernel/xsched/dmem.c index c600e3df2821..27d55a8aab0a 100644 --- a/kernel/xsched/dmem.c +++ b/kernel/xsched/dmem.c @@ -106,3 +106,30 @@ int 
xsched_dmem_alloc(struct xsched_context *ctx, struct vstream_args *args) args->vh_args.pool_id = ULLONG_MAX; return ret; } + +int xsched_dmem_free(struct xsched_context *ctx, struct vstream_args *args) +{ + struct xsched_dmem_pool *pool, *target = NULL; + + spin_lock(&ctx->ctx_lock); + list_for_each_entry(pool, &ctx->pool_list, pool_node) { + if (pool->id == args->vh_args.pool_id) { + list_del(&pool->pool_node); + target = pool; + break; + } + } + spin_unlock(&ctx->ctx_lock); + + if (!target) { + XSCHED_ERR("pool with id %llu is not found\n", args->vh_args.pool_id); + return -EINVAL; + } + + XSCHED_DEBUG("uncharged %llu bytes for pool = %p with id %llu\n", + args->vh_args.size, target, target->id); + dmem_cgroup_uncharge(target->pool, args->vh_args.size); + kfree(target); + + return 0; +} diff --git a/kernel/xsched/vstream.c b/kernel/xsched/vstream.c index 5d052a33f8e3..7b769a2e2545 100644 --- a/kernel/xsched/vstream.c +++ b/kernel/xsched/vstream.c @@ -639,6 +639,24 @@ static int vstream_hbm_alloc(struct vstream_args *arg) return xsched_dmem_alloc(ctx, arg); } +static int vstream_hbm_free(struct vstream_args *arg) +{ + struct xsched_cu *xcu_found; + struct xsched_context *ctx; + + xcu_found = xcu_find(XCU_TYPE_XPU, arg->dev_id, arg->channel_id); + if (!xcu_found) + return -EINVAL; + + ctx = ctx_find_by_tgid_and_xcu(current->tgid, xcu_found); + if (!ctx) { + XSCHED_ERR("Failed to find a context for HBM free"); + return -EINVAL; + } + + return xsched_dmem_free(ctx, arg); +} + /* * vstream_manage_cmd table */ @@ -647,6 +665,7 @@ static vstream_manage_t(*vstream_command_table[MAX_COMMAND + 1]) = { vstream_free, // VSTREAM_FREE vstream_kick, // VSTREAM_KICK vstream_hbm_alloc, // VSTREAM_HBM_ALLOC + vstream_hbm_free, // VSTREAM_HBM_FREE NULL // MAX_COMMAND }; -- 2.34.1
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://atomgit.com/openeuler/kernel/merge_requests/20954 邮件列表地址:https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/TAJ... FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://atomgit.com/openeuler/kernel/merge_requests/20954 Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/TAJ...
participants (2)
- Liu Kai
- patchwork bot