[PATCH v3 openEuler-26.09 0/4] Introduce xsched dmem
Introduce xsched dmem Alexander Pavlenko (4): xsched/dmem: set max region size of xsched dmem root group to physical XPU memory size xsched/dmem: init xsched dmem region xsched/dmem: introduce xsched_dmem_alloc() xsched/dmem: introduce xsched_dmem_free() include/linux/xsched.h | 16 ++++ include/uapi/linux/xcu_vstream.h | 8 ++ kernel/cgroup/dmem.c | 5 ++ kernel/xsched/Makefile | 1 + kernel/xsched/core.c | 4 + kernel/xsched/dmem.c | 135 +++++++++++++++++++++++++++++++ kernel/xsched/vstream.c | 42 ++++++++++ 7 files changed, 211 insertions(+) create mode 100644 kernel/xsched/dmem.c -- 2.34.1
From: Alexander Pavlenko <pavlenko.alexander@huawei.com> hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8422 ---------------------------------------- This commit configures the maximum region size of the xsched dmem root group to match the total physical memory size of the XPU device. By aligning the root group’s max limit with actual hardware capacity, we ensure that child groups cannot collectively exceed the device’s real memory capacity, enabling accurate resource accounting and preventing overcommitment. Signed-off-by: Alexander Pavlenko <pavlenko.alexander@huawei.com> Signed-off-by: Liu Kai <liukai284@huawei.com> --- kernel/cgroup/dmem.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/cgroup/dmem.c b/kernel/cgroup/dmem.c index 4cc33e4d8257..31473d9793fe 100644 --- a/kernel/cgroup/dmem.c +++ b/kernel/cgroup/dmem.c @@ -382,6 +382,11 @@ alloc_pool_single(struct dmemcg_state *dmemcs, struct dmem_cgroup_region *region dmemcg_pool_get(ppool); } +#ifdef CONFIG_XCU_SCHEDULER + if (!ppool) + set_resource_max(pool, region->size); +#endif + list_add_tail_rcu(&pool->css_node, &dmemcs->pools); list_add_tail(&pool->region_node, ®ion->pools); -- 2.34.1
From: Alexander Pavlenko <pavlenko.alexander@huawei.com> hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8422 ---------------------------------------- Add support to initialize the xsched device memory (dmem) region during XPU device setup. Signed-off-by: Alexander Pavlenko <pavlenko.alexander@huawei.com> Signed-off-by: Liu Kai <liukai284@huawei.com> --- include/linux/xsched.h | 5 ++++ kernel/xsched/Makefile | 1 + kernel/xsched/core.c | 4 +++ kernel/xsched/dmem.c | 61 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 71 insertions(+) create mode 100644 kernel/xsched/dmem.c diff --git a/include/linux/xsched.h b/include/linux/xsched.h index 60cb43b4631f..8809ab22a50c 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -474,4 +474,9 @@ void xsched_quota_refill(struct work_struct *work); #endif +#ifdef CONFIG_CGROUP_DMEM +/* Dmem interface */ +int xsched_dmem_init(void); +#endif /* CONFIG_CGROUP_DMEM */ + #endif /* !__LINUX_XSCHED_H__ */ diff --git a/kernel/xsched/Makefile b/kernel/xsched/Makefile index a6081a7aaf14..3e23012ea298 100644 --- a/kernel/xsched/Makefile +++ b/kernel/xsched/Makefile @@ -6,4 +6,5 @@ obj-y += core.o obj-$(CONFIG_XCU_SCHED_RT) += rt.o obj-$(CONFIG_XCU_SCHED_CFS) += cfs.o cfs_quota.o obj-$(CONFIG_CGROUP_XCU) += cgroup.o +obj-$(CONFIG_CGROUP_DMEM) += dmem.o endif diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c index b23f2ca7820b..c6ec746448ef 100644 --- a/kernel/xsched/core.c +++ b/kernel/xsched/core.c @@ -530,6 +530,10 @@ __init int xsched_sched_init(void) xcu_cg_subsys_init(); #endif +#ifdef CONFIG_CGROUP_DMEM + xsched_dmem_init(); +#endif + return 0; } late_initcall(xsched_sched_init); diff --git a/kernel/xsched/dmem.c b/kernel/xsched/dmem.c new file mode 100644 index 000000000000..27e0c4b1a506 --- /dev/null +++ b/kernel/xsched/dmem.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Core kernel scheduler code for XPU device + * + * Copyright (C) 2025 Huawei 
Technologies Co., Ltd + * + * Author: Alexander Pavlenko <pavlenko.alexander@huawei.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ +#include <linux/err.h> +#include <linux/list.h> +#include <linux/xsched.h> +#include <linux/types.h> +#include <linux/cgroup_dmem.h> + +static struct dmem_cgroup_region *hbm_regions[XSCHED_NR_CUS]; + +struct xsched_dmem_pool { + uint64_t id; + struct dmem_cgroup_pool_state *pool; + struct list_head pool_node; +}; + +int xsched_dmem_init(void) +{ + // TODO: get max memory capacity from driver info or CONFIG + const size_t hbm_size_gb = 32; + const size_t hardcoded_hbm_size = hbm_size_gb * SZ_1G; + int dev_id, retval; + + // register HBM region for each device + for (dev_id = 0; dev_id < XSCHED_NR_CUS; dev_id++) { + hbm_regions[dev_id] = dmem_cgroup_register_region( + hardcoded_hbm_size, "HBM%d", dev_id); + + if (IS_ERR_OR_NULL(hbm_regions[dev_id])) { + XSCHED_ERR("Fail to register HBM region for xcu %d\n", dev_id); + retval = PTR_ERR(hbm_regions[dev_id]); + goto err_out; + } + XSCHED_INFO("register HBM%d %zuGB region(s) in dmem\n", dev_id, hbm_size_gb); + } + + return 0; + +err_out: + for (dev_id--; dev_id >= 0; dev_id--) { + dmem_cgroup_unregister_region(hbm_regions[dev_id]); + hbm_regions[dev_id] = NULL; + } + return retval; +} -- 2.34.1
From: Alexander Pavlenko <pavlenko.alexander@huawei.com> hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8422 ---------------------------------------- This commit changes the XPU device memory allocation flow: the xsched subsystem must first successfully register the intended device memory (dmem) region before any physical memory is allocated from the XPU. Signed-off-by: Alexander Pavlenko <pavlenko.alexander@huawei.com> Signed-off-by: Liu Kai <liukai284@huawei.com> --- include/linux/xsched.h | 8 ++++++ include/uapi/linux/xcu_vstream.h | 7 +++++ kernel/xsched/dmem.c | 47 ++++++++++++++++++++++++++++++++ kernel/xsched/vstream.c | 23 ++++++++++++++++ 4 files changed, 85 insertions(+) diff --git a/include/linux/xsched.h b/include/linux/xsched.h index 8809ab22a50c..baf90237dc14 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -348,6 +348,10 @@ struct xsched_context { struct list_head vstream_list; struct list_head ctx_node; +#ifdef CONFIG_CGROUP_DMEM + struct list_head pool_list; +#endif + struct xsched_entity xse; spinlock_t ctx_lock; @@ -477,6 +481,10 @@ void xsched_quota_refill(struct work_struct *work); #ifdef CONFIG_CGROUP_DMEM /* Dmem interface */ int xsched_dmem_init(void); +int xsched_dmem_alloc(struct xsched_context *ctx, struct vstream_args *args); +#else +static inline int xsched_dmem_alloc( + struct xsched_context *ctx, struct vstream_args *args) { return 0; } #endif /* CONFIG_CGROUP_DMEM */ #endif /* !__LINUX_XSCHED_H__ */ diff --git a/include/uapi/linux/xcu_vstream.h b/include/uapi/linux/xcu_vstream.h index b60c0e0e15f5..d076498e75af 100644 --- a/include/uapi/linux/xcu_vstream.h +++ b/include/uapi/linux/xcu_vstream.h @@ -22,6 +22,7 @@ typedef enum VSTREAM_COMMAND { VSTREAM_ALLOC = 0, VSTREAM_FREE, VSTREAM_KICK, + VSTREAM_ALLOC_HBM, MAX_COMMAND } vstream_command_t; @@ -51,6 +52,11 @@ typedef struct vstream_kick_args { KABI_RESERVE_BYTES(2, 8); } vstream_kick_args_t; +typedef struct vstream_hbm_args 
{ + __u64 size; + __u64 pool_id; +} vstream_hbm_args_t; + typedef struct vstream_args { __u32 channel_id; __u32 fd; @@ -64,6 +70,7 @@ typedef struct vstream_args { vstream_alloc_args_t va_args; vstream_free_args_t vf_args; vstream_kick_args_t vk_args; + vstream_hbm_args_t vh_args; }; __u32 payload_size; diff --git a/kernel/xsched/dmem.c b/kernel/xsched/dmem.c index 27e0c4b1a506..c600e3df2821 100644 --- a/kernel/xsched/dmem.c +++ b/kernel/xsched/dmem.c @@ -59,3 +59,50 @@ int xsched_dmem_init(void) } return retval; } + +int xsched_dmem_alloc(struct xsched_context *ctx, struct vstream_args *args) +{ + struct dmem_cgroup_pool_state *ret_pool, *ret_limit_pool; + struct xsched_dmem_pool *new_pool; + int ret = -EAGAIN; + static uint64_t cur_id; + struct dmem_cgroup_region *hbm_region; + + hbm_region = hbm_regions[args->dev_id]; + if (!hbm_region) { + XSCHED_ERR("Try to charge memory when region is not registered (region HBM%u)\n", + args->dev_id); + goto error_out; + } + + ret = dmem_cgroup_try_charge(hbm_region, args->vh_args.size, &ret_pool, &ret_limit_pool); + if (ret != 0) { + XSCHED_ERR("Fail to charge a new allocation to a HBM region\n"); + goto error_out; + } + + new_pool = kzalloc(sizeof(*new_pool), GFP_KERNEL); + if (!new_pool) { + XSCHED_ERR("Fail to alloc xsched dmem alloc @ %s\n", __func__); + ret = -ENOMEM; + goto error_out; + } + + new_pool->id = cur_id++; + new_pool->pool = ret_pool; + + /* protect list using ctx_lock */ + spin_lock(&ctx->ctx_lock); + list_add_tail(&new_pool->pool_node, &ctx->pool_list); + spin_unlock(&ctx->ctx_lock); + + args->vh_args.pool_id = new_pool->id; + XSCHED_DEBUG("charged %llu bytes, new_alloc = %p with id %llu", + args->vh_args.size, new_pool, new_pool->id); + + return 0; + +error_out: + args->vh_args.pool_id = ULLONG_MAX; + return ret; +} diff --git a/kernel/xsched/vstream.c b/kernel/xsched/vstream.c index d0815e33e081..5d052a33f8e3 100644 --- a/kernel/xsched/vstream.c +++ b/kernel/xsched/vstream.c @@ -158,6 +158,10 @@ static 
void init_xsched_ctx(struct xsched_context *ctx, INIT_LIST_HEAD(&ctx->vstream_list); INIT_LIST_HEAD(&ctx->ctx_node); +#ifdef CONFIG_CGROUP_DMEM + INIT_LIST_HEAD(&ctx->pool_list); +#endif + spin_lock_init(&ctx->ctx_lock); mutex_init(&ctx->ctx_mutex); } @@ -617,6 +621,24 @@ int vstream_kick(struct vstream_args *arg) return err; } +static int vstream_hbm_alloc(struct vstream_args *arg) +{ + struct xsched_cu *xcu_found; + struct xsched_context *ctx; + + xcu_found = xcu_find(XCU_TYPE_XPU, arg->dev_id, arg->channel_id); + if (!xcu_found) + return -EINVAL; + + ctx = ctx_find_by_tgid_and_xcu(current->tgid, xcu_found); + if (!ctx) { + XSCHED_ERR("Failed to find a context for HBM alloc"); + return -EINVAL; + } + + return xsched_dmem_alloc(ctx, arg); +} + /* * vstream_manage_cmd table */ @@ -624,6 +646,7 @@ static vstream_manage_t(*vstream_command_table[MAX_COMMAND + 1]) = { vstream_alloc, // VSTREAM_ALLOC vstream_free, // VSTREAM_FREE vstream_kick, // VSTREAM_KICK + vstream_hbm_alloc, // VSTREAM_HBM_ALLOC NULL // MAX_COMMAND }; -- 2.34.1
From: Alexander Pavlenko <pavlenko.alexander@huawei.com> hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8422 ---------------------------------------- This commit enforces a strict teardown order when releasing memory associated with an XPU device: the device memory (dmem) region must first be unregistered from the xsched subsystem before the underlying physical memory is deallocated. Signed-off-by: Alexander Pavlenko <pavlenko.alexander@huawei.com> Signed-off-by: Liu Kai <liukai284@huawei.com> --- include/linux/xsched.h | 3 +++ include/uapi/linux/xcu_vstream.h | 1 + kernel/xsched/dmem.c | 27 +++++++++++++++++++++++++++ kernel/xsched/vstream.c | 19 +++++++++++++++++++ 4 files changed, 50 insertions(+) diff --git a/include/linux/xsched.h b/include/linux/xsched.h index baf90237dc14..8cd1295fbefb 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -482,9 +482,12 @@ void xsched_quota_refill(struct work_struct *work); /* Dmem interface */ int xsched_dmem_init(void); int xsched_dmem_alloc(struct xsched_context *ctx, struct vstream_args *args); +int xsched_dmem_free(struct xsched_context *ctx, struct vstream_args *args); #else static inline int xsched_dmem_alloc( struct xsched_context *ctx, struct vstream_args *args) { return 0; } +static inline int xsched_dmem_free( + struct xsched_context *ctx, struct vstream_args *args) { return 0; } #endif /* CONFIG_CGROUP_DMEM */ #endif /* !__LINUX_XSCHED_H__ */ diff --git a/include/uapi/linux/xcu_vstream.h b/include/uapi/linux/xcu_vstream.h index d076498e75af..14552fae2159 100644 --- a/include/uapi/linux/xcu_vstream.h +++ b/include/uapi/linux/xcu_vstream.h @@ -23,6 +23,7 @@ typedef enum VSTREAM_COMMAND { VSTREAM_FREE, VSTREAM_KICK, VSTREAM_ALLOC_HBM, + VSTREAM_HBM_FREE, MAX_COMMAND } vstream_command_t; diff --git a/kernel/xsched/dmem.c b/kernel/xsched/dmem.c index c600e3df2821..27d55a8aab0a 100644 --- a/kernel/xsched/dmem.c +++ b/kernel/xsched/dmem.c @@ -106,3 +106,30 @@ int 
xsched_dmem_alloc(struct xsched_context *ctx, struct vstream_args *args) args->vh_args.pool_id = ULLONG_MAX; return ret; } + +int xsched_dmem_free(struct xsched_context *ctx, struct vstream_args *args) +{ + struct xsched_dmem_pool *pool, *target = NULL; + + spin_lock(&ctx->ctx_lock); + list_for_each_entry(pool, &ctx->pool_list, pool_node) { + if (pool->id == args->vh_args.pool_id) { + list_del(&pool->pool_node); + target = pool; + break; + } + } + spin_unlock(&ctx->ctx_lock); + + if (!target) { + XSCHED_ERR("pool with id %llu is not found\n", args->vh_args.pool_id); + return -EINVAL; + } + + XSCHED_DEBUG("uncharged %llu bytes for pool = %p with id %llu\n", + args->vh_args.size, target, target->id); + dmem_cgroup_uncharge(target->pool, args->vh_args.size); + kfree(target); + + return 0; +} diff --git a/kernel/xsched/vstream.c b/kernel/xsched/vstream.c index 5d052a33f8e3..7b769a2e2545 100644 --- a/kernel/xsched/vstream.c +++ b/kernel/xsched/vstream.c @@ -639,6 +639,24 @@ static int vstream_hbm_alloc(struct vstream_args *arg) return xsched_dmem_alloc(ctx, arg); } +static int vstream_hbm_free(struct vstream_args *arg) +{ + struct xsched_cu *xcu_found; + struct xsched_context *ctx; + + xcu_found = xcu_find(XCU_TYPE_XPU, arg->dev_id, arg->channel_id); + if (!xcu_found) + return -EINVAL; + + ctx = ctx_find_by_tgid_and_xcu(current->tgid, xcu_found); + if (!ctx) { + XSCHED_ERR("Failed to find a context for HBM free"); + return -EINVAL; + } + + return xsched_dmem_free(ctx, arg); +} + /* * vstream_manage_cmd table */ @@ -647,6 +665,7 @@ static vstream_manage_t(*vstream_command_table[MAX_COMMAND + 1]) = { vstream_free, // VSTREAM_FREE vstream_kick, // VSTREAM_KICK vstream_hbm_alloc, // VSTREAM_HBM_ALLOC + vstream_hbm_free, // VSTREAM_HBM_FREE NULL // MAX_COMMAND }; -- 2.34.1
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://atomgit.com/openeuler/kernel/merge_requests/21005 邮件列表地址:https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/VNG... FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://atomgit.com/openeuler/kernel/merge_requests/21005 Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/VNG...
participants (2)
- Liu Kai
- patchwork bot