
From: Alekseev Dmitry <alekseev.dmitry@huawei.com> hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB ----------------------------------------- Add support for CFS quota for cgroups. Signed-off-by: Alekseev Dmitry <alekseev.dmitry@huawei.com> Signed-off-by: Hui Tang <tanghui20@huawei.com> Signed-off-by: Liu Kai <liukai284@huawei.com> Signed-off-by: Xia Fukun <xiafukun@huawei.com> --- include/linux/xsched.h | 11 +++- include/uapi/linux/xcu_vstream.h | 1 + kernel/xsched/Makefile | 2 +- kernel/xsched/cfs.c | 1 + kernel/xsched/cfs_quota.c | 95 ++++++++++++++++++++++++++++++++ kernel/xsched/cgroup.c | 62 ++++++++++++++++++++- kernel/xsched/core.c | 23 ++++---- 7 files changed, 181 insertions(+), 14 deletions(-) create mode 100644 kernel/xsched/cfs_quota.c diff --git a/include/linux/xsched.h b/include/linux/xsched.h index e59e4fe5e4b4..5ffaffc5afdb 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -38,7 +38,8 @@ #define RUNTIME_INF ((u64)~0ULL) #define XSCHED_TIME_INF RUNTIME_INF #define XSCHED_CFS_ENTITY_WEIGHT_DFLT 1 -#define XSCHED_CFS_MIN_TIMESLICE (10*NSEC_PER_MSEC) +#define XSCHED_CFS_MIN_TIMESLICE (10 * NSEC_PER_MSEC) +#define XSCHED_CFS_QUOTA_PERIOD_MS (100 * NSEC_PER_MSEC) #define XSCHED_CFG_SHARE_DFLT 1024 #define __GET_VS_TASK_TYPE(t) ((t)&0xFF) @@ -590,6 +591,7 @@ static inline void xsched_init_vsm(struct vstream_metadata *vsm, struct vstream_info *vs, vstream_args_t *arg) { vsm->sq_id = arg->sq_id; + vsm->exec_time = arg->vk_args.exec_time; vsm->sqe_num = arg->vk_args.sqe_num; vsm->timeout = arg->vk_args.timeout; memcpy(vsm->sqe, arg->vk_args.sqe, XCU_SQE_SIZE_MAX); @@ -615,6 +617,13 @@ int xsched_group_inherit(struct task_struct *tsk, struct xsched_entity *xse); void xcu_cg_init_common(struct xsched_group *xcg); void xcu_grp_shares_update(struct xsched_group *xg); void xsched_group_xse_detach(struct xsched_entity *xse); + +void xsched_quota_init(void); +void xsched_quota_timeout_init(struct 
xsched_group *xg); +void xsched_quota_timeout_update(struct xsched_group *xg); +void xsched_quota_account(struct xsched_group *xg, s64 exec_time); +bool xsched_quota_exceed(struct xsched_group *xg); +void xsched_quota_refill(struct work_struct *work); void enqueue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu); void dequeue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu); #endif /* __LINUX_XSCHED_H__ */ diff --git a/include/uapi/linux/xcu_vstream.h b/include/uapi/linux/xcu_vstream.h index 32c71dce5ad1..46d5a32db68e 100644 --- a/include/uapi/linux/xcu_vstream.h +++ b/include/uapi/linux/xcu_vstream.h @@ -28,6 +28,7 @@ typedef struct vstream_free_args { } vstream_free_args_t; typedef struct vstream_kick_args { __u32 sqe_num; + __u32 exec_time; __s32 timeout; __s8 sqe[XCU_SQE_SIZE_MAX]; } vstream_kick_args_t; diff --git a/kernel/xsched/Makefile b/kernel/xsched/Makefile index c4c06b6038ff..8ab32b086b3d 100644 --- a/kernel/xsched/Makefile +++ b/kernel/xsched/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 obj-y += vstream.o -obj-$(CONFIG_XCU_SCHEDULER) += core.o rt.o cfs.o +obj-$(CONFIG_XCU_SCHEDULER) += core.o rt.o cfs.o cfs_quota.o obj-$(CONFIG_CGROUP_XCU) += cgroup.o diff --git a/kernel/xsched/cfs.c b/kernel/xsched/cfs.c index 1313c7e73a11..94189d8088ac 100644 --- a/kernel/xsched/cfs.c +++ b/kernel/xsched/cfs.c @@ -209,6 +209,7 @@ static void put_prev_ctx_fair(struct xsched_entity *xse) { struct xsched_entity_cfs *prev = &xse->cfs; + xsched_quota_account(xse->parent_grp, (s64)xse->last_exec_runtime); xs_update(prev, xse->last_exec_runtime); } diff --git a/kernel/xsched/cfs_quota.c b/kernel/xsched/cfs_quota.c new file mode 100644 index 000000000000..a62f07ad3cdc --- /dev/null +++ b/kernel/xsched/cfs_quota.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Bandwidth provisioning for XPU device + * + * Copyright (C) 2025-2026 Huawei Technologies Co., Ltd + * + * Author: Konstantin Meskhidze <konstantin.meskhidze@huawei.com> + * + * 
This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ +#include <linux/timer.h> +#include <linux/xsched.h> + +static struct workqueue_struct *quota_workqueue; + +void xsched_quota_refill(struct work_struct *work) +{ + uint32_t id; + struct xsched_cu *xcu; + struct xsched_group *xg; + + xg = container_of(work, struct xsched_group, refill_work); + + spin_lock(&xg->lock); + xg->runtime = max((xg->runtime - xg->quota), (s64)0); + hrtimer_start(&xg->quota_timeout, ns_to_ktime(xg->period), HRTIMER_MODE_REL_SOFT); + spin_unlock(&xg->lock); + + for_each_active_xcu(xcu, id) { + xcu = xsched_cu_mgr[id]; + mutex_lock(&xcu->xcu_lock); + if (!READ_ONCE(xg->perxcu_priv[id].xse.on_rq)) { + enqueue_ctx(&xg->perxcu_priv[id].xse, xcu); + wake_up_interruptible(&xcu->wq_xcu_idle); + } + mutex_unlock(&xcu->xcu_lock); + } +} + +static enum hrtimer_restart quota_timer_cb(struct hrtimer *hrtimer) +{ + struct xsched_group *xg; + + xg = container_of(hrtimer, struct xsched_group, quota_timeout); + queue_work(quota_workqueue, &xg->refill_work); + + return HRTIMER_NORESTART; +} + +void xsched_quota_account(struct xsched_group *xg, s64 exec_time) +{ + spin_lock(&xg->lock); + xg->runtime += exec_time; + spin_unlock(&xg->lock); +} + +bool xsched_quota_exceed(struct xsched_group *xg) +{ + bool ret; + + spin_lock(&xg->lock); + ret = (xg->quota > 0) ? 
(xg->runtime >= xg->quota) : false; + spin_unlock(&xg->lock); + + return ret; +} + +void xsched_quota_init(void) +{ + quota_workqueue = create_singlethread_workqueue("xsched_quota_workqueue"); +} + +void xsched_quota_timeout_init(struct xsched_group *xg) +{ + hrtimer_init(&xg->quota_timeout, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); + xg->quota_timeout.function = quota_timer_cb; +} + +void xsched_quota_timeout_update(struct xsched_group *xg) +{ + struct hrtimer *t = &xg->quota_timeout; + + hrtimer_cancel(t); + + if (xg->quota > 0 && xg->period > 0) + hrtimer_start(t, ns_to_ktime(xg->period), HRTIMER_MODE_REL_SOFT); +} diff --git a/kernel/xsched/cgroup.c b/kernel/xsched/cgroup.c index 8ae17069e031..aa675a013927 100644 --- a/kernel/xsched/cgroup.c +++ b/kernel/xsched/cgroup.c @@ -47,6 +47,8 @@ void xcu_cg_init_common(struct xsched_group *xcg) spin_lock_init(&xcg->lock); INIT_LIST_HEAD(&xcg->members); INIT_LIST_HEAD(&xcg->children_groups); + xsched_quota_timeout_init(xcg); + INIT_WORK(&xcg->refill_work, xsched_quota_refill); } static void xcu_cfs_root_cg_init(void) @@ -62,6 +64,10 @@ static void xcu_cfs_root_cg_init(void) } root_xcg->sched_type = XSCHED_TYPE_DFLT; + root_xcg->period = XSCHED_CFS_QUOTA_PERIOD_MS; + root_xcg->quota = XSCHED_TIME_INF; + root_xcg->runtime = 0; + xsched_quota_init(); } /** @@ -115,6 +121,9 @@ static int xcu_cfs_cg_init(struct xsched_group *xcg, xcg->shares_cfg = XSCHED_CFG_SHARE_DFLT; xcu_grp_shares_update(parent_xg); + xcg->period = XSCHED_CFS_QUOTA_PERIOD_MS; + xcg->quota = XSCHED_TIME_INF; + xcg->runtime = 0; return 0; @@ -223,6 +232,8 @@ static void xcu_css_free(struct cgroup_subsys_state *css) break; } } + hrtimer_cancel(&xcg->quota_timeout); + cancel_work_sync(&xcg->refill_work); list_del(&xcg->group_node); mutex_unlock(&xcg_mutex); @@ -460,6 +471,12 @@ static s64 xcu_read_s64(struct cgroup_subsys_state *css, struct cftype *cft) spin_lock(&xcucg->lock); switch (cft->private) { + case XCU_FILE_PERIOD_MS: + ret = xcucg->period / 
NSEC_PER_MSEC; + break; + case XCU_FILE_QUOTA_MS: + ret = (xcucg->quota > 0) ? xcucg->quota / NSEC_PER_MSEC : xcucg->quota; + break; case XCU_FILE_SHARES: ret = xcucg->shares_cfg; break; @@ -530,11 +547,37 @@ static int xcu_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, { int ret = 0; struct xsched_group *xcucg = xcu_cg_from_css(css); + s64 quota_ns; + + if (xcucg->sched_type != XSCHED_TYPE_CFS) + return -EINVAL; spin_lock(&xcucg->lock); switch (cft->private) { + case XCU_FILE_PERIOD_MS: + if (val < 1 || val > (S64_MAX / NSEC_PER_MSEC)) { + ret = -EINVAL; + break; + } + xcucg->period = val * NSEC_PER_MSEC; + xsched_quota_timeout_update(xcucg); + break; + case XCU_FILE_QUOTA_MS: + if (val < -1 || val > (S64_MAX / NSEC_PER_MSEC)) { + ret = -EINVAL; + break; + } + /* Runtime should be updated when modifying quota_ms configuration */ + quota_ns = (val > 0) ? val * NSEC_PER_MSEC : val; + if (xcucg->quota > 0 && quota_ns > 0) + xcucg->runtime = max((xcucg->runtime - quota_ns), (s64)0); + else + xcucg->runtime = 0; + xcucg->quota = quota_ns; + xsched_quota_timeout_update(xcucg); + break; case XCU_FILE_SHARES: - if (val <= 0) { + if (val <= 0) { ret = -EINVAL; break; } @@ -577,11 +620,28 @@ static int xcu_stat(struct seq_file *sf, void *v) seq_printf(sf, "exec_runtime: %llu\n", exec_runtime); seq_printf(sf, "shares cfg: %llu/%llu x%u\n", xcucg->shares_cfg, xcucg->parent->children_shares_sum, xcucg->weight); + seq_printf(sf, "quota: %lld\n", xcucg->quota); + seq_printf(sf, "used: %lld\n", xcucg->runtime); + seq_printf(sf, "period: %lld\n", xcucg->period); return 0; } static struct cftype xcu_cg_files[] = { + { + .name = "period_ms", + .flags = CFTYPE_NOT_ON_ROOT, + .read_s64 = xcu_read_s64, + .write_s64 = xcu_write_s64, + .private = XCU_FILE_PERIOD_MS, + }, + { + .name = "quota_ms", + .flags = CFTYPE_NOT_ON_ROOT, + .read_s64 = xcu_read_s64, + .write_s64 = xcu_write_s64, + .private = XCU_FILE_QUOTA_MS, + }, { .name = "shares", .flags = 
CFTYPE_NOT_ON_ROOT, diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c index 64f2cbafb8cd..78808f6ae561 100644 --- a/kernel/xsched/core.c +++ b/kernel/xsched/core.c @@ -46,7 +46,6 @@ static void put_prev_ctx(struct xsched_entity *xse) struct xsched_cu *xcu = xse->xcu; lockdep_assert_held(&xcu->xcu_lock); - xse->class->put_prev_ctx(xse); xse->last_exec_runtime = 0; atomic_set(&xse->submitted_one_kick, 0); @@ -505,16 +504,18 @@ static int xsched_schedule(void *input_xcu) continue; curr_xse = xcu->xrq.curr_xse; - if (curr_xse) { /* if not deleted yet */ - put_prev_ctx(curr_xse); - if (!atomic_read(&curr_xse->kicks_pending_ctx_cnt)) { - dequeue_ctx(curr_xse, xcu); - XSCHED_DEBUG( - "%s: Dequeue xse %d due to zero kicks on xcu %u\n", - __func__, curr_xse->tgid, xcu->id); - curr_xse = xcu->xrq.curr_xse = NULL; - } - } + if (!curr_xse) + continue; + + /* if not deleted yet */ + put_prev_ctx(curr_xse); + if (!atomic_read(&curr_xse->kicks_pending_ctx_cnt)) + dequeue_ctx(curr_xse, xcu); + + if (xsched_quota_exceed(curr_xse->parent_grp)) + dequeue_ctx(&curr_xse->parent_grp->perxcu_priv[xcu->id].xse, xcu); + + xcu->xrq.curr_xse = NULL; } return err; -- 2.34.1