
From: Alekseev Dmitry <alekseev.dmitry@huawei.com> hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB ----------------------------------------- Add support for CFS quota for cgroups. Signed-off-by: Alekseev Dmitry <alekseev.dmitry@huawei.com> Signed-off-by: Hui Tang <tanghui20@huawei.com> --- include/linux/xsched.h | 10 ++++ include/uapi/linux/xcu_vstream.h | 1 + kernel/xsched/Makefile | 2 +- kernel/xsched/cfs_quota.c | 96 ++++++++++++++++++++++++++++++++ kernel/xsched/cgroup.c | 52 ++++++++++++++++- kernel/xsched/core.c | 13 ++++- 6 files changed, 171 insertions(+), 3 deletions(-) create mode 100644 kernel/xsched/cfs_quota.c diff --git a/include/linux/xsched.h b/include/linux/xsched.h index 51be002b1970..5de53d9c231b 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -59,6 +59,8 @@ #define XSCHED_TIME_INF RUNTIME_INF #define XSCHED_CFS_ENTITY_WEIGHT_DFLT 1 #define XSCHED_CFS_MIN_TIMESLICE (10 * NSEC_PER_MSEC) +#define XSCHED_CFS_PERIOD (1 * NSEC_PER_MSEC) +#define XSCHED_CFS_QUOTA_PERIOD_MS (100 * NSEC_PER_MSEC) #define XSCHED_CFG_SHARE_DFLT 1024 #define __GET_VS_TASK_TYPE(t) ((t)&0xFF) @@ -621,6 +623,7 @@ static inline void xsched_init_vsm(struct vstream_metadata *vsm, vstream_args_t *arg) { vsm->sq_id = arg->sq_id; + vsm->exec_time = arg->vk_args.exec_time; vsm->sqe_num = arg->vk_args.sqe_num; vsm->timeout = arg->vk_args.timeout; memcpy(vsm->sqe, arg->vk_args.sqe, XCU_SQE_SIZE_MAX); @@ -645,5 +648,12 @@ int xsched_group_inherit(struct task_struct *tsk, struct xsched_entity *xse); void xcu_cg_init_common(struct xsched_group *xcg); void xcu_grp_shares_update(struct xsched_group *xg); void xsched_group_xse_detach(struct xsched_entity *xse); + +void xsched_quotas_init(void); +void xsched_quota_timeout_init(struct xsched_group *xg); +void xsched_quota_timeout_update(struct xsched_group *xg); +void xsched_quota_account(struct xsched_group *xg, s64 exec_time); +bool xsched_quota_exceed(struct xsched_group *xg); 
+void xsched_quota_refill(struct work_struct *work); void enqueue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu); #endif /* !__LINUX_XSCHED_H__ */ diff --git a/include/uapi/linux/xcu_vstream.h b/include/uapi/linux/xcu_vstream.h index 095d203ad422..1024f5af5c2d 100644 --- a/include/uapi/linux/xcu_vstream.h +++ b/include/uapi/linux/xcu_vstream.h @@ -28,6 +28,7 @@ typedef struct vstream_free_args { } vstream_free_args_t; typedef struct vstream_kick_args { __u32 sqe_num; + __u32 exec_time; __s32 timeout; __s8 sqe[XCU_SQE_SIZE_MAX]; } vstream_kick_args_t; diff --git a/kernel/xsched/Makefile b/kernel/xsched/Makefile index c4c06b6038ff..8ab32b086b3d 100644 --- a/kernel/xsched/Makefile +++ b/kernel/xsched/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 obj-y += vstream.o -obj-$(CONFIG_XCU_SCHEDULER) += core.o rt.o cfs.o +obj-$(CONFIG_XCU_SCHEDULER) += core.o rt.o cfs.o cfs_quota.o obj-$(CONFIG_CGROUP_XCU) += cgroup.o diff --git a/kernel/xsched/cfs_quota.c b/kernel/xsched/cfs_quota.c new file mode 100644 index 000000000000..6de1e78a7bef --- /dev/null +++ b/kernel/xsched/cfs_quota.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Bandwidth provisioning for XPU device + * + * Copyright (C) 2025-2026 Huawei Technologies Co., Ltd + * + * Author: Konstantin Meskhidze <konstantin.meskhidze@huawei.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ +#include <linux/timer.h> +#include <linux/xsched.h> + +static struct workqueue_struct *quota_workqueue; + +void xsched_quota_refill(struct work_struct *work) +{ + uint32_t id; + struct xsched_cu *xcu; + struct xsched_group *xg; + + xg = container_of(work, struct xsched_group, refill_work); + + spin_lock(&xg->lock); + xg->rt_exec = max((xg->rt_exec - xg->quota), 0LL); + hrtimer_start(&xg->quota_timeout, ns_to_ktime(xg->period), HRTIMER_MODE_REL_SOFT); + spin_unlock(&xg->lock); + + for_each_active_xcu(xcu, id) { + xcu = xsched_cu_mgr[id]; + mutex_lock(&xcu->xcu_lock); + if (!READ_ONCE(xg->perxcu_priv[id].xse.on_rq)) { + enqueue_ctx(&xg->perxcu_priv[id].xse, xcu); + XSCHED_INFO("Enqueue xg with quota refilled on xcu %u @ %s\n", + xcu->id, __func__); + wake_up_interruptible(&xcu->wq_xcu_idle); + } + mutex_unlock(&xcu->xcu_lock); + } +} + +static enum hrtimer_restart quota_timer_cb(struct hrtimer *hrtimer) +{ + struct xsched_group *xg; + + xg = container_of(hrtimer, struct xsched_group, quota_timeout); + queue_work(quota_workqueue, &xg->refill_work); + + return HRTIMER_NORESTART; +} + +void xsched_quota_account(struct xsched_group *xg, s64 exec_time) +{ + spin_lock(&xg->lock); + xg->rt_exec += exec_time; + spin_unlock(&xg->lock); +} + +bool xsched_quota_exceed(struct xsched_group *xg) +{ + bool ret; + + spin_lock(&xg->lock); + ret = (xg->quota > 0) ? 
(xg->rt_exec >= xg->quota) : false; + spin_unlock(&xg->lock); + + return ret; +} + +void xsched_quotas_init(void) +{ + quota_workqueue = create_singlethread_workqueue("xsched_quota_workqueue"); +} + +void xsched_quota_timeout_init(struct xsched_group *xg) +{ + hrtimer_init(&xg->quota_timeout, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); + xg->quota_timeout.function = quota_timer_cb; +} + +void xsched_quota_timeout_update(struct xsched_group *xg) +{ + struct hrtimer *t = &xg->quota_timeout; + + hrtimer_cancel(t); + if (xg->quota > 0 && xg->period > 0) + hrtimer_start(t, ns_to_ktime(xg->period), HRTIMER_MODE_REL_SOFT); +} diff --git a/kernel/xsched/cgroup.c b/kernel/xsched/cgroup.c index 758b8b9c183d..74a682903ede 100644 --- a/kernel/xsched/cgroup.c +++ b/kernel/xsched/cgroup.c @@ -47,6 +47,8 @@ void xcu_cg_init_common(struct xsched_group *xcg) spin_lock_init(&xcg->lock); INIT_LIST_HEAD(&xcg->members); INIT_LIST_HEAD(&xcg->children_groups); + xsched_quota_timeout_init(xcg); + INIT_WORK(&xcg->refill_work, xsched_quota_refill); } static void xcu_cfs_root_cg_init(void) @@ -62,6 +64,9 @@ static void xcu_cfs_root_cg_init(void) } root_xcg->sched_type = XSCHED_TYPE_DFLT; + root_xcg->period = XSCHED_CFS_QUOTA_PERIOD_MS; + root_xcg->quota = XSCHED_TIME_INF; + xsched_quotas_init(); } /** @@ -110,6 +115,9 @@ static void xcu_cfs_cg_init(struct xsched_group *xcg, xcg->shares_cfg = XSCHED_CFG_SHARE_DFLT; xcu_grp_shares_update(parent_xg); + + xcg->period = XSCHED_CFS_QUOTA_PERIOD_MS; + xcg->quota = XSCHED_TIME_INF; } static void xcu_cfs_cg_deinit(struct xsched_group *xcg) @@ -206,6 +214,8 @@ static void xcu_css_free(struct cgroup_subsys_state *css) break; } } + hrtimer_cancel(&xcg->quota_timeout); + cancel_work_sync(&xcg->refill_work); list_del(&xcg->group_node); mutex_unlock(&xcg_mutex); @@ -445,6 +455,13 @@ static s64 xcu_read_s64(struct cgroup_subsys_state *css, struct cftype *cft) spin_lock(&xcucg->lock); switch (cft->private) { + case XCU_FILE_PERIOD_MS: + ret = xcucg->period / 
NSEC_PER_MSEC; + break; + case XCU_FILE_QUOTA_MS: + ret = (xcucg->quota > 0) ? xcucg->quota / NSEC_PER_MSEC : + xcucg->quota; + break; case XCU_FILE_SHARES: ret = xcucg->shares_cfg; break; @@ -521,8 +538,24 @@ static int xcu_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, spin_lock(&xcucg->lock); switch (cft->private) { + case XCU_FILE_PERIOD_MS: + if (val < 1 || val > (S64_MAX / NSEC_PER_MSEC)) { + ret = -EINVAL; + break; + } + xcucg->period = val * NSEC_PER_MSEC; + xsched_quota_timeout_update(xcucg); + break; + case XCU_FILE_QUOTA_MS: + if (val < -1 || val > (S64_MAX / NSEC_PER_MSEC)) { + ret = -EINVAL; + break; + } + xcucg->quota = (val > 0) ? val * NSEC_PER_MSEC : val; + xsched_quota_timeout_update(xcucg); + break; case XCU_FILE_SHARES: - if (val <= 0) { + if (val <= 0 || val > U64_MAX) { ret = -EINVAL; break; } @@ -565,11 +598,28 @@ static int xcu_stat(struct seq_file *sf, void *v) seq_printf(sf, "exec_runtime: %llu\n", exec_runtime); seq_printf(sf, "shares cfg: %llu/%llu x%u\n", xcucg->shares_cfg, xcucg->parent->children_shares_sum, xcucg->weight); + seq_printf(sf, "quota: %lld\n", xcucg->quota); + seq_printf(sf, "used: %lld\n", xcucg->rt_exec); + seq_printf(sf, "period: %lld\n", xcucg->period); return 0; } static struct cftype xcu_cg_files[] = { + { + .name = "period_ms", + .flags = CFTYPE_NOT_ON_ROOT, + .read_s64 = xcu_read_s64, + .write_s64 = xcu_write_s64, + .private = XCU_FILE_PERIOD_MS, + }, + { + .name = "quota_ms", + .flags = CFTYPE_NOT_ON_ROOT, + .read_s64 = xcu_read_s64, + .write_s64 = xcu_write_s64, + .private = XCU_FILE_QUOTA_MS, + }, { .name = "shares", .flags = CFTYPE_NOT_ON_ROOT, diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c index b28a11909fd6..98f4965d97e5 100644 --- a/kernel/xsched/core.c +++ b/kernel/xsched/core.c @@ -48,6 +48,7 @@ static void put_prev_ctx(struct xsched_entity *xse) lockdep_assert_held(&xcu->xcu_lock); + xsched_quota_account(xse->parent_grp, xse->last_process_time); xse->class->put_prev_ctx(xse); 
XSCHED_INFO("Put current xse %d sum_exec_runtime %llu @ %s\n", xse->tgid, xse->cfs.sum_exec_runtime, __func__); @@ -617,7 +618,17 @@ static int xsched_schedule(void *input_xcu) XSCHED_INFO( "%s: Dequeue xse %d due to zero kicks on xcu %u\n", __func__, curr_xse->tgid, xcu->id); - curr_xse = xcu->xrq.curr_xse = NULL; + xcu->xrq.curr_xse = NULL; + } + if (xsched_quota_exceed(curr_xse->parent_grp)) { + dequeue_ctx(&curr_xse->parent_grp + ->perxcu_priv[xcu->id] + .xse, + xcu); + XSCHED_INFO( + "%s: Dequeue group of xse %d due to quota exceed on xcu %u\n", + __func__, curr_xse->tgid, xcu->id); + xcu->xrq.curr_xse = NULL; } } } -- 2.34.1