
From: Alekseev Dmitry <alekseev.dmitry@huawei.com> hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB ----------------------------------------- Add support for CFS quota for cgroups. Signed-off-by: Alekseev Dmitry <alekseev.dmitry@huawei.com> Signed-off-by: Hui Tang <tanghui20@huawei.com> --- include/linux/xsched.h | 10 ++++ include/uapi/linux/xcu_vstream.h | 1 + kernel/xsched/Makefile | 2 +- kernel/xsched/cfs_quota.c | 96 ++++++++++++++++++++++++++++++++ kernel/xsched/cgroup.c | 52 ++++++++++++++++- kernel/xsched/core.c | 13 ++++- 6 files changed, 171 insertions(+), 3 deletions(-) create mode 100644 kernel/xsched/cfs_quota.c diff --git a/include/linux/xsched.h b/include/linux/xsched.h index 51be002b1970..5de53d9c231b 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -59,6 +59,8 @@ #define XSCHED_TIME_INF RUNTIME_INF #define XSCHED_CFS_ENTITY_WEIGHT_DFLT 1 #define XSCHED_CFS_MIN_TIMESLICE (10 * NSEC_PER_MSEC) +#define XSCHED_CFS_PERIOD (1 * NSEC_PER_MSEC) +#define XSCHED_CFS_QUOTA_PERIOD_MS (100 * NSEC_PER_MSEC) #define XSCHED_CFG_SHARE_DFLT 1024 #define __GET_VS_TASK_TYPE(t) ((t)&0xFF) @@ -621,6 +623,7 @@ static inline void xsched_init_vsm(struct vstream_metadata *vsm, vstream_args_t *arg) { vsm->sq_id = arg->sq_id; + vsm->exec_time = arg->vk_args.exec_time; vsm->sqe_num = arg->vk_args.sqe_num; vsm->timeout = arg->vk_args.timeout; memcpy(vsm->sqe, arg->vk_args.sqe, XCU_SQE_SIZE_MAX); @@ -645,5 +648,12 @@ int xsched_group_inherit(struct task_struct *tsk, struct xsched_entity *xse); void xcu_cg_init_common(struct xsched_group *xcg); void xcu_grp_shares_update(struct xsched_group *xg); void xsched_group_xse_detach(struct xsched_entity *xse); + +void xsched_quotas_init(void); +void xsched_quota_timeout_init(struct xsched_group *xg); +void xsched_quota_timeout_update(struct xsched_group *xg); +void xsched_quota_account(struct xsched_group *xg, s64 exec_time); +bool xsched_quota_exceed(struct xsched_group *xg); 
+void xsched_quota_refill(struct work_struct *work); void enqueue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu); #endif /* !__LINUX_XSCHED_H__ */ diff --git a/include/uapi/linux/xcu_vstream.h b/include/uapi/linux/xcu_vstream.h index 095d203ad422..1024f5af5c2d 100644 --- a/include/uapi/linux/xcu_vstream.h +++ b/include/uapi/linux/xcu_vstream.h @@ -28,6 +28,7 @@ typedef struct vstream_free_args { } vstream_free_args_t; typedef struct vstream_kick_args { __u32 sqe_num; + __u32 exec_time; __s32 timeout; __s8 sqe[XCU_SQE_SIZE_MAX]; } vstream_kick_args_t; diff --git a/kernel/xsched/Makefile b/kernel/xsched/Makefile index c4c06b6038ff..8ab32b086b3d 100644 --- a/kernel/xsched/Makefile +++ b/kernel/xsched/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 obj-y += vstream.o -obj-$(CONFIG_XCU_SCHEDULER) += core.o rt.o cfs.o +obj-$(CONFIG_XCU_SCHEDULER) += core.o rt.o cfs.o cfs_quota.o obj-$(CONFIG_CGROUP_XCU) += cgroup.o diff --git a/kernel/xsched/cfs_quota.c b/kernel/xsched/cfs_quota.c new file mode 100644 index 000000000000..6de1e78a7bef --- /dev/null +++ b/kernel/xsched/cfs_quota.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Bandwidth provisioning for XPU device + * + * Copyright (C) 2025-2026 Huawei Technologies Co., Ltd + * + * Author: Konstantin Meskhidze <konstantin.meskhidze@huawei.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ +#include <linux/timer.h> +#include <linux/xsched.h> + +static struct workqueue_struct *quota_workqueue; + +void xsched_quota_refill(struct work_struct *work) +{ + uint32_t id; + struct xsched_cu *xcu; + struct xsched_group *xg; + + xg = container_of(work, struct xsched_group, refill_work); + + spin_lock(&xg->lock); + xg->rt_exec = max((xg->rt_exec - xg->quota), 0LL); + hrtimer_start(&xg->quota_timeout, ns_to_ktime(xg->period), HRTIMER_MODE_REL_SOFT); + spin_unlock(&xg->lock); + + for_each_active_xcu(xcu, id) { + xcu = xsched_cu_mgr[id]; + mutex_lock(&xcu->xcu_lock); + if (!READ_ONCE(xg->perxcu_priv[id].xse.on_rq)) { + enqueue_ctx(&xg->perxcu_priv[id].xse, xcu); + XSCHED_INFO("Enqueue xg with quota refilled on xcu %u @ %s\n", + xcu->id, __func__); + wake_up_interruptible(&xcu->wq_xcu_idle); + } + mutex_unlock(&xcu->xcu_lock); + } +} + +static enum hrtimer_restart quota_timer_cb(struct hrtimer *hrtimer) +{ + struct xsched_group *xg; + + xg = container_of(hrtimer, struct xsched_group, quota_timeout); + queue_work(quota_workqueue, &xg->refill_work); + + return HRTIMER_NORESTART; +} + +void xsched_quota_account(struct xsched_group *xg, s64 exec_time) +{ + spin_lock(&xg->lock); + xg->rt_exec += exec_time; + spin_unlock(&xg->lock); +} + +bool xsched_quota_exceed(struct xsched_group *xg) +{ + bool ret; + + spin_lock(&xg->lock); + ret = (xg->quota > 0) ? 
(xg->rt_exec >= xg->quota) : false; + spin_unlock(&xg->lock); + + return ret; +} + +void xsched_quotas_init(void) +{ + quota_workqueue = create_singlethread_workqueue("xsched_quota_workqueue"); +} + +void xsched_quota_timeout_init(struct xsched_group *xg) +{ + hrtimer_init(&xg->quota_timeout, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); + xg->quota_timeout.function = quota_timer_cb; +} + +void xsched_quota_timeout_update(struct xsched_group *xg) +{ + struct hrtimer *t = &xg->quota_timeout; + + hrtimer_cancel(t); + if (xg->quota > 0 && xg->period > 0) + hrtimer_start(t, ns_to_ktime(xg->period), HRTIMER_MODE_REL_SOFT); +} diff --git a/kernel/xsched/cgroup.c b/kernel/xsched/cgroup.c index 758b8b9c183d..74a682903ede 100644 --- a/kernel/xsched/cgroup.c +++ b/kernel/xsched/cgroup.c @@ -47,6 +47,8 @@ void xcu_cg_init_common(struct xsched_group *xcg) spin_lock_init(&xcg->lock); INIT_LIST_HEAD(&xcg->members); INIT_LIST_HEAD(&xcg->children_groups); + xsched_quota_timeout_init(xcg); + INIT_WORK(&xcg->refill_work, xsched_quota_refill); } static void xcu_cfs_root_cg_init(void) @@ -62,6 +64,9 @@ static void xcu_cfs_root_cg_init(void) } root_xcg->sched_type = XSCHED_TYPE_DFLT; + root_xcg->period = XSCHED_CFS_QUOTA_PERIOD_MS; + root_xcg->quota = XSCHED_TIME_INF; + xsched_quotas_init(); } /** @@ -110,6 +115,9 @@ static void xcu_cfs_cg_init(struct xsched_group *xcg, xcg->shares_cfg = XSCHED_CFG_SHARE_DFLT; xcu_grp_shares_update(parent_xg); + + xcg->period = XSCHED_CFS_QUOTA_PERIOD_MS; + xcg->quota = XSCHED_TIME_INF; } static void xcu_cfs_cg_deinit(struct xsched_group *xcg) @@ -206,6 +214,8 @@ static void xcu_css_free(struct cgroup_subsys_state *css) break; } } + hrtimer_cancel(&xcg->quota_timeout); + cancel_work_sync(&xcg->refill_work); list_del(&xcg->group_node); mutex_unlock(&xcg_mutex); @@ -445,6 +455,13 @@ static s64 xcu_read_s64(struct cgroup_subsys_state *css, struct cftype *cft) spin_lock(&xcucg->lock); switch (cft->private) { + case XCU_FILE_PERIOD_MS: + ret = xcucg->period / 
NSEC_PER_MSEC; + break; + case XCU_FILE_QUOTA_MS: + ret = (xcucg->quota > 0) ? xcucg->quota / NSEC_PER_MSEC : + xcucg->quota; + break; case XCU_FILE_SHARES: ret = xcucg->shares_cfg; break; @@ -521,8 +538,24 @@ static int xcu_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, spin_lock(&xcucg->lock); switch (cft->private) { + case XCU_FILE_PERIOD_MS: + if (val < 1 || val > (S64_MAX / NSEC_PER_MSEC)) { + ret = -EINVAL; + break; + } + xcucg->period = val * NSEC_PER_MSEC; + xsched_quota_timeout_update(xcucg); + break; + case XCU_FILE_QUOTA_MS: + if (val < -1 || val > (S64_MAX / NSEC_PER_MSEC)) { + ret = -EINVAL; + break; + } + xcucg->quota = (val > 0) ? val * NSEC_PER_MSEC : val; + xsched_quota_timeout_update(xcucg); + break; case XCU_FILE_SHARES: - if (val <= 0) { + if (val <= 0 || val > U64_MAX) { ret = -EINVAL; break; } @@ -565,11 +598,28 @@ static int xcu_stat(struct seq_file *sf, void *v) seq_printf(sf, "exec_runtime: %llu\n", exec_runtime); seq_printf(sf, "shares cfg: %llu/%llu x%u\n", xcucg->shares_cfg, xcucg->parent->children_shares_sum, xcucg->weight); + seq_printf(sf, "quota: %lld\n", xcucg->quota); + seq_printf(sf, "used: %lld\n", xcucg->rt_exec); + seq_printf(sf, "period: %lld\n", xcucg->period); return 0; } static struct cftype xcu_cg_files[] = { + { + .name = "period_ms", + .flags = CFTYPE_NOT_ON_ROOT, + .read_s64 = xcu_read_s64, + .write_s64 = xcu_write_s64, + .private = XCU_FILE_PERIOD_MS, + }, + { + .name = "quota_ms", + .flags = CFTYPE_NOT_ON_ROOT, + .read_s64 = xcu_read_s64, + .write_s64 = xcu_write_s64, + .private = XCU_FILE_QUOTA_MS, + }, { .name = "shares", .flags = CFTYPE_NOT_ON_ROOT, diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c index b28a11909fd6..98f4965d97e5 100644 --- a/kernel/xsched/core.c +++ b/kernel/xsched/core.c @@ -48,6 +48,7 @@ static void put_prev_ctx(struct xsched_entity *xse) lockdep_assert_held(&xcu->xcu_lock); + xsched_quota_account(xse->parent_grp, xse->last_process_time); xse->class->put_prev_ctx(xse); 
XSCHED_INFO("Put current xse %d sum_exec_runtime %llu @ %s\n", xse->tgid, xse->cfs.sum_exec_runtime, __func__); @@ -617,7 +618,17 @@ static int xsched_schedule(void *input_xcu) XSCHED_INFO( "%s: Dequeue xse %d due to zero kicks on xcu %u\n", __func__, curr_xse->tgid, xcu->id); - curr_xse = xcu->xrq.curr_xse = NULL; + xcu->xrq.curr_xse = NULL; + } + if (xsched_quota_exceed(curr_xse->parent_grp)) { + dequeue_ctx(&curr_xse->parent_grp + ->perxcu_priv[xcu->id] + .xse, + xcu); + XSCHED_INFO( + "%s: Dequeue group of xse %d due to quota exceed on xcu %u\n", + __func__, curr_xse->tgid, xcu->id); + xcu->xrq.curr_xse = NULL; } } } -- 2.34.1