
From: Alekseev Dmitry <alekseev.dmitry@huawei.com> hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB ----------------------------------------- Add cgroup initialization including root cgroup. Add xcu cgroup callbacks: alloc, free, attach, detach, etc. Add xsched_group cgroup management files and methods for: - sched type - shares Add xcu cgroup subsys and option CONFIG_CGROUP_XCU. Add cgroup.c to the kernel/xsched Makefile. Signed-off-by: Alekseev Dmitry <alekseev.dmitry@huawei.com> Signed-off-by: Hui Tang <tanghui20@huawei.com> Signed-off-by: Liu Kai <liukai284@huawei.com> Signed-off-by: Xia Fukun <xiafukun@huawei.com> --- include/linux/cgroup_subsys.h | 4 + include/linux/xsched.h | 112 +++++- kernel/cgroup/cgroup.c | 2 +- kernel/xsched/Kconfig | 15 + kernel/xsched/Makefile | 1 + kernel/xsched/cfs.c | 73 +++- kernel/xsched/cgroup.c | 619 ++++++++++++++++++++++++++++++++++ kernel/xsched/core.c | 16 +- 8 files changed, 815 insertions(+), 27 deletions(-) create mode 100644 kernel/xsched/cgroup.c diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 85fa78049bd0..e65ae90946c2 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -61,6 +61,10 @@ SUBSYS(pids) SUBSYS(rdma) #endif +#if IS_ENABLED(CONFIG_CGROUP_XCU) +SUBSYS(xcu) +#endif + #if IS_ENABLED(CONFIG_CGROUP_MISC) SUBSYS(misc) #endif diff --git a/include/linux/xsched.h b/include/linux/xsched.h index b4b6274b4c06..e59e4fe5e4b4 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -5,7 +5,7 @@ #include <linux/hash.h> #include <linux/hashtable.h> #include <linux/xcu_group.h> -#include <linux/kref.h> +#include <linux/cgroup.h> #include <linux/vstream.h> #ifndef pr_fmt #define pr_fmt(fmt) fmt @@ -39,13 +39,11 @@ #define XSCHED_TIME_INF RUNTIME_INF #define XSCHED_CFS_ENTITY_WEIGHT_DFLT 1 #define XSCHED_CFS_MIN_TIMESLICE (10*NSEC_PER_MSEC) +#define XSCHED_CFG_SHARE_DFLT 1024 #define __GET_VS_TASK_TYPE(t) ((t)&0xFF) - #define __GET_VS_TASK_PRIO_RT(t) (((t) >> 8) & 0xFF) - #define GET_VS_TASK_TYPE(vs_ptr) __GET_VS_TASK_TYPE((vs_ptr)->task_type) - #define GET_VS_TASK_PRIO_RT(vs_ptr) __GET_VS_TASK_PRIO_RT((vs_ptr)->task_type) /* @@ -57,6 +55,8 @@ */ #define XSCHED_CFS_KICK_SLICE 10 +extern struct xsched_cu *xsched_cu_mgr[XSCHED_NR_CUS]; + enum xcu_sched_type { XSCHED_TYPE_RT, XSCHED_TYPE_DFLT = XSCHED_TYPE_RT, @@ -186,12 +186,15 @@ struct xsched_cu { struct mutex xcu_lock; - atomic_t has_active; - wait_queue_head_t wq_xcu_idle; wait_queue_head_t wq_xcu_running; }; +extern int num_active_xcu; +#define for_each_active_xcu(xcu, id) \ + for ((id) = 0, xcu = xsched_cu_mgr[(id)]; \ + (id) < num_active_xcu && (xcu = xsched_cu_mgr[(id)]); (id)++) + struct xsched_entity_rt { struct list_head list_node; enum xse_state state; @@ -260,6 +263,11 @@ struct xsched_entity { */ struct xsched_cu *xcu; + /* Link to list of xsched_group items */ + struct list_head group_node; + struct xsched_group *parent_grp; + bool is_group; + /* General purpose xse lock.
*/ spinlock_t xse_lock; }; @@ -274,6 +282,93 @@ static inline bool xse_is_cfs(const struct xsched_entity *xse) return xse && xse->class == &fair_xsched_class; } +/* xsched_group's XCU-related data */ +struct xsched_group_xcu_priv { + /* Owner of this group */ + struct xsched_group *self; + + /* xcu id */ + int xcu_id; + + /* Link to scheduler */ + struct xsched_entity xse; /* xse of this group on runqueue */ + struct xsched_rq_cfs *cfs_rq; /* cfs runqueue "owned" by this group */ + struct xsched_rq_rt *rt_rq; /* rt runqueue "owned" by this group */ + + /* Statistics */ + int nr_throttled; + u64 throttled_time; + u64 overrun_time; +}; + +/* Xsched scheduling control group */ +struct xsched_group { + /* Cgroups controller structure */ + struct cgroup_subsys_state css; + + /* Control group settings: */ + int sched_type; + int prio; + + /* Bandwidth setting: shares value set by user */ + u64 shares_cfg; + u64 shares_cfg_red; + u32 weight; + u64 children_shares_sum; + + /* Bandwidth setting: maximal quota in period */ + s64 quota; + /* record the runtime of operators during the period */ + s64 runtime; + s64 period; + struct hrtimer quota_timeout; + struct work_struct refill_work; + u64 qoslevel; + + struct xsched_group_xcu_priv perxcu_priv[XSCHED_NR_CUS]; + + /* Groups hierarchy */ + struct xsched_group *parent; + struct list_head children_groups; + struct list_head group_node; + + spinlock_t lock; + + /* Member XSEs, used when moving them between per-XCU runqueues */ + struct list_head members; +}; + +#define XSCHED_RQ_OF(xse) \ + (container_of(((xse)->cfs.cfs_rq), struct xsched_rq, cfs)) + +#define XSCHED_RQ_OF_CFS_XSE(cfs_xse) \ + (container_of(((cfs_xse)->cfs_rq), struct xsched_rq, cfs)) + +#define XSCHED_SE_OF(cfs_xse) \ + (container_of((cfs_xse), struct xsched_entity, cfs)) + +#define xcg_parent_grp_xcu(xcg) \ + ((xcg)->self->parent->perxcu_priv[(xcg)->xcu_id]) + +#define xse_parent_grp_xcu(xse_cfs) \ + (&((XSCHED_SE_OF(xse_cfs) \ + ->parent_grp->perxcu_priv[(XSCHED_SE_OF(xse_cfs))->xcu->id]))) + +static inline struct xsched_group_xcu_priv * +xse_this_grp_xcu(struct xsched_entity_cfs *xse_cfs) +{ + struct xsched_entity *xse; + + xse = xse_cfs ? container_of(xse_cfs, struct xsched_entity, cfs) : NULL; + return xse ? container_of(xse, struct xsched_group_xcu_priv, xse) : NULL; } + +static inline struct xsched_group * +xse_this_grp(struct xsched_entity_cfs *xse_cfs) +{ + return xse_cfs ? xse_this_grp_xcu(xse_cfs)->self : NULL; +} + /* Returns a pointer to an atomic_t variable representing a counter * of currently pending vstream kicks on a given XCU and for a * given xsched class.
@@ -515,6 +610,11 @@ int xsched_vsm_add_tail(struct vstream_info *vs, vstream_args_t *arg); struct vstream_metadata *xsched_vsm_fetch_first(struct vstream_info *vs); void submit_kick(struct vstream_info *vs, struct xcu_op_handler_params *params, struct vstream_metadata *vsm); +/* Xsched group management functions */ +int xsched_group_inherit(struct task_struct *tsk, struct xsched_entity *xse); +void xcu_cg_init_common(struct xsched_group *xcg); +void xcu_grp_shares_update(struct xsched_group *xg); +void xsched_group_xse_detach(struct xsched_entity *xse); void enqueue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu); void dequeue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu); #endif /* __LINUX_XSCHED_H__ */ diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index c26a9b3a3576..b632590eae0f 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -6190,7 +6190,7 @@ int __init cgroup_init(void) struct cgroup_subsys *ss; int ssid; - BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16); + BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 17); BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files)); BUG_ON(cgroup_init_cftypes(NULL, cgroup_psi_files)); BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files)); diff --git a/kernel/xsched/Kconfig b/kernel/xsched/Kconfig index fc5d7767d8e0..776c3dfbdaa4 100644 --- a/kernel/xsched/Kconfig +++ b/kernel/xsched/Kconfig @@ -34,3 +34,18 @@ config XSCHED_NR_CUS This option defines the maximum number of Compute Units (CUs) that can be managed by the XSched scheduler, consider changing this value proportionally to the number of available XCU cores. + +config CGROUP_XCU + bool "XCU bandwidth control and group scheduling for xsched_cfs" + default n + depends on XCU_SCHEDULER + help + This option enables the extended Compute Unit (XCU) resource controller for + CFS task groups, providing hierarchical scheduling and fine-grained bandwidth + allocation capabilities. Key features include: + - Proportional XCU time distribution across cgroups based on shares/quotas + - Nested group scheduling with latency isolation + - Integration with xsched_cfs for fair XCU resource management + + Recommended for systems that require fine-grained resource control in cgroups. + If unsure, say N.
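Usage sketch (illustrative only, not part of the diff): with CONFIG_CGROUP_XCU enabled, the controller exposes the files defined by xcu_cg_files in cgroup.c below. Assuming the controller is mounted at /sys/fs/cgroup/xcu and the files carry the usual subsystem-name prefix ("xcu.sched", "xcu.shares", "xcu.stat"), a child group could be configured from userspace roughly as follows; the mount point and the group name "grp0" are hypothetical.

#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>

/* Write a string value into a cgroup control file. */
static int write_str(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, val, strlen(val)) < 0) {
		close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	/* Create a child group under the (assumed) controller mount. */
	mkdir("/sys/fs/cgroup/xcu/grp0", 0755);
	/* Select the CFS scheduler type, then grant the group 2048 shares. */
	write_str("/sys/fs/cgroup/xcu/grp0/xcu.sched", "cfs");
	write_str("/sys/fs/cgroup/xcu/grp0/xcu.shares", "2048");
	return 0;
}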
diff --git a/kernel/xsched/Makefile b/kernel/xsched/Makefile index fe212f228cf6..c4c06b6038ff 100644 --- a/kernel/xsched/Makefile +++ b/kernel/xsched/Makefile @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 obj-y += vstream.o obj-$(CONFIG_XCU_SCHEDULER) += core.o rt.o cfs.o +obj-$(CONFIG_CGROUP_XCU) += cgroup.o diff --git a/kernel/xsched/cfs.c b/kernel/xsched/cfs.c index 585462351d8a..0fad07e35246 100644 --- a/kernel/xsched/cfs.c +++ b/kernel/xsched/cfs.c @@ -19,6 +19,8 @@ #define CFS_INNER_RQ_EMPTY(cfs_xse) \ ((cfs_xse)->xruntime == XSCHED_TIME_INF) +extern struct xsched_group *root_xcg; + void xs_rq_add(struct xsched_entity_cfs *xse) { struct xsched_rq_cfs *cfs_rq = xse->cfs_rq; @@ -79,10 +81,46 @@ xs_pick_first(struct xsched_rq_cfs *cfs_rq) */ static void xs_update(struct xsched_entity_cfs *xse_cfs, u64 delta) { - u64 new_xrt = xse_cfs->xruntime + delta * xse_cfs->weight; + struct xsched_group_xcu_priv *xg = xse_parent_grp_xcu(xse_cfs); + + for (; xg; xse_cfs = &xg->xse.cfs, xg = &xcg_parent_grp_xcu(xg)) { + u64 new_xrt = xse_cfs->xruntime + delta * xse_cfs->weight; + + xs_cfs_rq_update(xse_cfs, new_xrt); + xse_cfs->sum_exec_runtime += delta; - xs_cfs_rq_update(xse_cfs, new_xrt); - xse_cfs->sum_exec_runtime += delta; + if (xg->self->parent == NULL) + break; + } +} + +/** + * xg_update() - Update container group's xruntime + * @xg: Descendant xsched group's private xcu control structure + * @task_delta: Change in the number of runnable entities in the group + * + * No locks required to access xsched_group_xcu_priv members, + * because only one worker thread works for one XCU. + */ +static void xg_update(struct xsched_group_xcu_priv *xg, int task_delta) +{ + u64 new_xrt; + struct xsched_entity_cfs *entry; + + for (; xg; xg = &xcg_parent_grp_xcu(xg)) { + xg->cfs_rq->nr_running += task_delta; + entry = xs_pick_first(xg->cfs_rq); + new_xrt = entry ? entry->xruntime * xg->xse.cfs.weight : XSCHED_TIME_INF; + + xg->cfs_rq->min_xruntime = new_xrt; + xg->xse.cfs.xruntime = new_xrt; + + if (!xg->xse.on_rq) + break; + if (!xg->self->parent) + break; + + xs_cfs_rq_update(&xg->xse.cfs, new_xrt); + } } /* @@ -92,19 +130,19 @@ static void xs_update(struct xsched_entity_cfs *xse_cfs, u64 delta) */ static void dequeue_ctx_fair(struct xsched_entity *xse) { + int task_delta; struct xsched_cu *xcu = xse->xcu; struct xsched_entity_cfs *first; struct xsched_entity_cfs *xse_cfs = &xse->cfs; + task_delta = + (xse->is_group) ? -(xse_this_grp_xcu(xse_cfs)->cfs_rq->nr_running) : -1; + xs_rq_remove(xse_cfs); + xg_update(xse_parent_grp_xcu(xse_cfs), task_delta); first = xs_pick_first(&xcu->xrq.cfs); xcu->xrq.cfs.min_xruntime = (first) ? first->xruntime : XSCHED_TIME_INF; - - if (xcu->xrq.cfs.min_xruntime == XSCHED_TIME_INF) { - atomic_set(&xcu->has_active, 0); - XSCHED_DEBUG("%s: set has_active to 0\n", __func__); - } } /** @@ -118,28 +156,27 @@ static void dequeue_ctx_fair(struct xsched_entity *xse) */ static void enqueue_ctx_fair(struct xsched_entity *xse, struct xsched_cu *xcu) { + int task_delta; struct xsched_entity_cfs *first; struct xsched_rq_cfs *rq; struct xsched_entity_cfs *xse_cfs = &xse->cfs; xse_cfs->weight = XSCHED_CFS_ENTITY_WEIGHT_DFLT; - rq = xse_cfs->cfs_rq = &xcu->xrq.cfs; + rq = xse_cfs->cfs_rq = xse_parent_grp_xcu(xse_cfs)->cfs_rq; + task_delta = + (xse->is_group) ?
xse_this_grp_xcu(xse_cfs)->cfs_rq->nr_running : 1; - /* If no XSE of only empty groups */ + /* If no XSE or only empty groups */ if (xs_pick_first(rq) == NULL || rq->min_xruntime == XSCHED_TIME_INF) rq->min_xruntime = xse_cfs->xruntime; else xse_cfs->xruntime = max(xse_cfs->xruntime, rq->min_xruntime); xs_rq_add(xse_cfs); + xg_update(xse_parent_grp_xcu(xse_cfs), task_delta); first = xs_pick_first(&xcu->xrq.cfs); xcu->xrq.cfs.min_xruntime = (first) ? first->xruntime : XSCHED_TIME_INF; - - if (xcu->xrq.cfs.min_xruntime != XSCHED_TIME_INF) { - atomic_set(&xcu->has_active, 1); - XSCHED_DEBUG("%s: set has_active to 1\n", __func__); - } } static struct xsched_entity *pick_next_ctx_fair(struct xsched_cu *xcu) @@ -151,6 +188,12 @@ static struct xsched_entity *pick_next_ctx_fair(struct xsched_cu *xcu) if (!xse) return NULL; + for (; XSCHED_SE_OF(xse)->is_group; xse = xs_pick_first(rq)) { + if (!xse || CFS_INNER_RQ_EMPTY(xse)) + return NULL; + rq = xse_this_grp_xcu(xse)->cfs_rq; + } + return container_of(xse, struct xsched_entity, cfs); } diff --git a/kernel/xsched/cgroup.c b/kernel/xsched/cgroup.c new file mode 100644 index 000000000000..de0279daf952 --- /dev/null +++ b/kernel/xsched/cgroup.c @@ -0,0 +1,619 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Support cgroups for XPU devices + * + * Copyright (C) 2025-2026 Huawei Technologies Co., Ltd + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ +#include <linux/err.h> +#include <linux/cgroup.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <linux/xsched.h> + +enum xcu_file_type { + XCU_FILE_PERIOD_MS, + XCU_FILE_QUOTA_MS, + XCU_FILE_SHARES, +}; + +static struct xsched_group root_xsched_group; +struct xsched_group *root_xcg = &root_xsched_group; +static bool root_cg_inited; + +static struct xsched_group *old_xcg; +static DECLARE_WAIT_QUEUE_HEAD(xcg_attach_wq); +static bool attach_in_progress; +static DEFINE_MUTEX(xcg_mutex); + +static const char xcu_sched_name[XSCHED_TYPE_NUM][4] = { + [XSCHED_TYPE_RT] = "rt", + [XSCHED_TYPE_CFS] = "cfs" +}; + +void xcu_cg_init_common(struct xsched_group *xcg) +{ + spin_lock_init(&xcg->lock); + INIT_LIST_HEAD(&xcg->members); + INIT_LIST_HEAD(&xcg->children_groups); +} + +static void xcu_cfs_root_cg_init(void) +{ + int id; + struct xsched_cu *xcu; + + for_each_active_xcu(xcu, id) { + root_xcg->perxcu_priv[id].xcu_id = id; + root_xcg->perxcu_priv[id].self = root_xcg; + root_xcg->perxcu_priv[id].cfs_rq = &xcu->xrq.cfs; + root_xcg->perxcu_priv[id].xse.cfs.weight = 1; + } + + root_xcg->sched_type = XSCHED_TYPE_DFLT; +} + +/** + * xcu_cfs_cg_init() - Initialize xsched_group cfs runqueues and bw control. + * @xcg: new xsched_cgroup + * @parent_xg: parent's group + * + * One xsched_group can host many processes with contexts on different devices. + * The function creates an xsched_entity for every XCU and places it in the + * parent group's runqueue, and creates a new CFS runqueue for the XSEs + * inside the group.
+ */ +static int xcu_cfs_cg_init(struct xsched_group *xcg, + struct xsched_group *parent_xg) +{ + int id = 0, err, i; + struct xsched_cu *xcu; + struct xsched_rq_cfs *sub_cfs_rq; + + if (unlikely(!root_cg_inited)) { + xcu_cfs_root_cg_init(); + root_cg_inited = true; + } + + for_each_active_xcu(xcu, id) { + xcg->perxcu_priv[id].xcu_id = id; + xcg->perxcu_priv[id].self = xcg; + + sub_cfs_rq = kzalloc(sizeof(struct xsched_rq_cfs), GFP_KERNEL); + if (!sub_cfs_rq) { + XSCHED_ERR("Failed to alloc cfs runqueue on xcu %d\n", id); + err = -ENOMEM; + goto alloc_error; + } + xcg->perxcu_priv[id].cfs_rq = sub_cfs_rq; + xcg->perxcu_priv[id].cfs_rq->ctx_timeline = RB_ROOT_CACHED; + + xcg->perxcu_priv[id].xse.is_group = true; + xcg->perxcu_priv[id].xse.xcu = xcu; + xcg->perxcu_priv[id].xse.class = &fair_xsched_class; + + /* Put new empty groups to the right in parent's rbtree: */ + xcg->perxcu_priv[id].xse.cfs.xruntime = XSCHED_TIME_INF; + xcg->perxcu_priv[id].xse.cfs.weight = + XSCHED_CFS_ENTITY_WEIGHT_DFLT; + xcg->perxcu_priv[id].xse.parent_grp = parent_xg; + + mutex_lock(&xcu->xcu_lock); + enqueue_ctx(&xcg->perxcu_priv[id].xse, xcu); + mutex_unlock(&xcu->xcu_lock); + } + + xcg->shares_cfg = XSCHED_CFG_SHARE_DFLT; + xcu_grp_shares_update(parent_xg); + + return 0; + +alloc_error: + for (i = 0; i < id; i++) + kfree(xcg->perxcu_priv[i].cfs_rq); + return err; +} + +static void xcu_cfs_cg_deinit(struct xsched_group *xcg) +{ + uint32_t id; + struct xsched_cu *xcu; + + for_each_active_xcu(xcu, id) { + mutex_lock(&xcu->xcu_lock); + dequeue_ctx(&xcg->perxcu_priv[id].xse, xcu); + mutex_unlock(&xcu->xcu_lock); + kfree(xcg->perxcu_priv[id].cfs_rq); + } + xcu_grp_shares_update(xcg->parent); +} + +/** + * xcu_cg_init() - Initialize non-root xsched_group structure. + * @xcg: new xsched_cgroup + * @parent_xg: parent's group + */ +static int xcu_cg_init(struct xsched_group *xcg, + struct xsched_group *parent_xg) +{ + xcu_cg_init_common(xcg); + xcg->parent = parent_xg; + list_add_tail(&xcg->group_node, &parent_xg->children_groups); + xcg->sched_type = parent_xg->sched_type; + + switch (xcg->sched_type) { + case XSCHED_TYPE_CFS: + return xcu_cfs_cg_init(xcg, parent_xg); + default: + pr_info("xcu_cgroup: init RT group css=0x%lx\n", + (uintptr_t)&xcg->css); + break; + } + + return 0; +} + +inline struct xsched_group *xcu_cg_from_css(struct cgroup_subsys_state *css) +{ + return css ? container_of(css, struct xsched_group, css) : NULL; +} + +/** + * xcu_css_alloc() - Allocate and init xcu cgroup. + * @parent_css: css of parent xcu cgroup + * + * Called from kernel/cgroup.c with cgroup_lock() held. + * First called during subsys initialization to create the root xcu cgroup, + * while the XCUs haven't been initialized yet. The function is then called + * for every new cgroup creation; the second call sets up the root + * xsched_group runqueue. + * + * Return: pointer to the new xcu cgroup css on success, ERR_PTR otherwise.
+ */ +static struct cgroup_subsys_state * +xcu_css_alloc(struct cgroup_subsys_state *parent_css) +{ + struct xsched_group *parent_xg; + struct xsched_group *xg; + int err; + + if (!parent_css) + return &root_xsched_group.css; + + xg = kzalloc(sizeof(*xg), GFP_KERNEL); + if (!xg) + return ERR_PTR(-ENOMEM); + + mutex_lock(&xcg_mutex); + parent_xg = xcu_cg_from_css(parent_css); + err = xcu_cg_init(xg, parent_xg); + mutex_unlock(&xcg_mutex); + if (err) { + kfree(xg); + XSCHED_ERR("Failed to init new xcu group @ %s\n", __func__); + return ERR_PTR(err); + } + + return &xg->css; +} + +static void xcu_css_free(struct cgroup_subsys_state *css) +{ + struct xsched_group *xcg; + + mutex_lock(&xcg_mutex); + xcg = xcu_cg_from_css(css); + if (xcg->parent != NULL) { + switch (xcg->sched_type) { + case XSCHED_TYPE_CFS: + xcu_cfs_cg_deinit(xcg); + break; + default: + pr_info("xcu_cgroup: deinit RT group css=0x%lx\n", + (uintptr_t)&xcg->css); + break; + } + } + list_del(&xcg->group_node); + mutex_unlock(&xcg_mutex); + + kfree(xcg); +} + +static int xcu_css_online(struct cgroup_subsys_state *css) +{ + return 0; +} + +static void xcu_css_offline(struct cgroup_subsys_state *css) +{ +} + +static void xsched_group_xse_attach(struct xsched_group *xg, + struct xsched_entity *xse) +{ + spin_lock(&xg->lock); + list_add_tail(&xse->group_node, &xg->members); + spin_unlock(&xg->lock); + xse->parent_grp = xg; +} + +void xsched_group_xse_detach(struct xsched_entity *xse) +{ + struct xsched_group *xcg = xse->parent_grp; + + spin_lock(&xcg->lock); + list_del(&xse->group_node); + spin_unlock(&xcg->lock); +} + +static int xcu_task_can_attach(struct task_struct *task, + struct xsched_group *old, struct xsched_group *dst) +{ + struct xsched_entity *xse; + bool has_xse = false; + + spin_lock(&old->lock); + list_for_each_entry(xse, &old->members, group_node) { + if (xse->owner_pid == task_pid_nr(task)) { + has_xse = true; + break; + } + } + spin_unlock(&old->lock); + + return has_xse ?
-EINVAL : 0; +} + +static int xcu_can_attach(struct cgroup_taskset *tset) +{ + struct task_struct *task; + struct cgroup_subsys_state *dst_css, *old_css; + struct xsched_group *dst_xcg; + int ret = 0; + + mutex_lock(&xcg_mutex); + cgroup_taskset_for_each(task, dst_css, tset) { + old_css = task_css(task, xcu_cgrp_id); + dst_xcg = xcu_cg_from_css(dst_css); + old_xcg = xcu_cg_from_css(old_css); + ret = xcu_task_can_attach(task, old_xcg, dst_xcg); + if (ret) + break; + } + if (!ret) + attach_in_progress = true; + mutex_unlock(&xcg_mutex); + return ret; +} + +static void xcu_cancel_attach(struct cgroup_taskset *tset) +{ + mutex_lock(&xcg_mutex); + attach_in_progress = false; + wake_up(&xcg_attach_wq); + mutex_unlock(&xcg_mutex); +} + +void xcu_move_task(struct task_struct *task, struct xsched_group *old_xcg, + struct xsched_group *new_xcg) +{ + struct xsched_entity *xse, *tmp; + struct xsched_cu *xcu; + + spin_lock(&old_xcg->lock); + list_for_each_entry_safe(xse, tmp, &old_xcg->members, group_node) { + if (xse->owner_pid != task_pid_nr(task)) + continue; + + xcu = xse->xcu; + BUG_ON(old_xcg != xse->parent_grp); + + /* delete from the old_xcg */ + list_del(&xse->group_node); + + mutex_lock(&xcu->xcu_lock); + /* dequeue from the current runqueue */ + dequeue_ctx(xse, xcu); + /* attach to the new_xcg */ + xsched_group_xse_attach(new_xcg, xse); + /* enqueue to the runqueue in new_xcg */ + enqueue_ctx(xse, xcu); + mutex_unlock(&xcu->xcu_lock); + } + spin_unlock(&old_xcg->lock); +} + +static void xcu_attach(struct cgroup_taskset *tset) +{ + struct task_struct *task; + struct cgroup_subsys_state *css; + + mutex_lock(&xcg_mutex); + cgroup_taskset_for_each(task, css, tset) { + xcu_move_task(task, old_xcg, xcu_cg_from_css(css)); + } + attach_in_progress = false; + wake_up(&xcg_attach_wq); + mutex_unlock(&xcg_mutex); +} + +/** + * xsched_group_inherit() - Attach new entity to task's xsched_group. + * @task: task_struct + * @xse: xsched entity + * + * Called during xsched context initialization to attach the xse to the + * task's group and inherit the group's scheduling class and bandwidth + * control policy. + * + * Return: Zero on success. + */ +int xsched_group_inherit(struct task_struct *task, struct xsched_entity *xse) +{ + struct cgroup_subsys_state *css; + struct xsched_group *xg; + +retry: + wait_event(xcg_attach_wq, !attach_in_progress); + + mutex_lock(&xcg_mutex); + if (attach_in_progress) { + mutex_unlock(&xcg_mutex); + goto retry; + } + xse->owner_pid = task_pid_nr(task); + css = task_get_css(task, xcu_cgrp_id); + xg = xcu_cg_from_css(css); + xsched_group_xse_attach(xg, xse); + css_put(css); + mutex_unlock(&xcg_mutex); + + return 0; +} + +static int xcu_sched_show(struct seq_file *sf, void *v) +{ + struct cgroup_subsys_state *css = seq_css(sf); + struct xsched_group *xg = xcu_cg_from_css(css); + + seq_printf(sf, "%s\n", xcu_sched_name[xg->sched_type]); + return 0; +} + +/** + * xcu_cg_set_sched() - Set scheduling type for group. + * @xg: xsched group + * @type: scheduler type + * + * Scheduler type can be changed only if the group is a child of the root + * group and has no scheduling entities.
+ * + * Return: Zero on success; -EINVAL or -EBUSY on failure. + */ +int xcu_cg_set_sched(struct xsched_group *xg, int type) +{ + if (type == xg->sched_type) + return 0; + + if (xg->parent != root_xcg) + return -EINVAL; + + if (!list_empty(&xg->members)) + return -EBUSY; + + if (xg->sched_type == XSCHED_TYPE_CFS) + xcu_cfs_cg_deinit(xg); + + xg->sched_type = type; + if (type != XSCHED_TYPE_CFS) + return 0; + + /* type is XSCHED_TYPE_CFS */ + return xcu_cfs_cg_init(xg, xg->parent); +} + +static ssize_t xcu_sched_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + struct cgroup_subsys_state *css = of_css(of); + struct xsched_group *xg = xcu_cg_from_css(css); + char type_name[4]; + int type = -1; + + ssize_t ret = sscanf(buf, "%3s", type_name); + + if (ret < 1) + return -EINVAL; + + for (type = 0; type < XSCHED_TYPE_NUM; type++) { + if (!strcmp(type_name, xcu_sched_name[type])) + break; + } + + if (type == XSCHED_TYPE_NUM) + return -EINVAL; + + if (!list_empty(&css->children)) + return -EBUSY; + + mutex_lock(&xcg_mutex); + ret = xcu_cg_set_sched(xg, type); + mutex_unlock(&xcg_mutex); + + return (ret) ? ret : nbytes; +} + +static s64 xcu_read_s64(struct cgroup_subsys_state *css, struct cftype *cft) +{ + s64 ret = 0; + struct xsched_group *xcucg = xcu_cg_from_css(css); + + spin_lock(&xcucg->lock); + switch (cft->private) { + case XCU_FILE_SHARES: + ret = xcucg->shares_cfg; + break; + default: + break; + } + spin_unlock(&xcucg->lock); + return ret; +} + +static inline u64 gcd(u64 a, u64 b) +{ + u64 rem; + + while (a != 0 && b != 0) { + if (a > b) { + div64_u64_rem(a, b, &rem); + a = rem; + } else { + div64_u64_rem(b, a, &rem); + b = rem; + } + } + return (a) ? a : b; +} + +void xcu_grp_shares_update(struct xsched_group *xg) +{ + int id; + struct xsched_cu *xcu; + struct xsched_group *xgi, *parent = xg; + u64 rem, sh_sum = 0, sh_gcd = 0, w_gcd = 0, sh_prod_red = 1; + + spin_lock(&parent->lock); + list_for_each_entry(xgi, &parent->children_groups, group_node) { + if (xgi->sched_type == XSCHED_TYPE_CFS) + sh_gcd = gcd(sh_gcd, xgi->shares_cfg); + } + + list_for_each_entry(xgi, &parent->children_groups, group_node) { + if (xgi->sched_type == XSCHED_TYPE_CFS) { + sh_sum += xgi->shares_cfg; + xgi->shares_cfg_red = div64_u64(xgi->shares_cfg, sh_gcd); + div64_u64_rem(sh_prod_red, xgi->shares_cfg_red, &rem); + if (rem) + sh_prod_red *= xgi->shares_cfg_red; + } + } + + parent->children_shares_sum = sh_sum; + list_for_each_entry(xgi, &parent->children_groups, group_node) { + if (xgi->sched_type == XSCHED_TYPE_CFS) { + xgi->weight = div64_u64(sh_prod_red, xgi->shares_cfg_red); + w_gcd = gcd(w_gcd, xgi->weight); + } + } + + list_for_each_entry(xgi, &parent->children_groups, group_node) { + if (xgi->sched_type == XSCHED_TYPE_CFS) { + xgi->weight = div64_u64(xgi->weight, w_gcd); + for_each_active_xcu(xcu, id) { + mutex_lock(&xcu->xcu_lock); + xgi->perxcu_priv[id].xse.cfs.weight = xgi->weight; + mutex_unlock(&xcu->xcu_lock); + } + } + } + spin_unlock(&parent->lock); +} + +static int xcu_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, + s64 val) +{ + int ret = 0; + struct xsched_group *xcucg = xcu_cg_from_css(css); + + spin_lock(&xcucg->lock); + switch (cft->private) { + case XCU_FILE_SHARES: + if (val <= 0) { + ret = -EINVAL; + break; + } + xcucg->shares_cfg = val; + xcu_grp_shares_update(xcucg->parent); + break; + default: + ret = -EINVAL; + break; + } + spin_unlock(&xcucg->lock); + + return ret; +} + +static int xcu_stat(struct seq_file *sf, void *v) +{ + struct
cgroup_subsys_state *css = seq_css(sf); + struct xsched_group *xcucg = xcu_cg_from_css(css); + + u64 nr_throttled = 0; + u64 throttled_time = 0; + u64 exec_runtime = 0; + + int xcu_id; + struct xsched_cu *xcu; + + if (xcucg->sched_type == XSCHED_TYPE_RT) { + seq_puts(sf, "RT group stat is not supported\n"); + return 0; + } + + for_each_active_xcu(xcu, xcu_id) { + nr_throttled += xcucg->perxcu_priv[xcu_id].nr_throttled; + throttled_time += xcucg->perxcu_priv[xcu_id].throttled_time; + exec_runtime += + xcucg->perxcu_priv[xcu_id].xse.cfs.sum_exec_runtime; + } + + seq_printf(sf, "exec_runtime: %llu\n", exec_runtime); + seq_printf(sf, "nr_throttled: %llu\n", nr_throttled); + seq_printf(sf, "throttled_time: %llu\n", throttled_time); + seq_printf(sf, "shares cfg: %llu/%llu x%u\n", xcucg->shares_cfg, + xcucg->parent->children_shares_sum, xcucg->weight); + + return 0; +} + +static struct cftype xcu_cg_files[] = { + { + .name = "shares", + .flags = CFTYPE_NOT_ON_ROOT, + .read_s64 = xcu_read_s64, + .write_s64 = xcu_write_s64, + .private = XCU_FILE_SHARES, + }, + { + .name = "stat", + .seq_show = xcu_stat, + }, + { + .name = "sched", + .flags = CFTYPE_NOT_ON_ROOT, + .seq_show = xcu_sched_show, + .write = xcu_sched_write, + }, + {} /* terminate */ +}; + +struct cgroup_subsys xcu_cgrp_subsys = { + .css_alloc = xcu_css_alloc, + .css_online = xcu_css_online, + .css_offline = xcu_css_offline, + .css_free = xcu_css_free, + .can_attach = xcu_can_attach, + .cancel_attach = xcu_cancel_attach, + .attach = xcu_attach, + .dfl_cftypes = xcu_cg_files, + .legacy_cftypes = xcu_cg_files, + .early_init = false, +}; diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c index 0af75328b92b..a05ab8dd320b 100644 --- a/kernel/xsched/core.c +++ b/kernel/xsched/core.c @@ -24,6 +24,7 @@ int num_active_xcu; spinlock_t xcu_mgr_lock; +extern struct xsched_group *root_xcg; /* Xsched XCU array and bitmask that represents which XCUs * are present and online. @@ -47,6 +48,7 @@ static void put_prev_ctx(struct xsched_entity *xse) xse->class->put_prev_ctx(xse); xse->last_exec_runtime = 0; atomic_set(&xse->submitted_one_kick, 0); + XSCHED_DEBUG("Put current xse %d @ %s\n", xse->tgid, __func__); } static size_t select_work_def(struct xsched_cu *xcu, struct xsched_entity *xse) @@ -192,6 +194,8 @@ static int delete_ctx(struct xsched_context *ctx) XSCHED_DEBUG("Deleting ctx %d, pending kicks left=%d @ %s\n", xse->tgid, atomic_read(&xse->kicks_pending_ctx_cnt), __func__); + xsched_group_xse_detach(xse); + return 0; } @@ -315,7 +319,10 @@ struct xsched_cu *xcu_find(uint32_t *type, int xsched_xse_set_class(struct xsched_entity *xse) { - switch (xse->task_type) { +#ifdef CONFIG_CGROUP_XCU + xsched_group_inherit(current, xse); +#endif + switch (xse->parent_grp->sched_type) { case XSCHED_TYPE_RT: xse->class = &rt_xsched_class; XSCHED_DEBUG("Context is in RT class %s\n", __func__); @@ -342,7 +349,7 @@ int xsched_ctx_init_xse(struct xsched_context *ctx, struct vstream_info *vs) xse->total_scheduled = 0; xse->total_submitted = 0; xse->last_exec_runtime = 0; - xse->task_type = XSCHED_TYPE_RT; + xse->task_type = GET_VS_TASK_TYPE(vs); xse->fd = ctx->fd; xse->tgid = ctx->tgid; @@ -713,10 +720,9 @@ EXPORT_SYMBOL(xsched_xcu_register); int __init xsched_init(void) { - /* Initializing global Xsched context list. */ + /* Initializing global XSched context list. */ INIT_LIST_HEAD(&xsched_ctx_list); - + xcu_cg_init_common(root_xcg); return 0; } - late_initcall(xsched_init); -- 2.34.1
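Reviewer sketch (hypothetical names, not kernel code): xs_update() in cfs.c above charges one execution delta at every level of the group hierarchy, scaling it by each level's weight. Since xcu_grp_shares_update() gives groups with larger shares smaller weights, their xruntime grows more slowly and pick_next_ctx_fair() selects them more often. A minimal standalone model of that propagation:

#include <stdio.h>

typedef unsigned long long u64;

/* Toy stand-in for an xsched entity in the group hierarchy. */
struct demo_xse {
	u64 xruntime;			/* weighted virtual runtime */
	u64 weight;			/* cost multiplier derived from shares */
	struct demo_xse *parent;	/* enclosing group's XSE, NULL at root */
};

/* Charge delta to the entity and every ancestor, as xs_update() does. */
static void demo_charge(struct demo_xse *xse, u64 delta)
{
	for (; xse; xse = xse->parent)
		xse->xruntime += delta * xse->weight;
}

int main(void)
{
	struct demo_xse root = { 0, 1, NULL };
	struct demo_xse grp  = { 0, 3, &root };	/* weight 3: relatively few shares */
	struct demo_xse task = { 0, 1, &grp };

	demo_charge(&task, 1000);	/* 1000 ns of XCU time */
	printf("task=%llu grp=%llu root=%llu\n",
	       task.xruntime, grp.xruntime, root.xruntime);
	return 0;
}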
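Similarly, a standalone model of the integer math in xcu_grp_shares_update(): sibling shares are reduced by their GCD, the running product of reduced values (multiplying only factors that do not already divide it, a cheap LCM stand-in) forms a common numerator, and each group's weight is that numerator divided by its reduced shares, with the weights finally reduced by their own GCD. Larger shares therefore yield smaller weights. The share values below are illustrative:

#include <stdio.h>

typedef unsigned long long u64;

static u64 gcd(u64 a, u64 b)
{
	while (a && b) {
		if (a > b)
			a %= b;
		else
			b %= a;
	}
	return a ? a : b;
}

int main(void)
{
	u64 shares[] = { 1024, 2048, 3072 };	/* sibling xcu.shares values */
	u64 red[3], weight[3];
	u64 sh_gcd = 0, prod = 1, w_gcd = 0;
	int i;

	for (i = 0; i < 3; i++)
		sh_gcd = gcd(sh_gcd, shares[i]);	/* common factor: 1024 */
	for (i = 0; i < 3; i++) {
		red[i] = shares[i] / sh_gcd;		/* reduced shares: 1, 2, 3 */
		if (prod % red[i])
			prod *= red[i];			/* LCM-like product: 6 */
	}
	for (i = 0; i < 3; i++) {
		weight[i] = prod / red[i];		/* inverse weights: 6, 3, 2 */
		w_gcd = gcd(w_gcd, weight[i]);
	}
	for (i = 0; i < 3; i++)
		printf("shares=%llu -> weight=%llu\n", shares[i], weight[i] / w_gcd);
	return 0;
}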