
From: Alekseev Dmitry <alekseev.dmitry@huawei.com>

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB

-----------------------------------------

Add cgroup initialization, including the root cgroup.
Add xcu cgroup callbacks: alloc, free, attach, detach, etc.
Add xsched_group cgroup management files and methods for:
- sched type
- shares
Add the xcu cgroup subsys and the CONFIG_CGROUP_XCU option.
Add cgroup.c to the kernel/xsched Makefile.

Signed-off-by: Alekseev Dmitry <alekseev.dmitry@huawei.com>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
---
 include/linux/cgroup_subsys.h |   4 +
 include/linux/xsched.h        | 105 +++++-
 kernel/cgroup/cgroup.c        |   2 +-
 kernel/xsched/Kconfig         |  19 ++
 kernel/xsched/Makefile        |   1 +
 kernel/xsched/cfs.c           |  50 ++-
 kernel/xsched/cgroup.c        | 604 ++++++++++++++++++++++++++++++++++
 kernel/xsched/core.c          |  13 +-
 8 files changed, 790 insertions(+), 8 deletions(-)
 create mode 100644 kernel/xsched/cgroup.c

diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 85fa78049bd0..e65ae90946c2 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -61,6 +61,10 @@ SUBSYS(pids)
 SUBSYS(rdma)
 #endif
 
+#if IS_ENABLED(CONFIG_CGROUP_XCU)
+SUBSYS(xcu)
+#endif
+
 #if IS_ENABLED(CONFIG_CGROUP_MISC)
 SUBSYS(misc)
 #endif
diff --git a/include/linux/xsched.h b/include/linux/xsched.h
index 3186bae9d2c2..51be002b1970 100644
--- a/include/linux/xsched.h
+++ b/include/linux/xsched.h
@@ -5,7 +5,7 @@
 #include <linux/hash.h>
 #include <linux/hashtable.h>
 #include <linux/xcu_group.h>
-#include <linux/kref.h>
+#include <linux/cgroup.h>
 #include <linux/vstream.h>
 #ifndef pr_fmt
 #define pr_fmt(fmt) fmt
@@ -59,6 +59,7 @@
 #define XSCHED_TIME_INF RUNTIME_INF
 #define XSCHED_CFS_ENTITY_WEIGHT_DFLT 1
 #define XSCHED_CFS_MIN_TIMESLICE (10 * NSEC_PER_MSEC)
+#define XSCHED_CFG_SHARE_DFLT 1024
 
 #define __GET_VS_TASK_TYPE(t) ((t)&0xFF)
 
@@ -68,6 +69,8 @@
 
 #define GET_VS_TASK_PRIO_RT(vs_ptr) __GET_VS_TASK_PRIO_RT((vs_ptr)->task_type)
 
+extern struct xsched_cu *xsched_cu_mgr[XSCHED_NR_CUS];
+
 #define XSCHED_RT_TIMESLICE_MS (10 * NSEC_PER_MSEC)
 /*
  * A default kick slice for RT class XSEs.
@@ -112,6 +115,7 @@ enum xse_flag {
 
 extern const struct xsched_class rt_xsched_class;
 extern const struct xsched_class fair_xsched_class;
+extern struct xsched_group *root_xcg;
 
 #define xsched_first_class (&rt_xsched_class)
 
@@ -210,6 +214,11 @@ struct xsched_cu {
 	wait_queue_head_t wq_xcore_running;
 };
 
+extern int num_active_xcu;
+#define for_each_active_xcu(xcu, id) \
+	for ((id) = 0, xcu = xsched_cu_mgr[(id)]; \
+	     (id) < num_active_xcu && (xcu = xsched_cu_mgr[(id)]); (id)++)
+
 struct xsched_entity_rt {
 	struct list_head list_node;
 	enum xse_state state;
@@ -279,6 +288,11 @@ struct xsched_entity {
 	 */
 	struct xsched_cu *xcu;
 
+	/* Link to list of xsched_group items */
+	struct list_head group_node;
+	struct xsched_group *parent_grp;
+	bool is_group;
+
 	/* General purpose xse lock. */
 	spinlock_t xse_lock;
 };
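A side note on the for_each_active_xcu() iterator added above: it visits xsched_cu_mgr[0..num_active_xcu) and re-loads the slot on every step, stopping early on a NULL entry. A standalone userspace rendering of that contract (simplified types, invented values; not part of the patch):

    #include <stdio.h>

    #define NR_CUS 8

    struct xsched_cu { int id; };

    static struct xsched_cu *xsched_cu_mgr[NR_CUS];
    static int num_active_xcu;

    /* Same shape as the kernel macro: the slot is re-read in the condition. */
    #define for_each_active_xcu(xcu, id) \
            for ((id) = 0, (xcu) = xsched_cu_mgr[(id)]; \
                 (id) < num_active_xcu && ((xcu) = xsched_cu_mgr[(id)]); (id)++)

    int main(void)
    {
            static struct xsched_cu a = { 0 }, b = { 1 };
            struct xsched_cu *xcu;
            int id;

            xsched_cu_mgr[0] = &a;
            xsched_cu_mgr[1] = &b;
            num_active_xcu = 2;

            for_each_active_xcu(xcu, id)
                    printf("active xcu %d\n", xcu->id);
            return 0;
    }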
@@ -292,6 +306,90 @@ static inline bool xse_is_cfs(const struct xsched_entity *xse)
 	return xse && xse->class == &fair_xsched_class;
 }
 
+/* xsched_group's xcu related stuff */
+struct xsched_group_xcu_priv {
+	/* Owner of this group */
+	struct xsched_group *self;
+
+	/* xcu id */
+	int32_t xcu_id;
+
+	/* Link to scheduler */
+	struct xsched_entity xse;	/* xse of this group on runqueue */
+	struct xsched_rq_cfs *rq;	/* runqueue "owned" by this group */
+
+	/* Statistics */
+	int nr_throttled;
+	u64 throttled_time;
+	u64 overrun_time;
+};
+
+/* Xsched scheduling control group */
+struct xsched_group {
+	/* Cgroups controller structure */
+	struct cgroup_subsys_state css;
+
+	/* Control group settings: */
+	int sched_type;
+	int prio;
+
+	/* Bandwidth setting: shares value set by user */
+	u64 shares_cfg;
+	u64 shares_cfg_red;
+	u32 weight;
+	u64 children_shares_sum;
+
+	/* Bandwidth setting: maximal quota in period */
+	s64 quota;
+	s64 rt_exec;
+	s64 period;
+	struct hrtimer quota_timeout;
+	struct work_struct refill_work;
+	u64 qoslevel;
+
+	struct xsched_group_xcu_priv perxcu_priv[XSCHED_NR_CUS];
+
+	/* Groups hierarchy */
+	struct xsched_group *parent;
+	struct list_head children_groups;
+	struct list_head group_node;
+
+	spinlock_t lock;
+
+	/* XSE members attached to this group */
+	struct list_head members;
+};
+
+#define XSCHED_RQ_OF(xse) \
+	(container_of(((xse)->cfs.cfs_rq), struct xsched_rq, cfs))
+
+#define XSCHED_RQ_OF_CFS_XSE(cfs_xse) \
+	(container_of(((cfs_xse)->cfs_rq), struct xsched_rq, cfs))
+
+#define XSCHED_SE_OF(cfs_xse) \
+	(container_of((cfs_xse), struct xsched_entity, cfs))
+
+#define xcg_parent_grp_xcu(xcg) \
+	((xcg)->self->parent->perxcu_priv[(xcg)->xcu_id])
+
+#define xse_parent_grp_xcu(xse_cfs) \
+	(&((XSCHED_SE_OF(xse_cfs) \
+		    ->parent_grp \
+		    ->perxcu_priv[(XSCHED_SE_OF(xse_cfs))->xcu->id])))
+
+static inline struct xsched_group_xcu_priv *
+xse_this_grp_xcu(struct xsched_entity_cfs *xse_cfs)
+{
+	struct xsched_entity *xse;
+
+	xse = xse_cfs ? container_of(xse_cfs, struct xsched_entity, cfs) : NULL;
+	return xse ? container_of(xse, struct xsched_group_xcu_priv, xse) : NULL;
+}
+
+static inline struct xsched_group *
+xse_this_grp(struct xsched_entity_cfs *xse_cfs)
+{
+	return xse_cfs ? xse_this_grp_xcu(xse_cfs)->self : NULL;
+}
+
 /* Returns a pointer to an atomic_t variable representing a counter
  * of currently pending vstream kicks on a given XCU and for a
  * given xsched class.
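The helpers above lean on a double container_of() chain: a cfs entity is embedded in an xsched_entity, which for group entities is itself embedded in the group's per-XCU private area, so two offset subtractions recover the owning group. A standalone sketch of that pattern (userspace, deliberately simplified structs; not the kernel definitions):

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct xsched_entity_cfs { unsigned long long xruntime; };
    struct xsched_entity { struct xsched_entity_cfs cfs; };
    struct xsched_group_xcu_priv {
            const char *group_name;
            struct xsched_entity xse;
    };

    int main(void)
    {
            struct xsched_group_xcu_priv priv = { .group_name = "demo" };
            struct xsched_entity_cfs *cfs = &priv.xse.cfs;

            /* Same two steps as xse_this_grp_xcu(): cfs -> xse -> priv. */
            struct xsched_entity *xse =
                    container_of(cfs, struct xsched_entity, cfs);
            struct xsched_group_xcu_priv *back =
                    container_of(xse, struct xsched_group_xcu_priv, xse);

            printf("recovered group: %s\n", back->group_name);
            return 0;
    }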
@@ -542,5 +640,10 @@ int xsched_vsm_add_tail(struct vstream_info *vs, vstream_args_t *arg);
 struct vstream_metadata *xsched_vsm_fetch_first(struct vstream_info *vs);
 void submit_kick(struct vstream_info *vs, struct xcu_op_handler_params *params,
 		 struct vstream_metadata *vsm);
+/* Xsched group management functions */
+int xsched_group_inherit(struct task_struct *tsk, struct xsched_entity *xse);
+void xcu_cg_init_common(struct xsched_group *xcg);
+void xcu_grp_shares_update(struct xsched_group *xg);
+void xsched_group_xse_detach(struct xsched_entity *xse);
 void enqueue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu);
 #endif /* !__LINUX_XSCHED_H__ */
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index c26a9b3a3576..b632590eae0f 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -6190,7 +6190,7 @@ int __init cgroup_init(void)
 	struct cgroup_subsys *ss;
 	int ssid;
 
-	BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16);
+	BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 17);
 	BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
 	BUG_ON(cgroup_init_cftypes(NULL, cgroup_psi_files));
 	BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));
diff --git a/kernel/xsched/Kconfig b/kernel/xsched/Kconfig
index cbd2eec8bfad..d09c77aa4cdd 100644
--- a/kernel/xsched/Kconfig
+++ b/kernel/xsched/Kconfig
@@ -41,3 +41,22 @@ config XSCHED_NR_CUS
 	int "Number of CUs (a.k.a. XCUs) available to XSched mechanism"
 	default 8
 	depends on XCU_SCHEDULER
+	help
+	  This option defines the maximum number of Compute Units (CUs) that
+	  can be managed by the XSched scheduler; consider changing this value
+	  proportionally to the number of available XCU cores.
+
+config CGROUP_XCU
+	bool "XCU bandwidth control and group scheduling for xsched_cfs"
+	default n
+	depends on XCU_SCHEDULER
+	help
+	  This option enables the eXtended Compute Unit (XCU) resource
+	  controller for CFS task groups, providing hierarchical scheduling
+	  and fine-grained bandwidth allocation capabilities. Key features
+	  include:
+	  - Proportional XCU time distribution across cgroups based on shares/quotas
+	  - Nested group scheduling with latency isolation
+	  - Integration with xsched_cfs for fair XCU resource management
+
+	  Say Y on systems that need fine-grained resource control in cgroups.
+	  If unsure, say N.
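For reviewers who want to try the controller, a minimal usage sketch follows. It assumes a cgroup v1 mount of the named xcu hierarchy at /sys/fs/cgroup/xcu and an already-created child group (the directory name "grp_a" is invented); xcu.sched and xcu.shares are the control files added later in this patch:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    /* Hypothetical child group; create it first with mkdir(2). */
    #define XCU_CG "/sys/fs/cgroup/xcu/grp_a"

    static int write_str(const char *path, const char *val)
    {
            int fd = open(path, O_WRONLY);
            ssize_t n;

            if (fd < 0)
                    return -1;
            n = write(fd, val, strlen(val));
            close(fd);
            return n < 0 ? -1 : 0;
    }

    int main(void)
    {
            /* Switch the group to the cfs xsched class... */
            if (write_str(XCU_CG "/xcu.sched", "cfs"))
                    perror("xcu.sched");
            /* ...and give it twice the default 1024 shares. */
            if (write_str(XCU_CG "/xcu.shares", "2048"))
                    perror("xcu.shares");
            return 0;
    }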
diff --git a/kernel/xsched/Makefile b/kernel/xsched/Makefile
index fe212f228cf6..c4c06b6038ff 100644
--- a/kernel/xsched/Makefile
+++ b/kernel/xsched/Makefile
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-y += vstream.o
 obj-$(CONFIG_XCU_SCHEDULER) += core.o rt.o cfs.o
+obj-$(CONFIG_CGROUP_XCU) += cgroup.o
diff --git a/kernel/xsched/cfs.c b/kernel/xsched/cfs.c
index d20ba5cf3165..ea0f49488fb7 100644
--- a/kernel/xsched/cfs.c
+++ b/kernel/xsched/cfs.c
@@ -84,10 +84,45 @@ xs_pick_first(struct xsched_rq_cfs *cfs_rq)
  */
 static void xs_update(struct xsched_entity_cfs *xse_cfs, u64 delta)
 {
-	u64 new_xrt = xse_cfs->xruntime + delta * xse_cfs->weight;
+	struct xsched_group_xcu_priv *xg = xse_parent_grp_xcu(xse_cfs);
 
-	xs_cfs_rq_update(xse_cfs, new_xrt);
-	xse_cfs->sum_exec_runtime += delta;
+	for (; xg; xse_cfs = &xg->xse.cfs, xg = &xcg_parent_grp_xcu(xg)) {
+		u64 new_xrt = xse_cfs->xruntime + delta * xse_cfs->weight;
+
+		xs_cfs_rq_update(xse_cfs, new_xrt);
+		xse_cfs->sum_exec_runtime += delta;
+
+		if (xg->self->parent == NULL)
+			break;
+	}
+}
+
+/**
+ * xg_update() - Update container group's xruntime.
+ * @xg: descendant xsched group's private xcu control structure
+ *
+ * No locks are required to access xsched_group_xcu_priv members,
+ * because only one worker thread works for one XCU.
+ */
+static void xg_update(struct xsched_group_xcu_priv *xg)
+{
+	u64 new_xrt;
+	struct xsched_entity_cfs *entry;
+
+	for (; xg; xg = &xcg_parent_grp_xcu(xg)) {
+		entry = xs_pick_first(xg->rq);
+		if (entry)
+			new_xrt = entry->xruntime * xg->xse.cfs.weight;
+		else
+			new_xrt = XSCHED_TIME_INF;
+
+		xg->rq->min_xruntime = new_xrt;
+
+		if (xg->self->parent)
+			xs_cfs_rq_update(&xg->xse.cfs, new_xrt);
+		else
+			break;
+	}
 }
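To make the propagation in xs_update() concrete: each level of the group hierarchy accrues delta * weight of virtual runtime, and since the entity with the smallest xruntime runs next, a smaller weight means slower aging and a larger share of XCU time (weight acts as the inverse of the configured shares here, see xcu_grp_shares_update() below). A userspace model with invented names and weights, not kernel code:

    #include <stdint.h>
    #include <stdio.h>

    struct level {
            const char *name;
            uint64_t xruntime;	/* weighted virtual runtime */
            uint64_t weight;	/* smaller weight -> more XCU time */
            struct level *parent;
    };

    /* Mirrors the xs_update() walk: charge every ancestor level. */
    static void charge(struct level *l, uint64_t delta)
    {
            for (; l; l = l->parent) {
                    l->xruntime += delta * l->weight;
                    printf("%-5s xruntime -> %llu\n", l->name,
                           (unsigned long long)l->xruntime);
            }
    }

    int main(void)
    {
            struct level root = { "root", 0, 1, NULL };
            struct level grp  = { "grp",  0, 2, &root };
            struct level ctx  = { "ctx",  0, 1, &grp  };

            charge(&ctx, 10);	/* the context ran for 10 time units */
            return 0;
    }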
 
 /*
@@ -102,6 +137,7 @@ static void dequeue_ctx_fair(struct xsched_entity *xse)
 	struct xsched_entity_cfs *xse_cfs = &xse->cfs;
 
 	xs_rq_remove(xse_cfs);
+	xg_update(xse_parent_grp_xcu(xse_cfs));
 
 	first = xs_pick_first(&xcu->xrq.cfs);
 	xcu->xrq.cfs.min_xruntime = (first) ? first->xruntime : XSCHED_TIME_INF;
@@ -128,7 +164,7 @@ static void enqueue_ctx_fair(struct xsched_entity *xse, struct xsched_cu *xcu)
 	struct xsched_entity_cfs *xse_cfs = &xse->cfs;
 
 	xse_cfs->weight = XSCHED_CFS_ENTITY_WEIGHT_DFLT;
-	rq = xse_cfs->cfs_rq = &xcu->xrq.cfs;
+	rq = xse_cfs->cfs_rq = xse_parent_grp_xcu(xse_cfs)->rq;
 
 	/* If no XSE or only empty groups */
 	if (xs_pick_first(rq) == NULL || rq->min_xruntime == XSCHED_TIME_INF)
@@ -137,6 +173,7 @@ static void enqueue_ctx_fair(struct xsched_entity *xse, struct xsched_cu *xcu)
 	xse_cfs->xruntime = max(xse_cfs->xruntime, rq->min_xruntime);
 
 	xs_rq_add(xse_cfs);
+	xg_update(xse_parent_grp_xcu(xse_cfs));
 
 	first = xs_pick_first(&xcu->xrq.cfs);
 	xcu->xrq.cfs.min_xruntime = (first) ? first->xruntime : XSCHED_TIME_INF;
@@ -155,6 +192,11 @@ static struct xsched_entity *pick_next_ctx_fair(struct xsched_cu *xcu)
 	xse = xs_pick_first(rq);
 	if (!xse)
 		return NULL;
+	for (; XSCHED_SE_OF(xse)->is_group; xse = xs_pick_first(rq)) {
+		if (!xse || CFS_INNER_RQ_EMPTY(xse))
+			return NULL;
+		rq = xse_this_grp_xcu(xse)->rq;
+	}
 
 	return container_of(xse, struct xsched_entity, cfs);
 }
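The max() clamp in enqueue_ctx_fair() above is what keeps a long-idle entity from monopolizing the XCU on wake-up: its stale, small xruntime is pulled up to the queue's min_xruntime, while an entity that is already ahead keeps its own value. A toy illustration with invented numbers, not kernel code:

    #include <stdint.h>
    #include <stdio.h>

    /* Same decision as the max() in enqueue_ctx_fair(). */
    static uint64_t enqueue_clamp(uint64_t entity_xrt, uint64_t rq_min_xrt)
    {
            return entity_xrt > rq_min_xrt ? entity_xrt : rq_min_xrt;
    }

    int main(void)
    {
            /* A slept entity is pulled forward instead of starving others. */
            printf("slept entity: %llu\n",
                   (unsigned long long)enqueue_clamp(100, 5000));  /* 5000 */
            /* An entity already ahead keeps its accumulated xruntime. */
            printf("ahead entity: %llu\n",
                   (unsigned long long)enqueue_clamp(9000, 5000)); /* 9000 */
            return 0;
    }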
diff --git a/kernel/xsched/cgroup.c b/kernel/xsched/cgroup.c
new file mode 100644
index 000000000000..758b8b9c183d
--- /dev/null
+++ b/kernel/xsched/cgroup.c
@@ -0,0 +1,604 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Support cgroup for xpu device
+ *
+ * Copyright (C) 2025-2026 Huawei Technologies Co., Ltd
+ *
+ * Author: Konstantin Meskhidze <konstantin.meskhidze@huawei.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+#include <linux/err.h>
+#include <linux/cgroup.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/xsched.h>
+
+enum xcu_file_type {
+	XCU_FILE_PERIOD_MS,
+	XCU_FILE_QUOTA_MS,
+	XCU_FILE_SHARES
+};
+
+static struct xsched_group root_xsched_group;
+struct xsched_group *root_xcg = &root_xsched_group;
+static bool root_cg_inited;
+
+static struct xsched_group *old_xcg;
+static DECLARE_WAIT_QUEUE_HEAD(xcg_attach_wq);
+static bool attach_in_progress;
+static DEFINE_MUTEX(xcg_mutex);
+
+static const char xcu_sched_name[XSCHED_TYPE_NUM][4] = {
+	[XSCHED_TYPE_RT] = "rt",
+	[XSCHED_TYPE_CFS] = "cfs"
+};
+
+void xcu_cg_init_common(struct xsched_group *xcg)
+{
+	spin_lock_init(&xcg->lock);
+	INIT_LIST_HEAD(&xcg->members);
+	INIT_LIST_HEAD(&xcg->children_groups);
+}
+
+static void xcu_cfs_root_cg_init(void)
+{
+	uint32_t id;
+	struct xsched_cu *xcu;
+
+	for_each_active_xcu(xcu, id) {
+		root_xcg->perxcu_priv[id].xcu_id = id;
+		root_xcg->perxcu_priv[id].self = root_xcg;
+		root_xcg->perxcu_priv[id].rq = &xcu->xrq.cfs;
+		root_xcg->perxcu_priv[id].xse.cfs.weight = 1;
+	}
+
+	root_xcg->sched_type = XSCHED_TYPE_DFLT;
+}
+
+/**
+ * xcu_cfs_cg_init() - Initialize xsched_group cfs runqueues and bw control.
+ * @xcg: new xsched_cgroup
+ * @parent_xg: parent's group
+ *
+ * One xsched_group can host many processes with contexts on different
+ * devices. This function creates an xsched_entity for every XCU and places
+ * it in the runqueue of the parent group; it also creates a new cfs
+ * runqueue for the entities inside this group.
+ */
+static void xcu_cfs_cg_init(struct xsched_group *xcg,
+			    struct xsched_group *parent_xg)
+{
+	uint32_t id;
+	struct xsched_cu *xcu;
+	struct xsched_rq_cfs *sub_cfs_rq;
+
+	if (unlikely(!root_cg_inited)) {
+		xcu_cfs_root_cg_init();
+		root_cg_inited = true;
+	}
+
+	for_each_active_xcu(xcu, id) {
+		xcg->perxcu_priv[id].xcu_id = id;
+		xcg->perxcu_priv[id].self = xcg;
+
+		sub_cfs_rq = kzalloc(sizeof(struct xsched_rq_cfs), GFP_KERNEL);
+		xcg->perxcu_priv[id].rq = sub_cfs_rq;
+		xcg->perxcu_priv[id].rq->ctx_timeline = RB_ROOT_CACHED;
+
+		xcg->perxcu_priv[id].xse.is_group = true;
+		xcg->perxcu_priv[id].xse.xcu = xcu;
+		xcg->perxcu_priv[id].xse.class = &fair_xsched_class;
+
+		/* Put new empty groups to the right in parent's rbtree: */
+		xcg->perxcu_priv[id].xse.cfs.xruntime = XSCHED_TIME_INF;
+		xcg->perxcu_priv[id].xse.cfs.weight =
+			XSCHED_CFS_ENTITY_WEIGHT_DFLT;
+		xcg->perxcu_priv[id].xse.parent_grp = parent_xg;
+
+		mutex_lock(&xcu->xcu_lock);
+		enqueue_ctx(&xcg->perxcu_priv[id].xse, xcu);
+		mutex_unlock(&xcu->xcu_lock);
+	}
+
+	xcg->shares_cfg = XSCHED_CFG_SHARE_DFLT;
+	xcu_grp_shares_update(parent_xg);
+}
+
+static void xcu_cfs_cg_deinit(struct xsched_group *xcg)
+{
+	uint32_t id;
+	struct xsched_cu *xcu;
+
+	for_each_active_xcu(xcu, id) {
+		mutex_lock(&xcu->xcu_lock);
+		fair_xsched_class.dequeue_ctx(&xcg->perxcu_priv[id].xse);
+		mutex_unlock(&xcu->xcu_lock);
+		kfree(xcg->perxcu_priv[id].rq);
+	}
+	xcu_grp_shares_update(xcg->parent);
+}
+
+/**
+ * xcu_cg_init() - Initialize non-root xsched_group structure.
+ * @xcg: new xsched_cgroup
+ * @parent_xg: parent's group
+ */
+static void xcu_cg_init(struct xsched_group *xcg,
+			struct xsched_group *parent_xg)
+{
+	xcu_cg_init_common(xcg);
+	xcg->parent = parent_xg;
+	list_add_tail(&xcg->group_node, &parent_xg->children_groups);
+	xcg->sched_type = parent_xg->sched_type;
+
+	switch (xcg->sched_type) {
+	case XSCHED_TYPE_CFS:
+		xcu_cfs_cg_init(xcg, parent_xg);
+		break;
+	default:
+		pr_debug("xcu_cgroup: init RT group css=0x%lx\n",
+			 (uintptr_t)&xcg->css);
+		break;
+	}
+}
+
+inline struct xsched_group *xcu_cg_from_css(struct cgroup_subsys_state *css)
+{
+	return css ? container_of(css, struct xsched_group, css) : NULL;
+}
+
+/**
+ * xcu_css_alloc() - Allocate and init xcu cgroup.
+ * @parent_css: css of parent xcu cgroup
+ *
+ * Called from kernel/cgroup.c with cgroup_lock() held.
+ * First called during subsys initialization to create the root xcu cgroup,
+ * while XCUs haven't been initialized yet; afterwards it is used on every
+ * new cgroup creation. On the second call it sets up the root xsched_group
+ * runqueue.
+ *
+ * Return: pointer of new xcu cgroup css on success, -ENOMEM otherwise.
+ */
+static struct cgroup_subsys_state *
+xcu_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+	struct xsched_group *parent_xg;
+	struct xsched_group *xg;
+
+	if (!parent_css)
+		return &root_xsched_group.css;
+
+	xg = kzalloc(sizeof(*xg), GFP_KERNEL);
+	if (!xg)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_lock(&xcg_mutex);
+	parent_xg = xcu_cg_from_css(parent_css);
+	xcu_cg_init(xg, parent_xg);
+	mutex_unlock(&xcg_mutex);
+
+	return &xg->css;
+}
+
+static void xcu_css_free(struct cgroup_subsys_state *css)
+{
+	struct xsched_group *xcg;
+
+	mutex_lock(&xcg_mutex);
+
+	xcg = xcu_cg_from_css(css);
+
+	if (xcg->parent != NULL) {
+		switch (xcg->sched_type) {
+		case XSCHED_TYPE_CFS:
+			xcu_cfs_cg_deinit(xcg);
+			break;
+		default:
+			pr_debug("xcu_cgroup: deinit RT group css=0x%lx\n",
+				 (uintptr_t)&xcg->css);
+			break;
+		}
+	}
+	list_del(&xcg->group_node);
+
+	mutex_unlock(&xcg_mutex);
+
+	kfree(xcg);
+}
+
+int xcu_css_online(struct cgroup_subsys_state *css)
+{
+	return 0;
+}
+
+static void xcu_css_offline(struct cgroup_subsys_state *css)
+{
+}
+
+static void xsched_group_xse_attach(struct xsched_group *xg,
+				    struct xsched_entity *xse)
+{
+	spin_lock(&xg->lock);
+	list_add_tail(&xse->group_node, &xg->members);
+	spin_unlock(&xg->lock);
+	xse->parent_grp = xg;
+}
+
+void xsched_group_xse_detach(struct xsched_entity *xse)
+{
+	struct xsched_group *xcg = xse->parent_grp;
+
+	spin_lock(&xcg->lock);
+	list_del(&xse->group_node);
+	spin_unlock(&xcg->lock);
+}
+
+static int xcu_task_can_attach(struct task_struct *task,
+			       struct xsched_group *old,
+			       struct xsched_group *dst)
+{
+	struct xsched_entity *xse;
+	bool has_xse = false;
+
+	spin_lock(&old->lock);
+	list_for_each_entry(xse, &old->members, group_node) {
+		if (xse->owner_pid == task_pid_nr(task)) {
+			has_xse = true;
+			break;
+		}
+	}
+	spin_unlock(&old->lock);
+
+	return has_xse ? -EINVAL : 0;
+}
+
+static int xcu_can_attach(struct cgroup_taskset *tset)
+{
+	struct task_struct *task;
+	struct cgroup_subsys_state *dst_css;
+	int ret = 0;
+
+	mutex_lock(&xcg_mutex);
+	cgroup_taskset_for_each(task, dst_css, tset) {
+		struct cgroup_subsys_state *old_css =
+			task_css(task, xcu_cgrp_id);
+		struct xsched_group *dst_xcg = xcu_cg_from_css(dst_css);
+
+		old_xcg = xcu_cg_from_css(old_css);
+		ret = xcu_task_can_attach(task, old_xcg, dst_xcg);
+		if (ret)
+			break;
+	}
+	if (!ret)
+		attach_in_progress = true;
+	mutex_unlock(&xcg_mutex);
+	return ret;
+}
+
+static void xcu_cancel_attach(struct cgroup_taskset *tset)
+{
+	mutex_lock(&xcg_mutex);
+	attach_in_progress = false;
+	wake_up(&xcg_attach_wq);
+	mutex_unlock(&xcg_mutex);
+}
+
+void xcu_move_task(struct task_struct *task, struct xsched_group *old_xcg,
+		   struct xsched_group *new_xcg)
+{
+	struct xsched_entity *xse, *tmp;
+	struct xsched_cu *xcu;
+
+	spin_lock(&old_xcg->lock);
+	list_for_each_entry_safe(xse, tmp, &old_xcg->members, group_node) {
+		if (xse->owner_pid == task_pid_nr(task)) {
+			xcu = xse->xcu;
+
+			WARN_ON_ONCE(old_xcg != xse->parent_grp);
+
+			/* delete from the old_xcg */
+			list_del(&xse->group_node);
+
+			mutex_lock(&xcu->xcu_lock);
+			/* dequeue from the current runqueue */
+			xse->class->dequeue_ctx(xse);
+			/* attach to the new_xcg */
+			xsched_group_xse_attach(new_xcg, xse);
+			/* enqueue to the runqueue in new_xcg */
+			enqueue_ctx(xse, xcu);
+			mutex_unlock(&xcu->xcu_lock);
+		}
+	}
+	spin_unlock(&old_xcg->lock);
+}
+
+static void xcu_attach(struct cgroup_taskset *tset)
+{
+	struct task_struct *task;
+	struct cgroup_subsys_state *css;
+
+	mutex_lock(&xcg_mutex);
+	cgroup_taskset_for_each(task, css, tset) {
+		xcu_move_task(task, old_xcg, xcu_cg_from_css(css));
+	}
+	attach_in_progress = false;
+	wake_up(&xcg_attach_wq);
+	mutex_unlock(&xcg_mutex);
+}
+
+/**
+ * xsched_group_inherit() - Attach new entity to task's xsched_group.
+ * @task: task_struct
+ * @xse: xsched entity
+ *
+ * Called in xsched context initialization to attach xse to task's group
+ * and inherit its xse scheduling class and bandwidth control policy.
+ *
+ * Return: Zero on success.
+ */
+int xsched_group_inherit(struct task_struct *task, struct xsched_entity *xse)
+{
+	struct cgroup_subsys_state *css;
+	struct xsched_group *xg;
+
+retry:
+	wait_event(xcg_attach_wq, !attach_in_progress);
+	mutex_lock(&xcg_mutex);
+	if (attach_in_progress) {
+		mutex_unlock(&xcg_mutex);
+		goto retry;
+	}
+	xse->owner_pid = task_pid_nr(task);
+	css = task_get_css(task, xcu_cgrp_id);
+	xg = xcu_cg_from_css(css);
+	xsched_group_xse_attach(xg, xse);
+
+	css_put(css);
+	mutex_unlock(&xcg_mutex);
+	return 0;
+}
+
+static int xcu_sched_show(struct seq_file *sf, void *v)
+{
+	struct cgroup_subsys_state *css = seq_css(sf);
+	struct xsched_group *xg = xcu_cg_from_css(css);
+
+	seq_printf(sf, "%s\n", xcu_sched_name[xg->sched_type]);
+	return 0;
+}
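The retry loop in xsched_group_inherit() above is a gate: attach operations raise attach_in_progress under xcg_mutex, and a context being created waits for the flag to clear, re-checking it under the lock in case it lost the race. A userspace sketch of the same pattern with pthreads (names mirror the patch; the condvar stands in for the kernel wait queue, and this is illustrative, not kernel code):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t xcg_mutex = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t xcg_attach_wq = PTHREAD_COND_INITIALIZER;
    static bool attach_in_progress;

    static void attach_begin(void)
    {
            pthread_mutex_lock(&xcg_mutex);
            attach_in_progress = true;
            pthread_mutex_unlock(&xcg_mutex);
    }

    static void attach_end(void)
    {
            pthread_mutex_lock(&xcg_mutex);
            attach_in_progress = false;
            pthread_cond_broadcast(&xcg_attach_wq); /* wake_up() analogue */
            pthread_mutex_unlock(&xcg_mutex);
    }

    static void group_inherit(void)
    {
            pthread_mutex_lock(&xcg_mutex);
            while (attach_in_progress) /* wait_event() + recheck/retry */
                    pthread_cond_wait(&xcg_attach_wq, &xcg_mutex);
            printf("safe to attach the new xse to its group\n");
            pthread_mutex_unlock(&xcg_mutex);
    }

    int main(void)
    {
            attach_begin();
            attach_end();
            group_inherit();
            return 0;
    }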
+
+/**
+ * xcu_cg_set_sched() - Set scheduling type for group.
+ * @xg: xsched group
+ * @type: scheduler type
+ *
+ * The scheduler type can be changed only if the group is a direct child
+ * of the root group and has no scheduling entities.
+ *
+ * Return: Zero on success or -EINVAL.
+ */
+int xcu_cg_set_sched(struct xsched_group *xg, int type)
+{
+	if (type == xg->sched_type)
+		return 0;
+
+	if (xg->parent != root_xcg || !list_empty(&xg->members))
+		return -EINVAL;
+
+	if (xg->sched_type == XSCHED_TYPE_CFS)
+		xcu_cfs_cg_deinit(xg);
+
+	if (type == XSCHED_TYPE_CFS) {
+		xg->sched_type = XSCHED_TYPE_CFS;
+		xcu_cfs_cg_init(xg, xg->parent);
+	}
+
+	xg->sched_type = type;
+
+	return 0;
+}
+
+static ssize_t xcu_sched_write(struct kernfs_open_file *of, char *buf,
+			       size_t nbytes, loff_t off)
+{
+	struct cgroup_subsys_state *css = of_css(of);
+	struct xsched_group *xg = xcu_cg_from_css(css);
+	char type_name[4];
+	int type = -1;
+
+	ssize_t ret = sscanf(buf, "%3s", type_name);
+
+	if (ret < 1)
+		return -EINVAL;
+
+	for (type = 0; type < XSCHED_TYPE_NUM; type++) {
+		if (!strcmp(type_name, xcu_sched_name[type]))
+			break;
+	}
+
+	if (type == XSCHED_TYPE_NUM)
+		return -EINVAL;
+
+	if (!list_empty(&css->children))
+		return -EINVAL;
+
+	mutex_lock(&xcg_mutex);
+	ret = xcu_cg_set_sched(xg, type);
+	mutex_unlock(&xcg_mutex);
+
+	return (ret) ? ret : nbytes;
+}
+
+static s64 xcu_read_s64(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+	s64 ret = 0;
+	struct xsched_group *xcucg = xcu_cg_from_css(css);
+
+	spin_lock(&xcucg->lock);
+	switch (cft->private) {
+	case XCU_FILE_SHARES:
+		ret = xcucg->shares_cfg;
+		break;
+	default:
+		break;
+	}
+	spin_unlock(&xcucg->lock);
+	return ret;
+}
+
+static inline u64 gcd(u64 a, u64 b)
+{
+	while (a != 0 && b != 0) {
+		if (a > b)
+			a %= b;
+		else
+			b %= a;
+	}
+	return (a) ? a : b;
+}
+
+void xcu_grp_shares_update(struct xsched_group *xg)
+{
+	int id;
+	struct xsched_cu *xcu;
+	struct xsched_group *xgi, *parent = xg;
+	u64 sh_sum = 0, sh_gcd = 0, w_gcd = 0, sh_prod_red = 1;
+
+	spin_lock(&parent->lock);
+
+	list_for_each_entry(xgi, &parent->children_groups, group_node) {
+		if (xgi->sched_type == XSCHED_TYPE_CFS)
+			sh_gcd = gcd(sh_gcd, xgi->shares_cfg);
+	}
+
+	list_for_each_entry(xgi, &parent->children_groups, group_node) {
+		if (xgi->sched_type == XSCHED_TYPE_CFS) {
+			sh_sum += xgi->shares_cfg;
+			xgi->shares_cfg_red = div_u64(xgi->shares_cfg, sh_gcd);
+
+			if ((sh_prod_red % xgi->shares_cfg_red) != 0)
+				sh_prod_red *= xgi->shares_cfg_red;
+		}
+	}
+
+	parent->children_shares_sum = sh_sum;
+
+	list_for_each_entry(xgi, &parent->children_groups, group_node) {
+		if (xgi->sched_type == XSCHED_TYPE_CFS) {
+			xgi->weight = div_u64(sh_prod_red, xgi->shares_cfg_red);
+			w_gcd = gcd(w_gcd, xgi->weight);
+		}
+	}
+
+	list_for_each_entry(xgi, &parent->children_groups, group_node) {
+		if (xgi->sched_type == XSCHED_TYPE_CFS) {
+			xgi->weight = div_u64(xgi->weight, w_gcd);
+			for_each_active_xcu(xcu, id) {
+				mutex_lock(&xcu->xcu_lock);
+				xgi->perxcu_priv[id].xse.cfs.weight =
+					xgi->weight;
+				mutex_unlock(&xcu->xcu_lock);
+			}
+		}
+	}
+
+	spin_unlock(&parent->lock);
+}
+
+static int xcu_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
+			 s64 val)
+{
+	int ret = 0;
+	struct xsched_group *xcucg = xcu_cg_from_css(css);
+
+	spin_lock(&xcucg->lock);
+	switch (cft->private) {
+	case XCU_FILE_SHARES:
+		if (val <= 0) {
+			ret = -EINVAL;
+			break;
+		}
+		xcucg->shares_cfg = val;
+		xcu_grp_shares_update(xcucg->parent);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	spin_unlock(&xcucg->lock);
+
+	return ret;
+}
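A worked walk-through of the reduction in xcu_grp_shares_update(), with invented values: sibling shares {1024, 2048, 3072} have gcd 1024, so the reduced configs are {1, 2, 3}; sh_prod_red grows to 6, giving raw weights {6, 3, 2}; their gcd is 1, so the final weights stay {6, 3, 2}. Since a smaller weight makes xruntime grow more slowly, the group with the largest shares gets the most XCU time. A standalone userspace check of the same arithmetic:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t gcd_u64(uint64_t a, uint64_t b)
    {
            while (b) {
                    uint64_t t = a % b;
                    a = b;
                    b = t;
            }
            return a;
    }

    int main(void)
    {
            uint64_t shares[] = { 1024, 2048, 3072 };
            uint64_t red[3], w[3], sh_gcd = 0, prod = 1, w_gcd = 0;
            int i;

            for (i = 0; i < 3; i++)
                    sh_gcd = gcd_u64(sh_gcd, shares[i]);
            for (i = 0; i < 3; i++) {
                    red[i] = shares[i] / sh_gcd;
                    /* same growth rule as sh_prod_red in the patch */
                    if (prod % red[i])
                            prod *= red[i];
            }
            for (i = 0; i < 3; i++) {
                    w[i] = prod / red[i];
                    w_gcd = gcd_u64(w_gcd, w[i]);
            }
            for (i = 0; i < 3; i++)
                    printf("shares %llu -> weight %llu\n",
                           (unsigned long long)shares[i],
                           (unsigned long long)(w[i] / w_gcd));
            return 0;
    }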
+
+static int xcu_stat(struct seq_file *sf, void *v)
+{
+	struct cgroup_subsys_state *css = seq_css(sf);
+	struct xsched_group *xcucg = xcu_cg_from_css(css);
+	u64 nr_throttled = 0;
+	u64 throttled_time = 0;
+	u64 exec_runtime = 0;
+	int xcu_id;
+	struct xsched_cu *xcu;
+
+	if (xcucg->sched_type == XSCHED_TYPE_RT) {
+		seq_puts(sf, "RT group stat is not supported\n");
+		return 0;
+	}
+
+	for_each_active_xcu(xcu, xcu_id) {
+		nr_throttled += xcucg->perxcu_priv[xcu_id].nr_throttled;
+		throttled_time += xcucg->perxcu_priv[xcu_id].throttled_time;
+		exec_runtime +=
+			xcucg->perxcu_priv[xcu_id].xse.cfs.sum_exec_runtime;
+	}
+
+	seq_printf(sf, "exec_runtime: %llu\n", exec_runtime);
+	seq_printf(sf, "shares cfg: %llu/%llu x%u\n", xcucg->shares_cfg,
+		   xcucg->parent->children_shares_sum, xcucg->weight);
+
+	return 0;
+}
+
+static struct cftype xcu_cg_files[] = {
+	{
+		.name = "shares",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.read_s64 = xcu_read_s64,
+		.write_s64 = xcu_write_s64,
+		.private = XCU_FILE_SHARES,
+	},
+	{
+		.name = "stat",
+		.seq_show = xcu_stat,
+	},
+	{
+		.name = "sched",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = xcu_sched_show,
+		.write = xcu_sched_write,
+	},
+	{} /* terminate */
+};
+
+struct cgroup_subsys xcu_cgrp_subsys = {
+	.css_alloc = xcu_css_alloc,
+	.css_online = xcu_css_online,
+	.css_offline = xcu_css_offline,
+	.css_free = xcu_css_free,
+	.can_attach = xcu_can_attach,
+	.cancel_attach = xcu_cancel_attach,
+	.attach = xcu_attach,
+	.dfl_cftypes = xcu_cg_files,
+	.legacy_cftypes = xcu_cg_files,
+	.early_init = false,
+};
diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c
index 5d96a22337fa..b28a11909fd6 100644
--- a/kernel/xsched/core.c
+++ b/kernel/xsched/core.c
@@ -49,6 +49,8 @@ static void put_prev_ctx(struct xsched_entity *xse)
 	lockdep_assert_held(&xcu->xcu_lock);
 
 	xse->class->put_prev_ctx(xse);
+	XSCHED_INFO("Put current xse %d sum_exec_runtime %llu @ %s\n",
+		    xse->tgid, xse->cfs.sum_exec_runtime, __func__);
 
 	xse->last_process_time = 0;
@@ -214,6 +216,8 @@ static int delete_ctx(struct xsched_context *ctx)
 	XSCHED_INFO("Deleting ctx %d, pending kicks left=%d @ %s\n",
 		    xse->tgid, atomic_read(&xse->kicks_pending_ctx_cnt),
 		    __func__);
 
+	xsched_group_xse_detach(xse);
+
 	XSCHED_EXIT_STUB();
 
 	return 0;
@@ -403,7 +407,10 @@ struct xsched_cu *xcu_find(__u32 *type, __u32 devId, __u32 channel_id)
 
 int xsched_xse_set_class(struct xsched_entity *xse)
 {
-	switch (xse->task_type) {
+#ifdef CONFIG_CGROUP_XCU
+	xsched_group_inherit(current, xse);
+#endif
+	switch (xse->parent_grp->sched_type) {
 	case XSCHED_TYPE_RT:
 		xse->class = &rt_xsched_class;
 		XSCHED_INFO("Context is in RT class %s\n", __func__);
@@ -428,7 +435,8 @@ int xsched_ctx_init_xse(struct xsched_context *ctx, struct vstream_info *vs)
 	atomic_set(&xse->kicks_pending_ctx_cnt, 0);
 	atomic_set(&xse->kicks_submited, 0);
 
-	xse->task_type = XSCHED_TYPE_RT;
+
+	xse->task_type = GET_VS_TASK_TYPE(vs);
 	xse->last_process_time = 0;
 
 	xse->fd = ctx->fd;
@@ -836,6 +844,7 @@ int __init xsched_init(void)
 
 	/* Initializing global Xsched context list. */
 	INIT_LIST_HEAD(&xsched_ctx_list);
+	xcu_cg_init_common(root_xcg);
 
 	return 0;
 }
-- 
2.34.1