From: Waiman Long longman@redhat.com
mainline inclusion from mainline-v6.11-rc commit 737bb142a00d53de3743ae389732721b3b9f0191 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IAGRJD CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
----------------------------------------------------------------------
The "cpuset.cpus.exclusive.effective" value is currently limited to a subset of its "cpuset.cpus". This makes the exclusive CPUs distribution hierarchy subsumed within the larger "cpuset.cpus" hierarchy. We have to decide on what CPUs are used locally and what CPUs can be passed down as exclusive CPUs down the hierarchy and combine them into "cpuset.cpus".
The advantage of the current scheme is that there is only one hierarchy to worry about. However, it makes the scheme harder to use, as all the "cpuset.cpus" values have to be properly set along the way down to the designated remote partition root. It also makes it more cumbersome to find out what CPUs can be used locally.
Make creation of a remote partition simpler by breaking the dependency of "cpuset.cpus.exclusive" on "cpuset.cpus" and make them independent entities. Now we have two separate hierarchies - one for setting "cpuset.cpus.effective" and the other one for setting "cpuset.cpus.exclusive.effective". We may no longer need to set "cpuset.cpus" when we activate a partition root.
Also update Documentation/admin-guide/cgroup-v2.rst and cpuset.c comment to document this change.
Suggested-by: Petr Malat oss@malat.biz Signed-off-by: Waiman Long longman@redhat.com Signed-off-by: Tejun Heo tj@kernel.org
Conflicts: kernel/cgroup/cpuset.c [isolated was not backported] Signed-off-by: Chen Ridong chenridong@huawei.com --- Documentation/admin-guide/cgroup-v2.rst | 4 +- kernel/cgroup/cpuset.c | 67 +++++++++++++++++-------- 2 files changed, 49 insertions(+), 22 deletions(-)
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 69905668e06f..86e294970484 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -2305,8 +2305,8 @@ Cpuset Interface Files cpuset-enabled cgroups.
This file shows the effective set of exclusive CPUs that - can be used to create a partition root. The content of this - file will always be a subset of "cpuset.cpus" and its parent's + can be used to create a partition root. The content + of this file will always be a subset of its parent's "cpuset.cpus.exclusive.effective" if its parent is not the root cgroup. It will also be a subset of "cpuset.cpus.exclusive" if it is set. If "cpuset.cpus.exclusive" is not set, it is diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 26e95916ee8e..52ef54b1b268 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -86,7 +86,7 @@ static const char * const perr_strings[] = { [PERR_NOTEXCL] = "Cpu list in cpuset.cpus not exclusive", [PERR_NOCPUS] = "Parent unable to distribute cpu downstream", [PERR_HOTPLUG] = "No cpu available due to hotplug", - [PERR_CPUSEMPTY] = "cpuset.cpus is empty", + [PERR_CPUSEMPTY] = "cpuset.cpus and cpuset.cpus.exclusive are empty", [PERR_HKEEPING] = "partition config conflicts with housekeeping setup", };
@@ -129,19 +129,28 @@ struct cpuset { /* * Exclusive CPUs dedicated to current cgroup (default hierarchy only) * - * This exclusive CPUs must be a subset of cpus_allowed. A parent - * cgroup can only grant exclusive CPUs to one of its children. + * The effective_cpus of a valid partition root comes solely from its + * effective_xcpus and some of the effective_xcpus may be distributed + * to sub-partitions below & hence excluded from its effective_cpus. + * For a valid partition root, its effective_cpus have no relationship + * with cpus_allowed unless its exclusive_cpus isn't set. * - * When the cgroup becomes a valid partition root, effective_xcpus - * defaults to cpus_allowed if not set. The effective_cpus of a valid - * partition root comes solely from its effective_xcpus and some of the - * effective_xcpus may be distributed to sub-partitions below & hence - * excluded from its effective_cpus. + * This value will only be set if either exclusive_cpus is set or + * when this cpuset becomes a local partition root. */ cpumask_var_t effective_xcpus;
/* * Exclusive CPUs as requested by the user (default hierarchy only) + * + * Its value is independent of cpus_allowed and designates the set of + * CPUs that can be granted to the current cpuset or its children when + * it becomes a valid partition root. The effective set of exclusive + * CPUs granted (effective_xcpus) depends on whether those exclusive + * CPUs are passed down by its ancestors and not yet taken up by + * another sibling partition root along the way. + * + * If its value isn't set, it defaults to cpus_allowed. */ cpumask_var_t exclusive_cpus;
@@ -232,6 +241,17 @@ static struct list_head remote_children; * 2 - partition root without load balancing (isolated) * -1 - invalid partition root * -2 - invalid isolated partition root + * + * There are 2 types of partitions - local or remote. Local partitions are + * those whose parents are partition root themselves. Setting of + * cpuset.cpus.exclusive are optional in setting up local partitions. + * Remote partitions are those whose parents are not partition roots. Passing + * down exclusive CPUs by setting cpuset.cpus.exclusive along its ancestor + * nodes are mandatory in creating a remote partition. + * + * For simplicity, a local partition can be created under a local or remote + * partition but a remote partition cannot have any partition root in its + * ancestor chain except the cgroup root. */ #define PRS_MEMBER 0 #define PRS_ROOT 1 @@ -740,6 +760,19 @@ static inline void free_cpuset(struct cpuset *cs) kfree(cs); }
+/* Return user specified exclusive CPUs */ +static inline struct cpumask *user_xcpus(struct cpuset *cs) +{ + return cpumask_empty(cs->exclusive_cpus) ? cs->cpus_allowed + : cs->exclusive_cpus; +} + +static inline bool xcpus_empty(struct cpuset *cs) +{ + return cpumask_empty(cs->cpus_allowed) && + cpumask_empty(cs->exclusive_cpus); +} + static inline struct cpumask *fetch_xcpus(struct cpuset *cs) { return !cpumask_empty(cs->exclusive_cpus) ? cs->exclusive_cpus : @@ -1552,7 +1585,7 @@ static void reset_partition_data(struct cpuset *cs) * Return: true if xcpus is not empty, false otherwise. * * Starting with exclusive_cpus (cpus_allowed if exclusive_cpus is not set), - * it must be a subset of cpus_allowed and parent's effective_xcpus. + * it must be a subset of parent's effective_xcpus. */ static bool compute_effective_exclusive_cpumask(struct cpuset *cs, struct cpumask *xcpus) @@ -1562,12 +1595,7 @@ static bool compute_effective_exclusive_cpumask(struct cpuset *cs, if (!xcpus) xcpus = cs->effective_xcpus;
- if (!cpumask_empty(cs->exclusive_cpus)) - cpumask_and(xcpus, cs->exclusive_cpus, cs->cpus_allowed); - else - cpumask_copy(xcpus, cs->cpus_allowed); - - return cpumask_and(xcpus, xcpus, parent->effective_xcpus); + return cpumask_and(xcpus, user_xcpus(cs), parent->effective_xcpus); }
static inline bool is_remote_partition(struct cpuset *cs) @@ -1853,8 +1881,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, */ adding = deleting = false; old_prs = new_prs = cs->partition_root_state; - xcpus = !cpumask_empty(cs->exclusive_cpus) - ? cs->effective_xcpus : cs->cpus_allowed; + xcpus = user_xcpus(cs);
if (cmd == partcmd_invalidate) { if (is_prs_invalid(old_prs)) @@ -1882,7 +1909,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, return is_partition_invalid(parent) ? PERR_INVPARENT : PERR_NOTPART; } - if (!newmask && cpumask_empty(cs->cpus_allowed)) + if (!newmask && xcpus_empty(cs)) return PERR_CPUSEMPTY;
nocpu = tasks_nocpu_error(parent, cs, xcpus); @@ -3100,9 +3127,9 @@ static int update_prstate(struct cpuset *cs, int new_prs)
if (!old_prs) { /* - * cpus_allowed cannot be empty. + * cpus_allowed and exclusive_cpus cannot be both empty. */ - if (cpumask_empty(cs->cpus_allowed)) { + if (xcpus_empty(cs)) { err = PERR_CPUSEMPTY; goto out; }