hulk inclusion category: bugfix bugzilla: https://atomgit.com/openeuler/kernel/issues/8424 ---------------------------------------- Adding and enabling a new subsystem would increment CGROUP_SUBSYS_COUNT, which leads to Kernel ABI (KABI) breakage. This patch introduces a mechanism to share the same SUBSYS(dmem) slot between the 'dmem' and 'devices' subsystems. Since 'dmem' is a cgroup v2-only controller and 'devices' is a cgroup v1-only controller, they are mutually exclusive at runtime. We introduce a new kernel command line parameter, "dmem", to control this behavior dynamically. This approach allows us to enable both CONFIG_CGROUP_DMEM and CONFIG_CGROUP_DEVICE simultaneously without exceeding the subsystem limit. The behavior based on the "dmem" cmdline parameter is as follows: 1. dmem=disable, cgroup v1: - The legacy 'devices' subsystem is active and functional. - The 'dmem' subsystem remains dormant. 2. dmem=enable, cgroup v1: - The 'devices' subsystem is effectively disabled/blocked. 3. dmem=disable, cgroup v2: - The 'dmem' subsystem is blocked in the hierarchy. 4. dmem=enable, cgroup v2: - The 'dmem' subsystem is active and usable. - The 'devices' logic is bypassed. This ensures backward compatibility for v1 users while enabling the new functionality for v2, all within the constraints of the kernel subsystem limit. 
Fixes: b168ed458dde ("kernel/cgroup: Add "dmem" memory accounting cgroup") Signed-off-by: Liu Kai <liukai284@huawei.com> --- include/linux/cgroup_dmem.h | 5 ++ include/linux/cgroup_subsys.h | 8 +- include/linux/device_cgroup.h | 20 +++++ kernel/cgroup/cgroup.c | 2 +- kernel/cgroup/dmem.c | 146 +++++++++++++++++++++++++++++++++- security/device_cgroup.c | 63 +++++++++++++-- 6 files changed, 229 insertions(+), 15 deletions(-) diff --git a/include/linux/cgroup_dmem.h b/include/linux/cgroup_dmem.h index dd4869f1d736..b86ca6012516 100644 --- a/include/linux/cgroup_dmem.h +++ b/include/linux/cgroup_dmem.h @@ -63,4 +63,9 @@ static inline void dmem_cgroup_pool_state_put(struct dmem_cgroup_pool_state *poo { } #endif + +#ifdef CONFIG_CGROUP_DEVICE +bool dmem_cgroup_enabled(void); +#endif + #endif /* _CGROUP_DMEM_H */ diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index be825ed8aba3..f6da5531a0ed 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -29,7 +29,9 @@ SUBSYS(io) SUBSYS(memory) #endif -#if IS_ENABLED(CONFIG_CGROUP_DEVICE) +#if IS_ENABLED(CONFIG_CGROUP_DMEM) +SUBSYS(dmem) +#elif IS_ENABLED(CONFIG_CGROUP_DEVICE) SUBSYS(devices) #endif @@ -67,10 +69,6 @@ SUBSYS(rdma) SUBSYS(misc) #endif -#if IS_ENABLED(CONFIG_CGROUP_DMEM) -SUBSYS(dmem) -#endif - /* * The following subsystems are not supported on the default hierarchy. 
*/ diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h index d02f32b7514e..aaaac1d99232 100644 --- a/include/linux/device_cgroup.h +++ b/include/linux/device_cgroup.h @@ -10,6 +10,9 @@ #define DEVCG_DEV_CHAR 2 #define DEVCG_DEV_ALL 4 /* this represents all devices */ +#ifdef CONFIG_CGROUP_DMEM +#include <linux/cgroup-defs.h> +#endif #if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) int devcgroup_check_permission(short type, u32 major, u32 minor, @@ -65,3 +68,20 @@ static inline int devcgroup_inode_permission(struct inode *inode, int mask) static inline int devcgroup_inode_mknod(int mode, dev_t dev) { return 0; } #endif + +/* + * When CONFIG_CGROUP_DMEM is enabled, devices_cgrp_subsys and dmem_cgrp_subsys + * share the same set of cgroup_subsys hook functions. Consequently, the hooks for + * devices_cgrp_subsys must be exposed externally to allow linkage with the dmem + * cgroup_subsys. + */ +#ifdef CONFIG_CGROUP_DMEM +#define devices_cgrp_id dmem_cgrp_id + +int devcgroup_online(struct cgroup_subsys_state *css); +void devcgroup_offline(struct cgroup_subsys_state *css); +struct cgroup_subsys_state * +devcgroup_css_alloc(struct cgroup_subsys_state *parent_css); +void devcgroup_css_free(struct cgroup_subsys_state *css); +extern struct cftype dev_cgroup_files[]; +#endif diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 17521bc192ee..04301432e84a 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -6256,7 +6256,7 @@ int __init cgroup_init(void) struct cgroup_subsys *ss; int ssid; - BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 17); + BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16); BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files)); BUG_ON(cgroup_init_cftypes(NULL, cgroup_psi_files)); BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files)); diff --git a/kernel/cgroup/dmem.c b/kernel/cgroup/dmem.c index 4cc33e4d8257..ccc7a6c50920 100644 --- a/kernel/cgroup/dmem.c +++ b/kernel/cgroup/dmem.c @@ -17,6 +17,75 @@ #include
<linux/refcount.h> #include <linux/slab.h> +#ifdef CONFIG_CGROUP_DEVICE +#include <linux/device_cgroup.h> +#endif + +static struct cgroup_subsys_state * +dmem_devices_compat_css_alloc(struct cgroup_subsys_state *parent_css); +static void dmem_devices_compat_css_free(struct cgroup_subsys_state *css); +static int dmem_devices_compat_css_online(struct cgroup_subsys_state *css); +static void dmem_devices_compat_css_offline(struct cgroup_subsys_state *css); + +/** + * Parse the "dmem=" kernel command line parameter. + * + * Usage: + * dmem=enable → enable dmem_cgrp_subsys + * Otherwise → enable devices_cgrp_subsys + * + * Returns: + * 1 (handled), 0 (not handled) + */ +static DEFINE_STATIC_KEY_FALSE(dmem_cgroup_key); + +static int __init dmem_setup(char *str) +{ + if (!str) + return 0; + + if (strcmp(str, "enable") == 0) + static_branch_enable(&dmem_cgroup_key); + + return 1; +} +__setup("dmem=", dmem_setup); + +bool dmem_cgroup_enabled(void) +{ + return static_branch_unlikely(&dmem_cgroup_key); +} + +/** + * dmem_cgroup_check_compat - Verify Dmem mode matches the cgroup hierarchy version. + * + * Validates that the Dmem enablement state matches the active cgroup hierarchy + * version when CONFIG_CGROUP_DMEM and CONFIG_CGROUP_DEVICE are both enabled. + * The compatibility rules are strictly defined as: + * + * 1. Dmem Enabled (dmem=enable): Requires cgroup v2. + * 2. Dmem Disabled (default): Requires cgroup v1. + * + * Return: true if compatible, false otherwise (with a warning logged). + */ +static bool dmem_cgroup_check_compat(void) +{ +#ifdef CONFIG_CGROUP_DEVICE + bool dmem_mode = dmem_cgroup_enabled(); + + if (dmem_mode != cgroup_subsys_on_dfl(dmem_cgrp_subsys)) { + pr_warn("DMEM cgrp is incompatible with the cgroup version\n"); + return false; + } +#endif + + /* + * With only CONFIG_CGROUP_DMEM enabled, dmem is toggled via the command line. + * Skip the cgroup version check as it is not required.
+ */ + return true; +} + struct dmem_cgroup_region { /** * @ref: References keeping the region alive. @@ -877,9 +946,80 @@ static struct cftype files[] = { }; struct cgroup_subsys dmem_cgrp_subsys = { - .css_alloc = dmemcs_alloc, - .css_free = dmemcs_free, - .css_offline = dmemcs_offline, + .css_alloc = dmem_devices_compat_css_alloc, + .css_free = dmem_devices_compat_css_free, + .css_offline = dmem_devices_compat_css_offline, + .css_online = dmem_devices_compat_css_online, +#ifdef CONFIG_CGROUP_DEVICE + .legacy_cftypes = dev_cgroup_files, + .legacy_name = "devices", +#else .legacy_cftypes = files, +#endif .dfl_cftypes = files, }; + +/** + * The hooks of a cgroup_subsys are only invoked after a successful allocation. + * Therefore, we must handle all error conditions during the alloc phase to + * prevent invalid usage later. + * + * 1. If both CONFIG_CGROUP_DMEM and CONFIG_CGROUP_DEVICE are enabled: + * We verify that the dmem cmdline and cgroup version are compatible. + * If they do not match, return -EPERM. + * + * 2. If only CONFIG_CGROUP_DMEM is enabled: + * We verify the dmem cmdline. If the dmem is not enabled, + * return -EPERM to restrict access. + */ +static struct cgroup_subsys_state * +dmem_devices_compat_css_alloc(struct cgroup_subsys_state *parent_css) +{ + /* Skip allocation if DMEM cmdline mismatches the cgroup version. 
*/ + if (parent_css && !dmem_cgroup_check_compat()) + return ERR_PTR(-EPERM); + + if (dmem_cgroup_enabled()) + return dmemcs_alloc(parent_css); + +#ifdef CONFIG_CGROUP_DEVICE + return devcgroup_css_alloc(parent_css); +#else /* CONFIG_CGROUP_DEVICE=n dmem=disable */ + if (!parent_css) + return dmemcs_alloc(parent_css); + else + return ERR_PTR(-EPERM); +#endif +} + +static void dmem_devices_compat_css_free(struct cgroup_subsys_state *css) +{ + if (dmem_cgroup_enabled()) + return dmemcs_free(css); + +#ifdef CONFIG_CGROUP_DEVICE + return devcgroup_css_free(css); +#endif +} + +static int dmem_devices_compat_css_online(struct cgroup_subsys_state *css) +{ + if (dmem_cgroup_enabled()) + return 0; + +#ifdef CONFIG_CGROUP_DEVICE + return devcgroup_online(css); +#else + return 0; +#endif +} + +static void dmem_devices_compat_css_offline(struct cgroup_subsys_state *css) +{ + if (dmem_cgroup_enabled()) + return dmemcs_offline(css); + +#ifdef CONFIG_CGROUP_DEVICE + return devcgroup_offline(css); +#endif +} diff --git a/security/device_cgroup.c b/security/device_cgroup.c index dc4df7475081..6bae6efefd9c 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -16,6 +16,14 @@ #include <linux/rcupdate.h> #include <linux/mutex.h> +#ifdef CONFIG_CGROUP_DMEM +#include <linux/cgroup_dmem.h> + +#define STATIC +#else +#define STATIC static +#endif + #ifdef CONFIG_CGROUP_DEVICE static DEFINE_MUTEX(devcgroup_mutex); @@ -185,7 +193,7 @@ static inline bool is_devcg_online(const struct dev_cgroup *devcg) * @css: css getting online * returns 0 in case of success, error code otherwise */ -static int devcgroup_online(struct cgroup_subsys_state *css) +STATIC int devcgroup_online(struct cgroup_subsys_state *css) { struct dev_cgroup *dev_cgroup = css_to_devcgroup(css); struct dev_cgroup *parent_dev_cgroup = css_to_devcgroup(css->parent); @@ -206,7 +214,7 @@ static int devcgroup_online(struct cgroup_subsys_state *css) return ret; } -static void devcgroup_offline(struct 
cgroup_subsys_state *css) +STATIC void devcgroup_offline(struct cgroup_subsys_state *css) { struct dev_cgroup *dev_cgroup = css_to_devcgroup(css); @@ -218,7 +226,7 @@ static void devcgroup_offline(struct cgroup_subsys_state *css) /* * called from kernel/cgroup/cgroup.c with cgroup_lock() held. */ -static struct cgroup_subsys_state * +STATIC struct cgroup_subsys_state * devcgroup_css_alloc(struct cgroup_subsys_state *parent_css) { struct dev_cgroup *dev_cgroup; @@ -232,7 +240,7 @@ devcgroup_css_alloc(struct cgroup_subsys_state *parent_css) return &dev_cgroup->css; } -static void devcgroup_css_free(struct cgroup_subsys_state *css) +STATIC void devcgroup_css_free(struct cgroup_subsys_state *css) { struct dev_cgroup *dev_cgroup = css_to_devcgroup(css); @@ -280,10 +288,20 @@ static void set_majmin(char *str, unsigned m) static int devcgroup_seq_show(struct seq_file *m, void *v) { - struct dev_cgroup *devcgroup = css_to_devcgroup(seq_css(m)); + struct dev_cgroup *devcgroup; struct dev_exception_item *ex; char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN]; +#ifdef CONFIG_CGROUP_DMEM + /* + * If dmem cgroup is enabled, it supersedes the legacy devices cgroup. + * Skip showing the legacy interface to avoid displaying conflicting info. + */ + if (dmem_cgroup_enabled()) + return -EPERM; +#endif + + devcgroup = css_to_devcgroup(seq_css(m)); rcu_read_lock(); /* * To preserve the compatibility: @@ -786,6 +804,15 @@ static ssize_t devcgroup_access_write(struct kernfs_open_file *of, { int retval; +#ifdef CONFIG_CGROUP_DMEM + /* + * If dmem cgroup is enabled, it supersedes the legacy devices cgroup. + * Reject operations to this interface with -EPERM to prevent conflicts. 
+ */ + if (dmem_cgroup_enabled()) + return -EPERM; +#endif + mutex_lock(&devcgroup_mutex); retval = devcgroup_update_access(css_to_devcgroup(of_css(of)), of_cft(of)->private, strstrip(buf)); @@ -793,7 +820,7 @@ static ssize_t devcgroup_access_write(struct kernfs_open_file *of, return retval ?: nbytes; } -static struct cftype dev_cgroup_files[] = { +STATIC struct cftype dev_cgroup_files[] = { { .name = "allow", .write = devcgroup_access_write, @@ -812,6 +839,14 @@ static struct cftype dev_cgroup_files[] = { { } /* terminate */ }; +/* + * CRITICAL: Devices and Dmem may share the same SUBSYS slot when + * CONFIG_CGROUP_DEVICE and CONFIG_CGROUP_DMEM are both set. + * + * If the hook functions in the devices cgroup_subsys change, you MUST + * synchronize those changes to the Dmem subsystem (kernel/cgroup/dmem.c) + * immediately. Failure to do so will result in inconsistencies or crashes. + */ struct cgroup_subsys devices_cgrp_subsys = { .css_alloc = devcgroup_css_alloc, .css_free = devcgroup_css_free, @@ -835,6 +870,22 @@ static int devcgroup_legacy_check_permission(short type, u32 major, u32 minor, struct dev_cgroup *dev_cgroup; bool rc; +#ifdef CONFIG_CGROUP_DMEM + /* + * If both CONFIG_CGROUP_DMEM and CONFIG_CGROUP_DEVICE are enabled, + * they share the same SUBSYS slot. + * + * When dmem cgroup is enabled, it takes precedence and replaces the + * devices cgroup functionality. This is effectively equivalent to + * CONFIG_CGROUP_DEVICE=n. + * + * Therefore, we bypass the legacy permission check and return 0 directly + * to allow the operation. + */ + if (dmem_cgroup_enabled()) + return 0; +#endif + rcu_read_lock(); dev_cgroup = task_devcgroup(current); if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) -- 2.34.1