From: Tejun Heo tj@kernel.org
mainline inclusion from mainline-v5.16-9 commit e57457641613fef0d147ede8bd6a3047df588b95 category: bugfix bugzilla: NA CVE: CVE-2021-4197
------------------------------------------------------------------------
cgroup process migration permission checks are performed at write time because whether a given operation is allowed depends on the content of the write - the PID. This currently uses current's cgroup namespace, which is a potential security weakness as it may allow scenarios where a less privileged process tricks a more privileged one into writing into an fd that it created.
This patch makes cgroup remember the cgroup namespace at the time of open and uses it for migration permission checks instead of current's. Note that this only applies to cgroup2 as cgroup1 doesn't have namespace support.
This also fixes a use-after-free bug on cgroupns reported in
https://lore.kernel.org/r/00000000000048c15c05d0083397@google.com
Note that backporting this fix also requires the preceding patch.
Reported-by: "Eric W. Biederman" ebiederm@xmission.com Suggested-by: Linus Torvalds torvalds@linuxfoundation.org Cc: Michal Koutný mkoutny@suse.com Cc: Oleg Nesterov oleg@redhat.com Reviewed-by: Michal Koutný mkoutny@suse.com Reported-by: syzbot+50f5cf33a284ce738b62@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/00000000000048c15c05d0083397@google.com Fixes: 5136f6365ce3 ("cgroup: implement "nsdelegate" mount option") Signed-off-by: Tejun Heo tj@kernel.org Conflicts: kernel/cgroup/cgroup-internal.h kernel/cgroup/cgroup.c Signed-off-by: Lu Jialin lujialin4@huawei.com Reviewed-by: weiyang wang wangweiyang2@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/cgroup/cgroup-internal.h | 2 ++ kernel/cgroup/cgroup.c | 24 +++++++++++++++++------- 2 files changed, 19 insertions(+), 7 deletions(-)
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 88bc8352ae9f3..c52883e5cb8a4 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -37,6 +37,8 @@ extern char trace_cgroup_path[TRACE_CGROUP_PATH_LEN]; struct cgroup_pidlist;
struct cgroup_file_ctx { + struct cgroup_namespace *ns; + struct { bool started; struct css_task_iter iter; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index e6c77e3df1041..7f4dab2a32360 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3460,14 +3460,19 @@ static int cgroup_file_open(struct kernfs_open_file *of) ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; + + ctx->ns = current->nsproxy->cgroup_ns; + get_cgroup_ns(ctx->ns); of->priv = ctx;
if (!cft->open) return 0;
ret = cft->open(of); - if (ret) + if (ret) { + put_cgroup_ns(ctx->ns); kfree(ctx); + } return ret; }
@@ -3478,13 +3483,14 @@ static void cgroup_file_release(struct kernfs_open_file *of)
if (cft->release) cft->release(of); + put_cgroup_ns(ctx->ns); kfree(ctx); }
static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { - struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; + struct cgroup_file_ctx *ctx = of->priv; struct cgroup *cgrp = of->kn->parent->priv; struct cftype *cft = of->kn->priv; struct cgroup_subsys_state *css; @@ -3498,7 +3504,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, */ if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) && !(cft->flags & CFTYPE_NS_DELEGATABLE) && - ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp) + ctx->ns != &init_cgroup_ns && ctx->ns->root_cset->dfl_cgrp == cgrp) return -EPERM;
if (cft->write) @@ -4460,9 +4466,9 @@ static int cgroup_procs_show(struct seq_file *s, void *v)
static int cgroup_procs_write_permission(struct cgroup *src_cgrp, struct cgroup *dst_cgrp, - struct super_block *sb) + struct super_block *sb, + struct cgroup_namespace *ns) { - struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup *com_cgrp = src_cgrp; struct inode *inode; int ret; @@ -4498,6 +4504,7 @@ static int cgroup_procs_write_permission(struct cgroup *src_cgrp, static ssize_t cgroup_procs_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { + struct cgroup_file_ctx *ctx = of->priv; struct cgroup *src_cgrp, *dst_cgrp; struct task_struct *task; const struct cred *saved_cred; @@ -4525,7 +4532,8 @@ static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
saved_cred = override_creds(of->file->f_cred); ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, - of->file->f_path.dentry->d_sb); + of->file->f_path.dentry->d_sb, + ctx->ns); revert_creds(saved_cred); if (ret) goto out_finish; @@ -4548,6 +4556,7 @@ static void *cgroup_threads_start(struct seq_file *s, loff_t *pos) static ssize_t cgroup_threads_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { + struct cgroup_file_ctx *ctx = of->priv; struct cgroup *src_cgrp, *dst_cgrp; struct task_struct *task; const struct cred *saved_cred; @@ -4577,7 +4586,8 @@ static ssize_t cgroup_threads_write(struct kernfs_open_file *of,
saved_cred = override_creds(of->file->f_cred); ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, - of->file->f_path.dentry->d_sb); + of->file->f_path.dentry->d_sb, + ctx->ns); revert_creds(saved_cred); if (ret) goto out_finish;