hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I9PVOZ
--------------------------------
Found a deadlock issue in mpam which is already fixed in x86 resctrl by commit 334b0f4e9b1b ("x86/resctrl: Fix a deadlock due to inaccurate reference").
There is a race condition which results in a deadlock when rmdir and mkdir concurrently:
Thread 1: rmdir /sys/fs/resctrl/p1 Thread 2: mkdir /sys/fs/resctrl/p1/mon_groups/m1
Thread 1 is deleting control group "p1". Holding rdtgroup_mutex, kernfs_remove() removes all kernfs nodes under directory "p1" recursively, then waits for sub kernfs node "mon_groups" to drop active reference.
Thread 2 is trying to create a subdirectory "m1" in the "mon_groups" directory. The wrapper kernfs_iop_mkdir() takes an active reference to the "mon_groups" directory but the code drops the active reference to the parent directory "p1" instead.
As a result, Thread 1 is blocked on waiting for active reference to drop and never release rdtgroup_mutex, while Thread 2 is also blocked on trying to get rdtgroup_mutex.
Thread 1 (rdtgroup_rmdir) Thread 2 (rdtgroup_mkdir) (rmdir /sys/fs/resctrl/p1) (mkdir /sys/fs/resctrl/p1/mon_groups/m1) ------------------------- ------------------------- kernfs_iop_mkdir /* * kn: "m1", parent_kn: "mon_groups", * prgrp_kn: parent_kn->parent: "p1", * * "mon_groups", parent_kn->active++: 1 */ kernfs_get_active(parent_kn) kernfs_iop_rmdir /* "p1", kn->active++ */ kernfs_get_active(kn)
rdtgroup_kn_lock_live atomic_inc(&rdtgrp->waitcount) /* "p1", kn->active-- */ kernfs_break_active_protection(kn) mutex_lock
rdtgroup_rmdir_ctrl free_all_child_rdtgrp sentry->flags = RDT_DELETED
rdtgroup_ctrl_remove rdtgrp->flags = RDT_DELETED kernfs_get(kn) kernfs_remove(rdtgrp->kn) __kernfs_remove /* "mon_groups", sub_kn */ atomic_add(KN_DEACTIVATED_BIAS, &sub_kn->active) kernfs_drain(sub_kn) /* * sub_kn->active == KN_DEACTIVATED_BIAS + 1, * waiting on sub_kn->active to drop, but it * never drops in Thread 2 which is blocked * on getting rdtgroup_mutex. */ Thread 1 hangs here ----> wait_event(sub_kn->active == KN_DEACTIVATED_BIAS) ... rdtgroup_mkdir rdtgroup_mkdir_mon(parent_kn, prgrp_kn) mkdir_rdt_prepare(parent_kn, prgrp_kn) rdtgroup_kn_lock_live(prgrp_kn) atomic_inc(&rdtgrp->waitcount) /* * "p1", prgrp_kn->active-- * * The active reference on "p1" is * dropped, but not matching the * actual active reference taken * on "mon_groups", thus causing * Thread 1 to wait forever while * holding rdtgroup_mutex. */ kernfs_break_active_protection( prgrp_kn) /* * Trying to get rdtgroup_mutex * which is held by Thread 1. */ Thread 2 hangs here ----> mutex_lock ...
The problem is that the creation of a subdirectory in the "mon_groups" directory incorrectly releases the active protection of its parent directory instead of itself before it starts waiting for rdtgroup_mutex. This is triggered by the rdtgroup_mkdir() flow calling rdtgroup_kn_lock_live()/rdtgroup_kn_unlock() with kernfs node of the parent control group ("p1") as argument. It should be called with kernfs node "mon_groups" instead. What is currently missing is that the kn->priv of "mon_groups" is NULL instead of pointing to the rdtgrp.
Fix it by pointing kn->priv to rdtgrp when "mon_groups" is created. Then it could be passed to rdtgroup_kn_lock_live()/rdtgroup_kn_unlock() instead. And then it operates on the same rdtgroup structure but handles the active reference of kernfs node "mon_groups" to prevent deadlock. The same changes are also made to the "mon_data" directories.
This patch involved associating the private data of the "mon_groups" and "mon_data" directories to the resource group to which they belong instead of NULL as before.
Improving the directory removal safeguard to ensure that subdirectories of the resctrl root directory can only be removed if they are a child of the resctrl filesystem's root _and_ not associated with the default resource group.
This patch results in some unused function parameters that will be cleaned up in follow-up patch, this patch focuses on the fix only in support of backport efforts.
Fixes: 6065f5513f78 ("resctrlfs: init support resctrlfs") Signed-off-by: Yu Liao liaoyu15@huawei.com --- fs/resctrlfs.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/fs/resctrlfs.c b/fs/resctrlfs.c index 56f2a63ea43b..74b73eaa13c8 100644 --- a/fs/resctrlfs.c +++ b/fs/resctrlfs.c @@ -360,7 +360,7 @@ static struct dentry *resctrl_mount(struct file_system_type *fs_type,
if (resctrl_mon_capable) { ret = mongroup_create_dir(resctrl_group_default.kn, - NULL, "mon_groups", + &resctrl_group_default, "mon_groups", &kn_mongrp); if (ret) { dentry = ERR_PTR(ret); @@ -594,7 +594,7 @@ static int mkdir_resctrl_prepare(struct kernfs_node *parent_kn, uint files = 0; int ret;
- prdtgrp = resctrl_group_kn_lock_live(prgrp_kn); + prdtgrp = resctrl_group_kn_lock_live(parent_kn); rdt_last_cmd_clear(); if (!prdtgrp) { ret = -ENODEV; @@ -683,7 +683,7 @@ static int mkdir_resctrl_prepare(struct kernfs_node *parent_kn, kernfs_activate(kn);
/* - * The caller unlocks the prgrp_kn upon success. + * The caller unlocks the parent_kn upon success. */ return 0;
@@ -700,7 +700,7 @@ static int mkdir_resctrl_prepare(struct kernfs_node *parent_kn, out_free_rdtgrp: kfree(rdtgrp); out_unlock: - resctrl_group_kn_unlock(prgrp_kn); + resctrl_group_kn_unlock(parent_kn); return ret; }
@@ -741,7 +741,7 @@ static int resctrl_group_mkdir_mon(struct kernfs_node *parent_kn, */ ret = resctrl_update_groups_config(prgrp);
- resctrl_group_kn_unlock(prgrp_kn); + resctrl_group_kn_unlock(parent_kn); return ret; }
@@ -775,7 +775,7 @@ static int resctrl_group_mkdir_ctrl_mon(struct kernfs_node *parent_kn, * Create an empty mon_groups directory to hold the subset * of tasks and cpus to monitor. */ - ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL); + ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL); if (ret) { rdt_last_cmd_puts("kernfs subdir error\n"); goto out_list_del; @@ -789,7 +789,7 @@ static int resctrl_group_mkdir_ctrl_mon(struct kernfs_node *parent_kn, out_common_fail: mkdir_resctrl_prepare_clean(rdtgrp); out_unlock: - resctrl_group_kn_unlock(prgrp_kn); + resctrl_group_kn_unlock(parent_kn); return ret; }
@@ -948,7 +948,8 @@ static int resctrl_group_rmdir(struct kernfs_node *kn) * If the resctrl_group is a mon group and parent directory * is a valid "mon_groups" directory, remove the mon group. */ - if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == resctrl_group_default.kn) + if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == resctrl_group_default.kn && + rdtgrp != &resctrl_group_default) ret = resctrl_group_rmdir_ctrl(kn, rdtgrp, tmpmask); else if (rdtgrp->type == RDTMON_GROUP && is_mon_groups(parent_kn, kn->name))