[PATCH OLK-5.10 05/24] Revert "md: unlock mddev before reap sync_thread in action_store"

31 May 2023

From: Yu Kuai yukuai3@huawei.com
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I6OMCC
CVE: NA
--------------------------------
This reverts commit 9dfbdafda3b34e262e43e786077bab8e476a89d1.
That commit introduces a defect: sync_thread can be running while
MD_RECOVERY_RUNNING is cleared, which causes unexpected problems,
for example:
list_add corruption. prev->next should be next (ffff0001ac1daba0), but was ffff0000ce1a02a0. (prev=ffff0000ce1a02a0).
Call trace:
 __list_add_valid+0xfc/0x140
 insert_work+0x78/0x1a0
 __queue_work+0x500/0xcf4
 queue_work_on+0xe8/0x12c
 md_check_recovery+0xa34/0xf30
 raid10d+0xb8/0x900 [raid10]
 md_thread+0x16c/0x2cc
 kthread+0x1a4/0x1ec
 ret_from_fork+0x10/0x18
This is because the work (mddev->del_work) is requeued while it is still inside the workqueue:
t1:			t2:
action_store
 mddev_lock
  if (mddev->sync_thread)
   mddev_unlock
   md_unregister_thread
   // first sync_thread is done
    		md_check_recovery
    		 mddev_try_lock
    		 /*
    		  * once MD_RECOVERY_DONE is set, new sync_thread
    		  * can start.
    		  */
    		 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery)
    		 INIT_WORK(&mddev->del_work, md_start_sync)
    		 queue_work(md_misc_wq, &mddev->del_work)
    		  test_and_set_bit(WORK_STRUCT_PENDING_BIT, ...)
    		  // set pending bit
    		  insert_work
    		   list_add_tail
    		 mddev_unlock
   mddev_lock_nointr
   md_reap_sync_thread
   // MD_RECOVERY_RUNNING is cleared
 mddev_unlock
t3:
// before queued work started from t2
md_check_recovery
 // MD_RECOVERY_RUNNING is not set, a new sync_thread can be started
 INIT_WORK(&mddev->del_work, md_start_sync)
  work->data = 0
  // work pending bit is cleared
 queue_work(md_misc_wq, &mddev->del_work)
  insert_work
   list_add_tail
   // list is corrupted
This patch reverts the commit to fix the problem; the deadlock that the
reverted commit tried to fix will be fixed in the following patches.
Signed-off-by: Yu Kuai yukuai3@huawei.com
Signed-off-by: Song Liu song@kernel.org
Link: https://lore.kernel.org/r/20230322064122.2384589-2-yukuai1@huaweicloud.com
Reviewed-by: Hou Tao houtao1@huawei.com
Signed-off-by: Jialin Zhang zhangjialin11@huawei.com
---
 drivers/md/dm-raid.c |  1 -
 drivers/md/md.c      | 19 ++-----------------
 2 files changed, 2 insertions(+), 18 deletions(-)

diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 134c51027dce..a2d09c9c6e9f 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3693,7 +3693,6 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
    if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) {
    	if (mddev->sync_thread) {
    		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-			md_unregister_thread(&mddev->sync_thread);
    		md_reap_sync_thread(mddev);
    	}
    } else if (decipher_sync_action(mddev, mddev->recovery) != st_idle)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index cf7ca756b216..e072ccb08735 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4872,19 +4872,6 @@ action_store(struct mddev *mddev, const char *page, size_t len)
    		if (work_pending(&mddev->del_work))
    			flush_workqueue(md_misc_wq);
    		if (mddev->sync_thread) {
-				sector_t save_rp = mddev->reshape_position;
-
-				mddev_unlock(mddev);
-				set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-				md_unregister_thread(&mddev->sync_thread);
-				mddev_lock_nointr(mddev);
-				/*
-				 * set RECOVERY_INTR again and restore reshape
-				 * position in case others changed them after
-				 * got lock, eg, reshape_position_store and
-				 * md_check_recovery.
-				 */
-				mddev->reshape_position = save_rp;
    			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
    			md_reap_sync_thread(mddev);
    		}
@@ -6264,7 +6251,6 @@ static void __md_stop_writes(struct mddev *mddev)
    	flush_workqueue(md_misc_wq);
    if (mddev->sync_thread) {
    	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-		md_unregister_thread(&mddev->sync_thread);
    	md_reap_sync_thread(mddev);
    }
@@ -9335,7 +9321,6 @@ void md_check_recovery(struct mddev *mddev)
    		 * ->spare_active and clear saved_raid_disk
    		 */
    		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-			md_unregister_thread(&mddev->sync_thread);
    		md_reap_sync_thread(mddev);
    		clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
    		clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -9371,7 +9356,6 @@ void md_check_recovery(struct mddev *mddev)
    		goto unlock;
    	}
    	if (mddev->sync_thread) {
-			md_unregister_thread(&mddev->sync_thread);
    		md_reap_sync_thread(mddev);
    		goto unlock;
    	}
@@ -9451,7 +9435,8 @@ void md_reap_sync_thread(struct mddev *mddev)
    sector_t old_dev_sectors = mddev->dev_sectors;
    bool is_reshaped = false;
-	/* sync_thread should be unregistered, collect result */
+	/* resync has finished, collect result */
+	md_unregister_thread(&mddev->sync_thread);
    if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
        !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
        mddev->degraded != mddev->raid_disks) {
-- 
2.25.1


    

2025

2024

2023

2022

2021

2020

2019

[PATCH OLK-5.10 05/24] Revert "md: unlock mddev before reap sync_thread in action_store"