Protect md_thread with rcu.
Yu Kuai (5): md: factor out a helper to wake up md_thread directly dm-raid: remove useless checking in raid_message() md/bitmap: always wake up md_thread in timeout_store md/bitmap: factor out a helper to set timeout md: protect md_thread with rcu
drivers/md/dm-raid.c | 4 +- drivers/md/md-bitmap.c | 43 +++++++++++-------- drivers/md/md-cluster.c | 17 +++++--- drivers/md/md-multipath.c | 4 +- drivers/md/md.c | 88 +++++++++++++++++++++------------------ drivers/md/md.h | 8 ++-- drivers/md/raid1.c | 7 ++-- drivers/md/raid1.h | 2 +- drivers/md/raid10.c | 20 +++++---- drivers/md/raid10.h | 2 +- drivers/md/raid5-cache.c | 22 ++++++---- drivers/md/raid5.c | 15 +++---- drivers/md/raid5.h | 2 +- 13 files changed, 132 insertions(+), 102 deletions(-)
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/3798 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/G...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/3798 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/G...
From: Yu Kuai yukuai3@huawei.com
mainline inclusion from mainline-v6.5-rc1 commit e5e9b9cb71a09d86d5e8d147e6a6457e1f8887b5 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8OPEK CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
md_wakeup_thread() can't wakeup md_thread->tsk if md_thread->run is still in progress, and in some cases md_thread->tsk need to be woke up directly, like md_set_readonly() and do_md_stop().
Commit 9dfbdafda3b3 ("md: unlock mddev before reap sync_thread in action_store") introduce a new scenario where unregister sync_thread is not protected by 'reconfig_mutex', this can cause null-ptr-deference in theroy:
t1: md_set_readonly t2: action_store md_unregister_thread // 'reconfig_mutex' is not held // 'reconfig_mutex' is held by caller if (mddev->sync_thread) thread = *threadp *threadp = NULL wake_up_process(mddev->sync_thread->tsk) // null-ptr-deference
Fix this problem by factoring out a helper to wake up md_thread directly, so that 'sync_thread' won't be accessed multiple times from the reader side. This helper also prepare to protect md_thread with rcu.
Noted that later patches is going to fix that unregister sync_thread is not protected by 'reconfig_mutex' from action_store().
Signed-off-by: Yu Kuai yukuai3@huawei.com Signed-off-by: Song Liu song@kernel.org Link: https://lore.kernel.org/r/20230523021017.3048783-2-yukuai1@huaweicloud.com
Conflict: drivers/md/md.c commit 3ce94ce5d05a ("md: fix duplicate filename for rdev") adds declaration of export_rdev() Signed-off-by: Li Lingfeng lilingfeng3@huawei.com --- drivers/md/md.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c index 4563ef0df4c5..422558d88ed6 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -90,6 +90,7 @@ static struct workqueue_struct *md_rdev_misc_wq; static int remove_and_add_spares(struct mddev *mddev, struct md_rdev *this); static void mddev_detach(struct mddev *mddev); +static void md_wakeup_thread_directly(struct md_thread *thread);
/* * Default number of read corrections we'll attempt on an rdev @@ -6390,10 +6391,12 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) } if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) set_bit(MD_RECOVERY_INTR, &mddev->recovery); - if (mddev->sync_thread) - /* Thread might be blocked waiting for metadata update - * which will now never happen */ - wake_up_process(mddev->sync_thread->tsk); + + /* + * Thread might be blocked waiting for metadata update which will now + * never happen + */ + md_wakeup_thread_directly(mddev->sync_thread);
if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) return -EBUSY; @@ -6454,10 +6457,12 @@ static int do_md_stop(struct mddev *mddev, int mode, } if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) set_bit(MD_RECOVERY_INTR, &mddev->recovery); - if (mddev->sync_thread) - /* Thread might be blocked waiting for metadata update - * which will now never happen */ - wake_up_process(mddev->sync_thread->tsk); + + /* + * Thread might be blocked waiting for metadata update which will now + * never happen + */ + md_wakeup_thread_directly(mddev->sync_thread);
mddev_unlock(mddev); wait_event(resync_wait, (mddev->sync_thread == NULL && @@ -8004,6 +8009,12 @@ static int md_thread(void *arg) return 0; }
+static void md_wakeup_thread_directly(struct md_thread *thread) +{ + if (thread) + wake_up_process(thread->tsk); +} + void md_wakeup_thread(struct md_thread *thread) { if (thread) {
From: Yu Kuai yukuai3@huawei.com
mainline inclusion from mainline-v6.5-rc1 commit 955a257d69e44cea09b0375b8f2f3d4d9fcf7b4e category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8OPEK CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
md_wakeup_thread() handle the case that pass in md_thread is NULL, there is no need to check this.
Signed-off-by: Yu Kuai yukuai3@huawei.com Signed-off-by: Song Liu song@kernel.org Link: https://lore.kernel.org/r/20230523021017.3048783-3-yukuai1@huaweicloud.com Signed-off-by: Li Lingfeng lilingfeng3@huawei.com --- drivers/md/dm-raid.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index a2d09c9c6e9f..13de4fc14c16 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3717,11 +3717,11 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv, * canceling read-auto mode */ mddev->ro = 0; - if (!mddev->suspended && mddev->sync_thread) + if (!mddev->suspended) md_wakeup_thread(mddev->sync_thread); } set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - if (!mddev->suspended && mddev->thread) + if (!mddev->suspended) md_wakeup_thread(mddev->thread);
return 0;
From: Yu Kuai yukuai3@huawei.com
mainline inclusion from mainline-v6.5-rc1 commit c333673a78307abe6b1f6998809288fcd86740ed category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8OPEK CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
md_wakeup_thread() can handle the case that pass in md_thread is NULL, the only difference is that md_wakeup_thread() will be called when current timeout is 'MAX_SCHEDULE_TIMEOUT', this should not matter because timeout_store() is not hot path, and the daemon process is woke up more than demand from other context already.
Prepare to factor out a helper to set timeout.
Signed-off-by: Yu Kuai yukuai3@huawei.com Signed-off-by: Song Liu song@kernel.org Link: https://lore.kernel.org/r/20230523021017.3048783-4-yukuai1@huaweicloud.com Signed-off-by: Li Lingfeng lilingfeng3@huawei.com --- drivers/md/md-bitmap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index 4201d68b60f2..2eb63d2ffee8 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -2452,11 +2452,11 @@ timeout_store(struct mddev *mddev, const char *buf, size_t len) * the bitmap is all clean and we don't need to * adjust the timeout right now */ - if (mddev->thread->timeout < MAX_SCHEDULE_TIMEOUT) { + if (mddev->thread->timeout < MAX_SCHEDULE_TIMEOUT) mddev->thread->timeout = timeout; - md_wakeup_thread(mddev->thread); - } } + + md_wakeup_thread(mddev->thread); return len; }
From: Yu Kuai yukuai3@huawei.com
mainline inclusion from mainline-v6.5-rc1 commit 4eeb6535cd51100460ec8873bb68addef17b3e81 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8OPEK CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Register/unregister 'mddev->thread' are both under 'reconfig_mutex', however, some context didn't hold the mutex to access mddev->thread, which can cause null-ptr-deference:
1) md_bitmap_daemon_work() can be called from md_check_recovery() where 'reconfig_mutex' is not held, deference 'mddev->thread' might cause null-ptr-deference, because md_unregister_thread() reset the pointer before stopping the thread.
2) timeout_store() access 'mddev->thread' multiple times, null-ptr-deference can be triggered if 'mddev->thread' is reset in the middle.
This patch factor out a helper to set timeout, the new helper always check if 'mddev->thread' is null first, so that problem 1 can be fixed.
Now that this helper only access 'mddev->thread' once, but it's possible that 'mddev->thread' can be freed while this helper is still in progress, hence the problem is not fixed yet. Follow up patches will fix this by protecting md_thread with rcu.
Signed-off-by: Yu Kuai yukuai3@huawei.com Signed-off-by: Song Liu song@kernel.org Link: https://lore.kernel.org/r/20230523021017.3048783-5-yukuai1@huaweicloud.com Signed-off-by: Li Lingfeng lilingfeng3@huawei.com --- drivers/md/md-bitmap.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-)
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index 2eb63d2ffee8..d2a726b02c24 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -1219,11 +1219,22 @@ static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap, sector_t offset, sector_t *blocks, int create);
+static void mddev_set_timeout(struct mddev *mddev, unsigned long timeout, + bool force) +{ + struct md_thread *thread = mddev->thread; + + if (!thread) + return; + + if (force || thread->timeout < MAX_SCHEDULE_TIMEOUT) + thread->timeout = timeout; +} + /* * bitmap daemon -- periodically wakes up to clean bits and flush pages * out to disk */ - void md_bitmap_daemon_work(struct mddev *mddev) { struct bitmap *bitmap; @@ -1247,7 +1258,7 @@ void md_bitmap_daemon_work(struct mddev *mddev)
bitmap->daemon_lastrun = jiffies; if (bitmap->allclean) { - mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; + mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true); goto done; } bitmap->allclean = 1; @@ -1344,8 +1355,7 @@ void md_bitmap_daemon_work(struct mddev *mddev)
done: if (bitmap->allclean == 0) - mddev->thread->timeout = - mddev->bitmap_info.daemon_sleep; + mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true); mutex_unlock(&mddev->bitmap_info.mutex); }
@@ -1801,8 +1811,7 @@ void md_bitmap_destroy(struct mddev *mddev) mddev->bitmap = NULL; /* disconnect from the md device */ spin_unlock(&mddev->lock); mutex_unlock(&mddev->bitmap_info.mutex); - if (mddev->thread) - mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; + mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true);
md_bitmap_free(bitmap); } @@ -1945,7 +1954,7 @@ int md_bitmap_load(struct mddev *mddev) /* Kick recovery in case any bits were set */ set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
- mddev->thread->timeout = mddev->bitmap_info.daemon_sleep; + mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true); md_wakeup_thread(mddev->thread);
md_bitmap_update_sb(bitmap); @@ -2446,17 +2455,11 @@ timeout_store(struct mddev *mddev, const char *buf, size_t len) timeout = MAX_SCHEDULE_TIMEOUT-1; if (timeout < 1) timeout = 1; - mddev->bitmap_info.daemon_sleep = timeout; - if (mddev->thread) { - /* if thread->timeout is MAX_SCHEDULE_TIMEOUT, then - * the bitmap is all clean and we don't need to - * adjust the timeout right now - */ - if (mddev->thread->timeout < MAX_SCHEDULE_TIMEOUT) - mddev->thread->timeout = timeout; - }
+ mddev->bitmap_info.daemon_sleep = timeout; + mddev_set_timeout(mddev, timeout, false); md_wakeup_thread(mddev->thread); + return len; }
From: Yu Kuai yukuai3@huawei.com
mainline inclusion from mainline-v6.5-rc1 commit 4469315439827290923fce4f3f672599cabeb366 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8OPEK CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Currently, there are many places that md_thread can be accessed without protection, following are known scenarios that can cause null-ptr-dereference or uaf:
1) sync_thread that is allocated and started from md_start_sync() 2) mddev->thread can be accessed directly from timeout_store() and md_bitmap_daemon_work() 3) md_unregister_thread() from action_store().
Currently, a global spinlock 'pers_lock' is borrowed to protect 'mddev->thread' in some places, this problem can be fixed likewise, however, use a global lock for all the cases is not good.
Fix this problem by protecting all md_thread with rcu.
Signed-off-by: Yu Kuai yukuai3@huawei.com Signed-off-by: Song Liu song@kernel.org Link: https://lore.kernel.org/r/20230523021017.3048783-6-yukuai1@huaweicloud.com
Conflicts: 1) drivers/md/md.c commit 3ce94ce5d05a ("md: fix duplicate filename for rdev") adds declaration of export_rdev(); commit 3ce94ce5d05a ("md: fix duplicate filename for rdev") adds md_free_rdev(); commit 72adae23a72c ("md: Change active_io to percpu") remove synchronize_rcu() in mddev_suspend(); 2) drivers/md/raid5-cache.c commit ad831a16b08c ("md/raid5: use bdev_write_cache instead of open coding it") remove the use of request_queue; commit 913cce5a1e58 ("md: remove most calls to bdevname") remove the array of bdevname and change the formart of display. 3) drivers/md/raid10.c commit 1e61a8cd633175b7942781fcaf90f6ed7968686e ("md/raid10: fix memleak of md thread") set 'mddev->thread' right after setup_conf() Signed-off-by: Li Lingfeng lilingfeng3@huawei.com --- drivers/md/md-bitmap.c | 10 ++++-- drivers/md/md-cluster.c | 17 ++++++---- drivers/md/md-multipath.c | 4 +-- drivers/md/md.c | 69 ++++++++++++++++++--------------------- drivers/md/md.h | 8 ++--- drivers/md/raid1.c | 7 ++-- drivers/md/raid1.h | 2 +- drivers/md/raid10.c | 20 +++++++----- drivers/md/raid10.h | 2 +- drivers/md/raid5-cache.c | 22 ++++++++----- drivers/md/raid5.c | 15 +++++---- drivers/md/raid5.h | 2 +- 12 files changed, 97 insertions(+), 81 deletions(-)
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index d2a726b02c24..e8d0f57f4bf3 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -1222,13 +1222,19 @@ static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap, static void mddev_set_timeout(struct mddev *mddev, unsigned long timeout, bool force) { - struct md_thread *thread = mddev->thread; + struct md_thread *thread; + + rcu_read_lock(); + thread = rcu_dereference(mddev->thread);
if (!thread) - return; + goto out;
if (force || thread->timeout < MAX_SCHEDULE_TIMEOUT) thread->timeout = timeout; + +out: + rcu_read_unlock(); }
/* diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index f0e64e76fd79..fe2fbf91ee20 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -75,14 +75,14 @@ struct md_cluster_info { sector_t suspend_hi; int suspend_from; /* the slot which broadcast suspend_lo/hi */
- struct md_thread *recovery_thread; + struct md_thread __rcu *recovery_thread; unsigned long recovery_map; /* communication loc resources */ struct dlm_lock_resource *ack_lockres; struct dlm_lock_resource *message_lockres; struct dlm_lock_resource *token_lockres; struct dlm_lock_resource *no_new_dev_lockres; - struct md_thread *recv_thread; + struct md_thread __rcu *recv_thread; struct completion newdisk_completion; wait_queue_head_t wait; unsigned long state; @@ -362,8 +362,8 @@ static void __recover_slot(struct mddev *mddev, int slot)
set_bit(slot, &cinfo->recovery_map); if (!cinfo->recovery_thread) { - cinfo->recovery_thread = md_register_thread(recover_bitmaps, - mddev, "recover"); + rcu_assign_pointer(cinfo->recovery_thread, + md_register_thread(recover_bitmaps, mddev, "recover")); if (!cinfo->recovery_thread) { pr_warn("md-cluster: Could not create recovery thread\n"); return; @@ -526,11 +526,15 @@ static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg) static void process_metadata_update(struct mddev *mddev, struct cluster_msg *msg) { int got_lock = 0; + struct md_thread *thread; struct md_cluster_info *cinfo = mddev->cluster_info; mddev->good_device_nr = le32_to_cpu(msg->raid_slot);
dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR); - wait_event(mddev->thread->wqueue, + + /* daemaon thread must exist */ + thread = rcu_dereference_protected(mddev->thread, true); + wait_event(thread->wqueue, (got_lock = mddev_trylock(mddev)) || test_bit(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state)); md_reload_sb(mddev, mddev->good_device_nr); @@ -890,7 +894,8 @@ static int join(struct mddev *mddev, int nodes) } /* Initiate the communication resources */ ret = -ENOMEM; - cinfo->recv_thread = md_register_thread(recv_daemon, mddev, "cluster_recv"); + rcu_assign_pointer(cinfo->recv_thread, + md_register_thread(recv_daemon, mddev, "cluster_recv")); if (!cinfo->recv_thread) { pr_err("md-cluster: cannot allocate memory for recv_thread!\n"); goto err; diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c index 776bbe542db5..a4cb69e1c2dc 100644 --- a/drivers/md/md-multipath.c +++ b/drivers/md/md-multipath.c @@ -407,8 +407,8 @@ static int multipath_run (struct mddev *mddev) if (ret) goto out_free_conf;
- mddev->thread = md_register_thread(multipathd, mddev, - "multipath"); + rcu_assign_pointer(mddev->thread, + md_register_thread(multipathd, mddev, "multipath")); if (!mddev->thread) goto out_free_conf;
diff --git a/drivers/md/md.c b/drivers/md/md.c index 422558d88ed6..953782b26398 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -68,11 +68,7 @@ #include "md-bitmap.h" #include "md-cluster.h"
-/* pers_list is a list of registered personalities protected - * by pers_lock. - * pers_lock does extra service to protect accesses to - * mddev->thread when the mutex cannot be held. - */ +/* pers_list is a list of registered personalities protected by pers_lock. */ static LIST_HEAD(pers_list); static DEFINE_SPINLOCK(pers_lock);
@@ -90,7 +86,7 @@ static struct workqueue_struct *md_rdev_misc_wq; static int remove_and_add_spares(struct mddev *mddev, struct md_rdev *this); static void mddev_detach(struct mddev *mddev); -static void md_wakeup_thread_directly(struct md_thread *thread); +static void md_wakeup_thread_directly(struct md_thread __rcu *thread);
/* * Default number of read corrections we'll attempt on an rdev @@ -500,8 +496,10 @@ static blk_qc_t md_submit_bio(struct bio *bio) */ void mddev_suspend(struct mddev *mddev) { - WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk); - lockdep_assert_held(&mddev->reconfig_mutex); + struct md_thread *thread = rcu_dereference_protected(mddev->thread, + lockdep_is_held(&mddev->reconfig_mutex)); + + WARN_ON_ONCE(thread && current == thread->tsk); if (mddev->suspended++) return; synchronize_rcu(); @@ -851,13 +849,8 @@ void mddev_unlock(struct mddev *mddev) } else mutex_unlock(&mddev->reconfig_mutex);
- /* As we've dropped the mutex we need a spinlock to - * make sure the thread doesn't disappear - */ - spin_lock(&pers_lock); md_wakeup_thread(mddev->thread); wake_up(&mddev->sb_wait); - spin_unlock(&pers_lock); } EXPORT_SYMBOL_GPL(mddev_unlock);
@@ -8009,19 +8002,29 @@ static int md_thread(void *arg) return 0; }
-static void md_wakeup_thread_directly(struct md_thread *thread) +static void md_wakeup_thread_directly(struct md_thread __rcu *thread) { - if (thread) - wake_up_process(thread->tsk); + struct md_thread *t; + + rcu_read_lock(); + t = rcu_dereference(thread); + if (t) + wake_up_process(t->tsk); + rcu_read_unlock(); }
-void md_wakeup_thread(struct md_thread *thread) +void md_wakeup_thread(struct md_thread __rcu *thread) { - if (thread) { - pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm); - set_bit(THREAD_WAKEUP, &thread->flags); - wake_up(&thread->wqueue); + struct md_thread *t; + + rcu_read_lock(); + t = rcu_dereference(thread); + if (t) { + pr_debug("md: waking up MD thread %s.\n", t->tsk->comm); + set_bit(THREAD_WAKEUP, &t->flags); + wake_up(&t->wqueue); } + rcu_read_unlock(); } EXPORT_SYMBOL(md_wakeup_thread);
@@ -8051,22 +8054,15 @@ struct md_thread *md_register_thread(void (*run) (struct md_thread *), } EXPORT_SYMBOL(md_register_thread);
-void md_unregister_thread(struct md_thread **threadp) +void md_unregister_thread(struct md_thread __rcu **threadp) { - struct md_thread *thread; + struct md_thread *thread = rcu_dereference_protected(*threadp, true);
- /* - * Locking ensures that mddev_unlock does not wake_up a - * non-existent thread - */ - spin_lock(&pers_lock); - thread = *threadp; - if (!thread) { - spin_unlock(&pers_lock); + if (!thread) return; - } - *threadp = NULL; - spin_unlock(&pers_lock); + + rcu_assign_pointer(*threadp, NULL); + synchronize_rcu();
pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk)); kthread_stop(thread->tsk); @@ -9267,9 +9263,8 @@ static void md_start_sync(struct work_struct *ws) { struct mddev *mddev = container_of(ws, struct mddev, del_work);
- mddev->sync_thread = md_register_thread(md_do_sync, - mddev, - "resync"); + rcu_assign_pointer(mddev->sync_thread, + md_register_thread(md_do_sync, mddev, "resync")); if (!mddev->sync_thread) { pr_warn("%s: could not start resync thread...\n", mdname(mddev)); diff --git a/drivers/md/md.h b/drivers/md/md.h index 0954b9fe4cf6..845ccd842975 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -361,8 +361,8 @@ struct mddev { int new_chunk_sectors; int reshape_backwards;
- struct md_thread *thread; /* management thread */ - struct md_thread *sync_thread; /* doing resync or reconstruct */ + struct md_thread __rcu *thread; /* management thread */ + struct md_thread __rcu *sync_thread; /* doing resync or reconstruct */
/* 'last_sync_action' is initialized to "none". It is set when a * sync operation (i.e "data-check", "requested-resync", "resync", @@ -732,8 +732,8 @@ extern struct md_thread *md_register_thread( void (*run)(struct md_thread *thread), struct mddev *mddev, const char *name); -extern void md_unregister_thread(struct md_thread **threadp); -extern void md_wakeup_thread(struct md_thread *thread); +extern void md_unregister_thread(struct md_thread __rcu **threadp); +extern void md_wakeup_thread(struct md_thread __rcu *thread); extern void md_check_recovery(struct mddev *mddev); extern void md_reap_sync_thread(struct mddev *mddev); extern int mddev_init_writes_pending(struct mddev *mddev); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 00915e6ec410..e511354c6e8c 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -3078,7 +3078,8 @@ static struct r1conf *setup_conf(struct mddev *mddev) }
err = -ENOMEM; - conf->thread = md_register_thread(raid1d, mddev, "raid1"); + rcu_assign_pointer(conf->thread, + md_register_thread(raid1d, mddev, "raid1")); if (!conf->thread) goto abort;
@@ -3176,8 +3177,8 @@ static int raid1_run(struct mddev *mddev) /* * Ok, everything is just fine now */ - mddev->thread = conf->thread; - conf->thread = NULL; + rcu_assign_pointer(mddev->thread, conf->thread); + rcu_assign_pointer(conf->thread, NULL); mddev->private = conf; set_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index ccf10e59b116..ff30681d753c 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h @@ -131,7 +131,7 @@ struct r1conf { /* When taking over an array from a different personality, we store * the new thread here until we fully activate the array. */ - struct md_thread *thread; + struct md_thread __rcu *thread;
/* Keep track of cluster resync window to send to other * nodes. diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index babc5d29f0e3..1fcd1efabea9 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -970,6 +970,7 @@ static void lower_barrier(struct r10conf *conf) static bool stop_waiting_barrier(struct r10conf *conf) { struct bio_list *bio_list = current->bio_list; + struct md_thread *thread;
/* barrier is dropped */ if (!conf->barrier) @@ -985,12 +986,14 @@ static bool stop_waiting_barrier(struct r10conf *conf) (!bio_list_empty(&bio_list[0]) || !bio_list_empty(&bio_list[1]))) return true;
+ /* daemon thread must exist while handling io */ + thread = rcu_dereference_protected(conf->mddev->thread, true); /* * move on if io is issued from raid10d(), nr_pending is not released * from original io(see handle_read_error()). All raise barrier is * blocked until this io is done. */ - if (conf->mddev->thread->tsk == current) { + if (thread->tsk == current) { WARN_ON_ONCE(atomic_read(&conf->nr_pending) == 0); return true; } @@ -3758,7 +3761,8 @@ static struct r10conf *setup_conf(struct mddev *mddev) atomic_set(&conf->nr_pending, 0);
err = -ENOMEM; - conf->thread = md_register_thread(raid10d, mddev, "raid10"); + rcu_assign_pointer(conf->thread, + md_register_thread(raid10d, mddev, "raid10")); if (!conf->thread) goto out;
@@ -3816,8 +3820,8 @@ static int raid10_run(struct mddev *mddev) } }
- mddev->thread = conf->thread; - conf->thread = NULL; + rcu_assign_pointer(mddev->thread, conf->thread); + rcu_assign_pointer(conf->thread, NULL);
if (mddev->queue) { blk_queue_max_discard_sectors(mddev->queue, @@ -3962,8 +3966,8 @@ static int raid10_run(struct mddev *mddev) clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); - mddev->sync_thread = md_register_thread(md_do_sync, mddev, - "reshape"); + rcu_assign_pointer(mddev->sync_thread, + md_register_thread(md_do_sync, mddev, "reshape")); if (!mddev->sync_thread) goto out_free_conf; } @@ -4364,8 +4368,8 @@ static int raid10_start_reshape(struct mddev *mddev) set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
- mddev->sync_thread = md_register_thread(md_do_sync, mddev, - "reshape"); + rcu_assign_pointer(mddev->sync_thread, + md_register_thread(md_do_sync, mddev, "reshape")); if (!mddev->sync_thread) { ret = -EAGAIN; goto abort; diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index 73d243e12363..4f627ad16bec 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h @@ -101,7 +101,7 @@ struct r10conf { /* When taking over an array from a different personality, we store * the new thread here until we fully activate the array. */ - struct md_thread *thread; + struct md_thread __rcu *thread;
/* * Keep track of cluster resync window to send to other nodes. diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 4337ae0e6af2..d879fc525f50 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -120,7 +120,7 @@ struct r5l_log { struct bio_set bs; mempool_t meta_pool;
- struct md_thread *reclaim_thread; + struct md_thread __rcu *reclaim_thread; unsigned long reclaim_target; /* number of space that need to be * reclaimed. if it's 0, reclaim spaces * used by io_units which are in @@ -1575,17 +1575,18 @@ void r5l_wake_reclaim(struct r5l_log *log, sector_t space)
void r5l_quiesce(struct r5l_log *log, int quiesce) { - struct mddev *mddev; + struct mddev *mddev = log->rdev->mddev; + struct md_thread *thread = rcu_dereference_protected( + log->reclaim_thread, lockdep_is_held(&mddev->reconfig_mutex));
if (quiesce) { /* make sure r5l_write_super_and_discard_space exits */ - mddev = log->rdev->mddev; wake_up(&mddev->sb_wait); - kthread_park(log->reclaim_thread->tsk); + kthread_park(thread->tsk); r5l_wake_reclaim(log, MaxSector); r5l_do_reclaim(log); } else - kthread_unpark(log->reclaim_thread->tsk); + kthread_unpark(thread->tsk); }
bool r5l_log_disk_error(struct r5conf *conf) @@ -3067,6 +3068,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) { struct request_queue *q = bdev_get_queue(rdev->bdev); struct r5l_log *log; + struct md_thread *thread; char b[BDEVNAME_SIZE]; int ret;
@@ -3129,11 +3131,13 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) spin_lock_init(&log->tree_lock); INIT_RADIX_TREE(&log->big_stripe_tree, GFP_NOWAIT | __GFP_NOWARN);
- log->reclaim_thread = md_register_thread(r5l_reclaim_thread, - log->rdev->mddev, "reclaim"); - if (!log->reclaim_thread) + thread = md_register_thread(r5l_reclaim_thread, log->rdev->mddev, + "reclaim"); + if (!thread) goto reclaim_thread; - log->reclaim_thread->timeout = R5C_RECLAIM_WAKEUP_INTERVAL; + + thread->timeout = R5C_RECLAIM_WAKEUP_INTERVAL; + rcu_assign_pointer(log->reclaim_thread, thread);
init_waitqueue_head(&log->iounit_wait);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b4d004b623e7..bef14340f9c1 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7387,7 +7387,8 @@ static struct r5conf *setup_conf(struct mddev *mddev) }
sprintf(pers_name, "raid%d", mddev->new_level); - conf->thread = md_register_thread(raid5d, mddev, pers_name); + rcu_assign_pointer(conf->thread, + md_register_thread(raid5d, mddev, pers_name)); if (!conf->thread) { pr_warn("md/raid:%s: couldn't allocate thread.\n", mdname(mddev)); @@ -7597,8 +7598,8 @@ static int raid5_run(struct mddev *mddev) }
conf->min_offset_diff = min_offset_diff; - mddev->thread = conf->thread; - conf->thread = NULL; + rcu_assign_pointer(mddev->thread, conf->thread); + rcu_assign_pointer(conf->thread, NULL); mddev->private = conf;
for (i = 0; i < conf->raid_disks && conf->previous_raid_disks; @@ -7696,8 +7697,8 @@ static int raid5_run(struct mddev *mddev) clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); - mddev->sync_thread = md_register_thread(md_do_sync, mddev, - "reshape"); + rcu_assign_pointer(mddev->sync_thread, + md_register_thread(md_do_sync, mddev, "reshape")); if (!mddev->sync_thread) goto abort; } @@ -8259,8 +8260,8 @@ static int raid5_start_reshape(struct mddev *mddev) clear_bit(MD_RECOVERY_DONE, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); - mddev->sync_thread = md_register_thread(md_do_sync, mddev, - "reshape"); + rcu_assign_pointer(mddev->sync_thread, + md_register_thread(md_do_sync, mddev, "reshape")); if (!mddev->sync_thread) { mddev->recovery = 0; spin_lock_irq(&conf->device_lock); diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 5c05acf20e1f..d1780d086f6e 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -674,7 +674,7 @@ struct r5conf { /* When taking over an array from a different personality, we store * the new thread here until we fully activate the array. */ - struct md_thread *thread; + struct md_thread __rcu *thread; struct list_head temp_inactive_list[NR_STRIPE_HASH_LOCKS]; struct r5worker_group *worker_groups; int group_cnt;