From: Yu Kuai yukuai3@huawei.com
mainline inclusion from mainline-next-20231220 commit c891f1fd90e66e584bb1353e1859cef7c9eb36f8 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8T02O
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
rcu is not used correctly here, because synchronize_rcu() is called before replacing old value, for example:
remove_and_add_spares // other path synchronize_rcu // called before replacing old value set_bit(RemoveSynchronized) rcu_read_lock() rdev = conf->mirros[].rdev pers->hot_remove_disk conf->mirros[].rdev = NULL; if (!test_bit(RemoveSynchronized)) synchronize_rcu /* * won't be called, and won't wait * for concurrent readers to be done. */ // access rdev after remove_and_add_spares() rcu_read_unlock()
Fortunately, there is a separate rcu protection to prevent such rdev to be freed:
md_kick_rdev_from_array //other path rcu_read_lock() rdev = conf->mirros[].rdev list_del_rcu(&rdev->same_set)
rcu_read_unlock() /* * rdev can be removed from conf, but * rdev won't be freed. */ synchronize_rcu() free rdev
Hence remove this useless flag and prepare to remove rcu protection to access rdev from 'conf'.
Signed-off-by: Yu Kuai yukuai3@huawei.com Signed-off-by: Song Liu song@kernel.org Link: https://lore.kernel.org/r/20231125081604.3939938-2-yukuai1@huaweicloud.com Signed-off-by: Li Nan linan122@huawei.com --- drivers/md/md.h | 5 ----- drivers/md/md-multipath.c | 9 --------- drivers/md/md.c | 37 ++++++------------------------------- drivers/md/raid1.c | 9 --------- drivers/md/raid10.c | 9 --------- drivers/md/raid5.c | 9 --------- 6 files changed, 6 insertions(+), 72 deletions(-)
diff --git a/drivers/md/md.h b/drivers/md/md.h index ade83af123a2..8d881cc59799 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -190,11 +190,6 @@ enum flag_bits { * than other devices in the array */ ClusterRemove, - RemoveSynchronized, /* synchronize_rcu() was called after - * this device was known to be faulty, - * so it is safe to remove without - * another synchronize_rcu() call. - */ ExternalBbl, /* External metadata provides bad * block management for a disk */ diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c index d22276870283..aa77133f3188 100644 --- a/drivers/md/md-multipath.c +++ b/drivers/md/md-multipath.c @@ -258,15 +258,6 @@ static int multipath_remove_disk(struct mddev *mddev, struct md_rdev *rdev) goto abort; } p->rdev = NULL; - if (!test_bit(RemoveSynchronized, &rdev->flags)) { - synchronize_rcu(); - if (atomic_read(&rdev->nr_pending)) { - /* lost the race, try later */ - err = -EBUSY; - p->rdev = rdev; - goto abort; - } - } err = md_integrity_register(mddev); } abort: diff --git a/drivers/md/md.c b/drivers/md/md.c index 2860a43e56be..945a35ea3b5f 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -9245,44 +9245,19 @@ static int remove_and_add_spares(struct mddev *mddev, struct md_rdev *rdev; int spares = 0; int removed = 0; - bool remove_some = false;
if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) /* Mustn't remove devices when resync thread is running */ return 0;
rdev_for_each(rdev, mddev) { - if ((this == NULL || rdev == this) && - rdev->raid_disk >= 0 && - !test_bit(Blocked, &rdev->flags) && - test_bit(Faulty, &rdev->flags) && - atomic_read(&rdev->nr_pending)==0) { - /* Faulty non-Blocked devices with nr_pending == 0 - * never get nr_pending incremented, - * never get Faulty cleared, and never get Blocked set. - * So we can synchronize_rcu now rather than once per device - */ - remove_some = true; - set_bit(RemoveSynchronized, &rdev->flags); - } - } - - if (remove_some) - synchronize_rcu(); - rdev_for_each(rdev, mddev) { - if ((this == NULL || rdev == this) && - (test_bit(RemoveSynchronized, &rdev->flags) || - rdev_removeable(rdev))) { - if (mddev->pers->hot_remove_disk( - mddev, rdev) == 0) { - sysfs_unlink_rdev(mddev, rdev); - rdev->saved_raid_disk = rdev->raid_disk; - rdev->raid_disk = -1; - removed++; - } + if ((this == NULL || rdev == this) && rdev_removeable(rdev) && + !mddev->pers->hot_remove_disk(mddev, rdev)) { + sysfs_unlink_rdev(mddev, rdev); + rdev->saved_raid_disk = rdev->raid_disk; + rdev->raid_disk = -1; + removed++; } - if (remove_some && test_bit(RemoveSynchronized, &rdev->flags)) - clear_bit(RemoveSynchronized, &rdev->flags); }
if (removed && mddev->kobj.sd) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 35d12948e0a9..a678e0e6e102 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1863,15 +1863,6 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev) goto abort; } p->rdev = NULL; - if (!test_bit(RemoveSynchronized, &rdev->flags)) { - synchronize_rcu(); - if (atomic_read(&rdev->nr_pending)) { - /* lost the race, try later */ - err = -EBUSY; - p->rdev = rdev; - goto abort; - } - } if (conf->mirrors[conf->raid_disks + number].rdev) { /* We just removed a device that is being replaced. * Move down the replacement. We drain all IO before diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index a5927e98dc67..132a79523338 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2247,15 +2247,6 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev) goto abort; } *rdevp = NULL; - if (!test_bit(RemoveSynchronized, &rdev->flags)) { - synchronize_rcu(); - if (atomic_read(&rdev->nr_pending)) { - /* lost the race, try later */ - err = -EBUSY; - *rdevp = rdev; - goto abort; - } - } if (p->replacement) { /* We must have just cleared 'rdev' */ p->rdev = p->replacement; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 3302191e4255..37072adac05d 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -8229,15 +8229,6 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev) goto abort; } *rdevp = NULL; - if (!test_bit(RemoveSynchronized, &rdev->flags)) { - lockdep_assert_held(&mddev->reconfig_mutex); - synchronize_rcu(); - if (atomic_read(&rdev->nr_pending)) { - /* lost the race, try later */ - err = -EBUSY; - rcu_assign_pointer(*rdevp, rdev); - } - } if (!err) { err = log_modify(conf, rdev, false); if (err)