From: Li Nan linan122@huawei.com
hulk inclusion category: bugfix bugzilla: 188533, https://gitee.com/openeuler/kernel/issues/I6O7YB CVE: NA
--------------------------------
Commit ceff49d9cb24 ("md/raid1: fix a race between removing rdev and access conf->mirrors[i].rdev") fixed a null-ptr-deref in raid1. The same bug exists in raid10, so fix it in the same way.
No sync_thread is running while an rdev is being removed, so there is no need to check the flag in raid10_sync_request().
Signed-off-by: Li Nan linan122@huawei.com Reviewed-by: Yu Kuai yukuai3@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com --- drivers/md/raid10.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 4bdf14f459fe..e298903aa72f 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -753,9 +753,11 @@ static struct md_rdev *read_balance(struct r10conf *conf, disk = r10_bio->devs[slot].devnum; rdev = rcu_dereference(conf->mirrors[disk].replacement); if (rdev == NULL || test_bit(Faulty, &rdev->flags) || + test_bit(WantRemove, &rdev->flags) || r10_bio->devs[slot].addr + sectors > rdev->recovery_offset) rdev = rcu_dereference(conf->mirrors[disk].rdev); if (rdev == NULL || + test_bit(WantRemove, &rdev->flags) || test_bit(Faulty, &rdev->flags)) continue; if (!test_bit(In_sync, &rdev->flags) && @@ -1376,9 +1378,11 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, blocked_rdev = rrdev; break; } - if (rdev && (test_bit(Faulty, &rdev->flags))) + if (rdev && (test_bit(Faulty, &rdev->flags) || + test_bit(WantRemove, &rdev->flags))) rdev = NULL; - if (rrdev && (test_bit(Faulty, &rrdev->flags))) + if (rrdev && (test_bit(Faulty, &rrdev->flags) || + test_bit(WantRemove, &rrdev->flags))) rrdev = NULL;
r10_bio->devs[i].bio = NULL; @@ -1790,6 +1794,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) continue; clear_bit(In_sync, &rdev->flags); set_bit(Replacement, &rdev->flags); + clear_bit(WantRemove, &rdev->flags); rdev->raid_disk = mirror; err = 0; if (mddev->gendisk) @@ -1807,6 +1812,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) p->head_position = 0; p->recovery_disabled = mddev->recovery_disabled - 1; rdev->raid_disk = mirror; + clear_bit(WantRemove, &rdev->flags); err = 0; if (rdev->saved_raid_disk != mirror) conf->fullsync = 1; @@ -1855,16 +1861,22 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev) err = -EBUSY; goto abort; } - *rdevp = NULL; + /* + * Before set p->rdev = NULL, we set WantRemove bit avoiding + * race between rdev remove and issue bio, which can cause + * NULL pointer deference of rdev by conf->mirrors[i].rdev. + */ + set_bit(WantRemove, &rdev->flags); if (!test_bit(RemoveSynchronized, &rdev->flags)) { synchronize_rcu(); if (atomic_read(&rdev->nr_pending)) { /* lost the race, try later */ err = -EBUSY; - *rdevp = rdev; + clear_bit(WantRemove, &rdev->flags); goto abort; } } + *rdevp = NULL; if (p->replacement) { /* We must have just cleared 'rdev' */ p->rdev = p->replacement;