From: Yu Kuai yukuai3@huawei.com
mainline inclusion from mainline-v6.7-rc1 commit b8494823e236326500aa1004155e83f748dd10da category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8T02O
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Currently 'writes_pending' is initialized in pers->run for raid1/5/10, and it's freed while deleing mddev, instead of pers->free. pers->run can be called multiple times before mddev is deleted, and a helper mddev_init_writes_pending() is used to prevent 'writes_pending' to be initialized multiple times, this usage is safe but a litter weird.
On the other hand, 'writes_pending' is only initialized for raid1/5/10, however, it's used in common layer, for example:
array_state_store set_in_sync if (!mddev->in_sync) -> in_sync is used for all levels // access writes_pending
There might be some implicit dependency that I don't recognized to make sure 'writes_pending' can only be accessed for raid1/5/10, but there are no comments about that.
By the way, it make sense to initialize 'writes_pending' in common layer because there are already three levels use it.
Signed-off-by: Yu Kuai yukuai3@huawei.com Signed-off-by: Song Liu song@kernel.org Link: https://lore.kernel.org/r/20230825030956.1527023-3-yukuai1@huaweicloud.com Signed-off-by: Li Nan linan122@huawei.com --- drivers/md/md.h | 1 - drivers/md/md.c | 29 ++++++++++++----------------- drivers/md/raid1.c | 3 +-- drivers/md/raid10.c | 3 --- drivers/md/raid5.c | 3 --- 5 files changed, 13 insertions(+), 26 deletions(-)
diff --git a/drivers/md/md.h b/drivers/md/md.h index bd5b5db1e579..eca6dd41ad73 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -769,7 +769,6 @@ extern void md_unregister_thread(struct mddev *mddev, struct md_thread __rcu **t extern void md_wakeup_thread(struct md_thread __rcu *thread); extern void md_check_recovery(struct mddev *mddev); extern void md_reap_sync_thread(struct mddev *mddev); -extern int mddev_init_writes_pending(struct mddev *mddev); extern bool md_write_start(struct mddev *mddev, struct bio *bi); extern void md_write_inc(struct mddev *mddev, struct bio *bi); extern void md_write_end(struct mddev *mddev); diff --git a/drivers/md/md.c b/drivers/md/md.c index 3d3887870cb0..f5d93e90277c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -646,6 +646,8 @@ static void active_io_release(struct percpu_ref *ref) wake_up(&mddev->sb_wait); }
+static void no_op(struct percpu_ref *r) {} + int mddev_init(struct mddev *mddev) {
@@ -653,6 +655,15 @@ int mddev_init(struct mddev *mddev) PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) return -ENOMEM;
+ if (percpu_ref_init(&mddev->writes_pending, no_op, + PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) { + percpu_ref_exit(&mddev->active_io); + return -ENOMEM; + } + + /* We want to start with the refcount at zero */ + percpu_ref_put(&mddev->writes_pending); + mutex_init(&mddev->open_mutex); mutex_init(&mddev->reconfig_mutex); mutex_init(&mddev->sync_mutex); @@ -685,6 +696,7 @@ EXPORT_SYMBOL_GPL(mddev_init); void mddev_destroy(struct mddev *mddev) { percpu_ref_exit(&mddev->active_io); + percpu_ref_exit(&mddev->writes_pending); } EXPORT_SYMBOL_GPL(mddev_destroy);
@@ -5621,21 +5633,6 @@ static void mddev_delayed_delete(struct work_struct *ws) kobject_put(&mddev->kobj); }
-static void no_op(struct percpu_ref *r) {} - -int mddev_init_writes_pending(struct mddev *mddev) -{ - if (mddev->writes_pending.percpu_count_ptr) - return 0; - if (percpu_ref_init(&mddev->writes_pending, no_op, - PERCPU_REF_ALLOW_REINIT, GFP_KERNEL) < 0) - return -ENOMEM; - /* We want to start with the refcount at zero */ - percpu_ref_put(&mddev->writes_pending); - return 0; -} -EXPORT_SYMBOL_GPL(mddev_init_writes_pending); - struct mddev *md_alloc(dev_t dev, char *name) { /* @@ -6316,7 +6313,6 @@ void md_stop(struct mddev *mddev) */ __md_stop_writes(mddev); __md_stop(mddev); - percpu_ref_exit(&mddev->writes_pending); }
EXPORT_SYMBOL_GPL(md_stop); @@ -7900,7 +7896,6 @@ static void md_free_disk(struct gendisk *disk) { struct mddev *mddev = disk->private_data;
- percpu_ref_exit(&mddev->writes_pending); mddev_free(mddev); }
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 2aabac773fe7..3a78f79ee6d5 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -3122,8 +3122,7 @@ static int raid1_run(struct mddev *mddev) mdname(mddev)); return -EIO; } - if (mddev_init_writes_pending(mddev) < 0) - return -ENOMEM; + /* * copy the already verified devices into our private RAID1 * bookkeeping area. [whatever we allocate in run(), diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 023413120851..a5927e98dc67 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -4154,9 +4154,6 @@ static int raid10_run(struct mddev *mddev) sector_t min_offset_diff = 0; int first = 1;
- if (mddev_init_writes_pending(mddev) < 0) - return -ENOMEM; - if (mddev->private == NULL) { conf = setup_conf(mddev); if (IS_ERR(conf)) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 284cd71bcc68..0644b83fd3f4 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7785,9 +7785,6 @@ static int raid5_run(struct mddev *mddev) long long min_offset_diff = 0; int first = 1;
- if (mddev_init_writes_pending(mddev) < 0) - return -ENOMEM; - if (mddev->recovery_cp != MaxSector) pr_notice("md/raid:%s: not clean -- starting background reconstruction\n", mdname(mddev));