From: Zhong Jinghua zhongjinghua@huawei.com
hulk inclusion category: bugfix bugzilla: 188586, https://gitee.com/openeuler/kernel/issues/I6TFPJ CVE: NA
----------------------------------------
We found that in loop_control_ioctl, the kernel panic can be easily caused:
1. syscall(__NR_ioctl, r[1], 0x4c80, 0x80000200000ul); Create a loop device 0x80000200000ul. In fact, in the code, it is used as the first_minor number, and the first_minor number is 0. So the created loop device number is 7:0.
2. syscall(__NR_ioctl, r[2], 0x4c80, 0ul); Create loop device 0x0ul. Since device 7:0 was already created in step 1, add_disk will fail because the major and first_minor numbers are identical.
3. syscall(__NR_ioctl, r[5], 0x4c81, 0ul); Delete the device that failed to create, the kernel panics.
Panic like below: BUG: KASAN: null-ptr-deref in device_del+0xb3/0x840 drivers/base/core.c:3107 Call Trace: kill_device drivers/base/core.c:3079 [inline] device_del+0xb3/0x840 drivers/base/core.c:3107 del_gendisk+0x463/0x5f0 block/genhd.c:971 loop_remove drivers/block/loop.c:2190 [inline] loop_control_ioctl drivers/block/loop.c:2289 [inline]
The stack like below: Create loop device: loop_control_ioctl loop_add add_disk device_add_disk bdi_register bdi_register_va device_create device_create_groups_vargs device_add kfree(dev->p); dev->p = NULL;
Remove loop device: loop_control_ioctl loop_remove del_gendisk device_del kill_device if (dev->p->dead) // p is null
Fix it by adding a range check on the index parameter passed to loop_add().
Fixes: 770fe30a46a1 ("loop: add management interface for on-demand device allocation") Signed-off-by: Zhong Jinghua zhongjinghua@huawei.com Reviewed-by: Yu Kuai yukuai3@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- drivers/block/loop.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 108a4ff27bcd..826633aa328c 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1972,6 +1972,17 @@ static int loop_add(struct loop_device **l, int i) struct gendisk *disk; int err;
+ /* + * i << part_shift is actually used as the first_minor. + * So here should avoid i << part_shift overflow. + * And, MKDEV() expect that the max bits of + * first_minor is 20. + */ + if (i > 0 && i > MINORMASK >> part_shift) { + err = -EINVAL; + goto out; + } + err = -ENOMEM; lo = kzalloc(sizeof(*lo), GFP_KERNEL); if (!lo) @@ -1985,7 +1996,8 @@ static int loop_add(struct loop_device **l, int i) if (err == -ENOSPC) err = -EEXIST; } else { - err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_KERNEL); + err = idr_alloc(&loop_index_idr, lo, 0, + (MINORMASK >> part_shift) + 1, GFP_KERNEL); } if (err < 0) goto out_free_dev;
From: Logan Gunthorpe logang@deltatee.com
mainline inclusion from mainline-v6.0-rc1 commit eac58d08d4937d2eab8f71c663d98d0759845bde category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I6SJI1 CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Comments in the code document special values used for mddev->curr_resync. Make this clearer by using an enum to label these values.
The only functional change is a couple places use the wrong comparison operator that implied 3 is another special value. They are all fixed to imply that 3 or greater is an active resync.
Signed-off-by: Logan Gunthorpe logang@deltatee.com Reviewed-by: Christoph Hellwig hch@lst.de Signed-off-by: Song Liu song@kernel.org Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Yu Kuai yukuai3@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- drivers/md/md.c | 40 ++++++++++++++++++---------------------- drivers/md/md.h | 15 +++++++++++++++ 2 files changed, 33 insertions(+), 22 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c index 629d3f346128..718731b68ff3 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4850,7 +4850,7 @@ static ssize_t sync_speed_show(struct mddev *mddev, char *page) { unsigned long resync, dt, db; - if (mddev->curr_resync == 0) + if (mddev->curr_resync == MD_RESYNC_NONE) return sprintf(page, "none\n"); resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active); dt = (jiffies - mddev->resync_mark) / HZ; @@ -4869,8 +4869,8 @@ sync_completed_show(struct mddev *mddev, char *page) if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) return sprintf(page, "none\n");
- if (mddev->curr_resync == 1 || - mddev->curr_resync == 2) + if (mddev->curr_resync == MD_RESYNC_YIELDED || + mddev->curr_resync == MD_RESYNC_DELAYED) return sprintf(page, "delayed\n");
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) || @@ -7774,7 +7774,7 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev) max_sectors = mddev->dev_sectors;
resync = mddev->curr_resync; - if (resync <= 3) { + if (resync < MD_RESYNC_ACTIVE) { if (test_bit(MD_RECOVERY_DONE, &mddev->recovery)) /* Still cleaning up */ resync = max_sectors; @@ -7783,7 +7783,7 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev) else resync -= atomic_read(&mddev->recovery_active);
- if (resync == 0) { + if (resync == MD_RESYNC_NONE) { if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery)) { struct md_rdev *rdev;
@@ -7807,7 +7807,7 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev) } return 0; } - if (resync < 3) { + if (resync < MD_RESYNC_ACTIVE) { seq_printf(seq, "\tresync=DELAYED"); return 1; } @@ -8412,13 +8412,7 @@ void md_do_sync(struct md_thread *thread)
mddev->last_sync_action = action ?: desc;
- /* we overload curr_resync somewhat here. - * 0 == not engaged in resync at all - * 2 == checking that there is no conflict with another sync - * 1 == like 2, but have yielded to allow conflicting resync to - * commense - * other == active in resync - this many blocks - * + /* * Before starting a resync we must have set curr_resync to * 2, and then checked that every "conflicting" array has curr_resync * less than ours. When we find one that is the same or higher @@ -8430,7 +8424,7 @@ void md_do_sync(struct md_thread *thread)
do { int mddev2_minor = -1; - mddev->curr_resync = 2; + mddev->curr_resync = MD_RESYNC_DELAYED;
try_again: if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) @@ -8442,12 +8436,14 @@ void md_do_sync(struct md_thread *thread) && mddev2->curr_resync && match_mddev_units(mddev, mddev2)) { DEFINE_WAIT(wq); - if (mddev < mddev2 && mddev->curr_resync == 2) { + if (mddev < mddev2 && + mddev->curr_resync == MD_RESYNC_DELAYED) { /* arbitrarily yield */ - mddev->curr_resync = 1; + mddev->curr_resync = MD_RESYNC_YIELDED; wake_up(&resync_wait); } - if (mddev > mddev2 && mddev->curr_resync == 1) + if (mddev > mddev2 && + mddev->curr_resync == MD_RESYNC_YIELDED) /* no need to wait here, we can wait the next * time 'round when curr_resync == 2 */ @@ -8475,7 +8471,7 @@ void md_do_sync(struct md_thread *thread) finish_wait(&resync_wait, &wq); } } - } while (mddev->curr_resync < 2); + } while (mddev->curr_resync < MD_RESYNC_DELAYED);
j = 0; if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { @@ -8551,7 +8547,7 @@ void md_do_sync(struct md_thread *thread) desc, mdname(mddev)); mddev->curr_resync = j; } else - mddev->curr_resync = 3; /* no longer delayed */ + mddev->curr_resync = MD_RESYNC_ACTIVE; /* no longer delayed */ mddev->curr_resync_completed = j; sysfs_notify_dirent_safe(mddev->sysfs_completed); md_new_event(mddev); @@ -8686,14 +8682,14 @@ void md_do_sync(struct md_thread *thread)
if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && !test_bit(MD_RECOVERY_INTR, &mddev->recovery) && - mddev->curr_resync > 3) { + mddev->curr_resync >= MD_RESYNC_ACTIVE) { mddev->curr_resync_completed = mddev->curr_resync; sysfs_notify_dirent_safe(mddev->sysfs_completed); } mddev->pers->sync_request(mddev, max_sectors, &skipped);
if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && - mddev->curr_resync > 3) { + mddev->curr_resync >= MD_RESYNC_ACTIVE) { if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { if (mddev->curr_resync >= mddev->recovery_cp) { @@ -8756,7 +8752,7 @@ void md_do_sync(struct md_thread *thread) } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) mddev->resync_min = mddev->curr_resync_completed; set_bit(MD_RECOVERY_DONE, &mddev->recovery); - mddev->curr_resync = 0; + mddev->curr_resync = MD_RESYNC_NONE; spin_unlock(&mddev->lock);
wake_up(&resync_wait); diff --git a/drivers/md/md.h b/drivers/md/md.h index 422af63f1e1e..916b4ff4d9e0 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -270,6 +270,21 @@ enum mddev_sb_flags { MD_SB_NEED_REWRITE, /* metadata write needs to be repeated */ };
+/* + * mddev->curr_resync stores the current sector of the resync but + * also has some overloaded values. + */ +enum { + /* No resync in progress */ + MD_RESYNC_NONE = 0, + /* Yielded to allow another conflicting resync to commence */ + MD_RESYNC_YIELDED = 1, + /* Delayed to check that there is no conflict with another sync */ + MD_RESYNC_DELAYED = 2, + /* Any value greater than or equal to this is in an active resync */ + MD_RESYNC_ACTIVE = 3, +}; + struct mddev { void *private; struct md_personality *pers;
From: Logan Gunthorpe logang@deltatee.com
mainline inclusion from mainline-v6.0-rc1 commit b368856aab02c8fcaabb809aad401b2cf96504f2 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I6SJI1 CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
The 07layouts test in mdadm fails on some systems. The failure presents itself as the backup file not being removed before the next layout is grown into:
mdadm: /dev/md0: cannot create backup file /tmp/md-test-backup: File exists
This is because the background mdadm process, which is responsible for cleaning up this backup file, gets into an infinite loop waiting for the reshape to start. mdadm checks the mdstat file to see whether a reshape is in progress and, if it is not, it waits for an event on the file or times out after 5 seconds. On faster machines, the reshape may complete before the 5-second timeout expires, and thus the background mdadm process loops waiting for a reshape to start that has already occurred.
mdadm reads the mdstat file to start, but mdstat does not report that the reshape has begun, even though it has indeed begun. So the mdstat_wait() call (in mdadm) which polls on the mdstat file won't ever return until timing out.
The reason mdstat does not report that the reshape has started is an issue in status_resync(). recovery_active is subtracted from curr_resync, which results in a value of zero for the first chunk of reshaped data, and the resulting read reports no reshape in progress.
To fix this, if "resync - recovery_active" is an overloaded value, force the value to be MD_RESYNC_ACTIVE so the code reports a resync in progress.
Signed-off-by: Logan Gunthorpe logang@deltatee.com Reviewed-by: Christoph Hellwig hch@lst.de Signed-off-by: Song Liu song@kernel.org Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Yu Kuai yukuai3@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- drivers/md/md.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c index 718731b68ff3..60f835c6ed3b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7778,10 +7778,20 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev) if (test_bit(MD_RECOVERY_DONE, &mddev->recovery)) /* Still cleaning up */ resync = max_sectors; - } else if (resync > max_sectors) + } else if (resync > max_sectors) { resync = max_sectors; - else + } else { resync -= atomic_read(&mddev->recovery_active); + if (resync < MD_RESYNC_ACTIVE) { + /* + * Resync has started, but the subtraction has + * yielded one of the special values. Force it + * to active to ensure the status reports an + * active resync. + */ + resync = MD_RESYNC_ACTIVE; + } + }
if (resync == MD_RESYNC_NONE) { if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery)) {
From: Hou Tao houtao1@huawei.com
mainline inclusion from mainline-v6.3-rc1 commit 1d1f25bfda432a6b61bd0205d426226bbbd73504 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I6SJI1 CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Don't update recovery_cp when curr_resync is MD_RESYNC_ACTIVE, otherwise md may skip the resync of the first 3 sectors if the resync procedure is interrupted before the first call to ->sync_request(), as shown below:
md_do_sync thread control thread // setup resync mddev->recovery_cp = 0 j = 0 mddev->curr_resync = MD_RESYNC_ACTIVE
// e.g., set array as idle set_bit(MD_RECOVERY_INTR, &mddev->recovery) // resync loop // check INTR before calling sync_request !test_bit(MD_RECOVERY_INTR, &mddev->recovery)
// resync interrupted // update recovery_cp from 0 to 3 // the resync of the first 3 sectors will be skipped mddev->recovery_cp = 3
Fixes: eac58d08d493 ("md: Use enum for overloaded magic numbers used by mddev->curr_resync") Cc: stable@vger.kernel.org # 6.0+ Signed-off-by: Hou Tao houtao1@huawei.com Reviewed-by: Logan Gunthorpe logang@deltatee.com Signed-off-by: Song Liu song@kernel.org Signed-off-by: Yu Kuai yukuai3@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c index 60f835c6ed3b..365ca3f14972 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8699,7 +8699,7 @@ void md_do_sync(struct md_thread *thread) mddev->pers->sync_request(mddev, max_sectors, &skipped);
if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && - mddev->curr_resync >= MD_RESYNC_ACTIVE) { + mddev->curr_resync > MD_RESYNC_ACTIVE) { if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { if (mddev->curr_resync >= mddev->recovery_cp) {
From: Yu Kuai yukuai3@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I6SJI1 CVE: NA
--------------------------------
status_resync() will calculate 'curr_resync - recovery_active' to show user a progress bar like following:
[============>........] resync = 61.4%
'curr_resync' and 'recovery_active' are updated in md_do_sync(), and status_resync() can read them concurrently, hence it's possible that 'curr_resync - recovery_active' can overflow (wrap around) to a huge number. In this case status_resync() will be stuck in the loop printing a large number of '=', which will end up in a soft lockup.
Fix the problem by setting 'resync' to MD_RESYNC_ACTIVE in this case, this way resync in progress will be reported to user.
Signed-off-by: Yu Kuai yukuai3@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- drivers/md/md.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c index 365ca3f14972..2532780d0dca 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7781,16 +7781,16 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev) } else if (resync > max_sectors) { resync = max_sectors; } else { - resync -= atomic_read(&mddev->recovery_active); - if (resync < MD_RESYNC_ACTIVE) { - /* - * Resync has started, but the subtraction has - * yielded one of the special values. Force it - * to active to ensure the status reports an - * active resync. - */ + res = atomic_read(&mddev->recovery_active); + /* + * Resync has started, but the subtraction has overflowed or + * yielded one of the special values. Force it to active to + * ensure the status reports an active resync. + */ + if (resync < res || resync - res < MD_RESYNC_ACTIVE) resync = MD_RESYNC_ACTIVE; - } + else + resync -= res; }
if (resync == MD_RESYNC_NONE) {