From: Li Nan linan122@huawei.com
hulk inclusion category: bugfix bugzilla: 188377, https://gitee.com/openeuler/kernel/issues/I6GOYF CVE: NA
--------------------------------
After commit 4ca40c2ce099 ("md/raid10: Allow replacement device to be replace old drive.") mirrors->replacement can replace rdev during replacement's io pending, and repl_bio will write rdev (see raid10_write_one_disk()). We will get wrong device by r10conf in raid10_end_write_request(). In which case, r10_bio->devs[slot].repl_bio will be put but not set to IO_MADE_GOOD, and it will be put again later in raid_end_bio_io(), uaf occurs.
Fix it by using r10_bio to record rdev. Put the operations of io fail and no replacement together, so no need to change repl.
================================================================== BUG: KASAN: use-after-free in bio_flagged include/linux/bio.h:238 [inline] BUG: KASAN: use-after-free in bio_put+0x78/0x80 block/bio.c:650 Read of size 2 at addr ffff888116524dd4 by task md0_raid10/2618
CPU: 0 PID: 2618 Comm: md0_raid10 Not tainted 5.10.0+ #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 sd 0:0:0:0: rejecting I/O to offline device Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x107/0x167 lib/dump_stack.c:118 print_address_description.constprop.0+0x1c/0x270 mm/kasan/report.c:390 __kasan_report mm/kasan/report.c:550 [inline] kasan_report.cold+0x22/0x3a mm/kasan/report.c:567 bio_flagged include/linux/bio.h:238 [inline] bio_put+0x78/0x80 block/bio.c:650 put_all_bios drivers/md/raid10.c:248 [inline] free_r10bio drivers/md/raid10.c:257 [inline] raid_end_bio_io+0x3b5/0x590 drivers/md/raid10.c:309 handle_write_completed drivers/md/raid10.c:2699 [inline] raid10d+0x2f85/0x5af0 drivers/md/raid10.c:2759 md_thread+0x444/0x4b0 drivers/md/md.c:7932 kthread+0x38c/0x470 kernel/kthread.c:313 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:299
Allocated by task 1400: kasan_save_stack+0x1b/0x40 mm/kasan/common.c:48 kasan_set_track mm/kasan/common.c:56 [inline] set_alloc_info mm/kasan/common.c:498 [inline] __kasan_kmalloc.constprop.0+0xb5/0xe0 mm/kasan/common.c:530 slab_post_alloc_hook mm/slab.h:512 [inline] slab_alloc_node mm/slub.c:2923 [inline] slab_alloc mm/slub.c:2931 [inline] kmem_cache_alloc+0x144/0x360 mm/slub.c:2936 mempool_alloc+0x146/0x360 mm/mempool.c:391 bio_alloc_bioset+0x375/0x610 block/bio.c:486 bio_clone_fast+0x20/0x50 block/bio.c:711 raid10_write_one_disk+0x166/0xd30 drivers/md/raid10.c:1240 raid10_write_request+0x1600/0x2c90 drivers/md/raid10.c:1484 __make_request drivers/md/raid10.c:1508 [inline] raid10_make_request+0x376/0x620 drivers/md/raid10.c:1537 md_handle_request+0x699/0x970 drivers/md/md.c:451 md_submit_bio+0x204/0x400 drivers/md/md.c:489 __submit_bio block/blk-core.c:959 [inline] __submit_bio_noacct block/blk-core.c:1007 [inline] submit_bio_noacct+0x2e3/0xcf0 block/blk-core.c:1086 submit_bio+0x1a0/0x3a0 block/blk-core.c:1146 submit_bh_wbc+0x685/0x8e0 fs/buffer.c:3053 ext4_commit_super+0x37e/0x6c0 fs/ext4/super.c:5696 flush_stashed_error_work+0x28b/0x400 fs/ext4/super.c:791 process_one_work+0x9a6/0x1590 kernel/workqueue.c:2280 worker_thread+0x61d/0x1310 kernel/workqueue.c:2426 kthread+0x38c/0x470 kernel/kthread.c:313 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:299
Freed by task 2618: kasan_save_stack+0x1b/0x40 mm/kasan/common.c:48 kasan_set_track+0x1c/0x30 mm/kasan/common.c:56 kasan_set_free_info+0x20/0x40 mm/kasan/generic.c:361 __kasan_slab_free+0x151/0x180 mm/kasan/common.c:482 slab_free_hook mm/slub.c:1569 [inline] slab_free_freelist_hook+0xa9/0x180 mm/slub.c:1608 slab_free mm/slub.c:3179 [inline] kmem_cache_free+0xcd/0x3d0 mm/slub.c:3196 mempool_free+0xe3/0x3b0 mm/mempool.c:500 bio_free+0xe2/0x140 block/bio.c:266 bio_put+0x58/0x80 block/bio.c:651 raid10_end_write_request+0x885/0xb60 drivers/md/raid10.c:516 bio_endio+0x376/0x6a0 block/bio.c:1465 req_bio_endio block/blk-core.c:289 [inline] blk_update_request+0x5f5/0xf40 block/blk-core.c:1525 blk_mq_end_request+0x4c/0x510 block/blk-mq.c:654 blk_flush_complete_seq+0x835/0xd80 block/blk-flush.c:204 flush_end_io+0x7b7/0xb90 block/blk-flush.c:261 __blk_mq_end_request+0x282/0x4c0 block/blk-mq.c:645 scsi_end_request+0x3a8/0x850 drivers/scsi/scsi_lib.c:607 scsi_io_completion+0x3f5/0x1320 drivers/scsi/scsi_lib.c:970 scsi_softirq_done+0x11b/0x490 drivers/scsi/scsi_lib.c:1448 blk_mq_complete_request block/blk-mq.c:788 [inline] blk_mq_complete_request+0x84/0xb0 block/blk-mq.c:785 scsi_mq_done+0x155/0x360 drivers/scsi/scsi_lib.c:1603 virtscsi_vq_done drivers/scsi/virtio_scsi.c:184 [inline] virtscsi_req_done+0x14c/0x220 drivers/scsi/virtio_scsi.c:199 vring_interrupt drivers/virtio/virtio_ring.c:2061 [inline] vring_interrupt+0x27a/0x300 drivers/virtio/virtio_ring.c:2047 __handle_irq_event_percpu+0x2f8/0x830 kernel/irq/handle.c:156 handle_irq_event_percpu kernel/irq/handle.c:196 [inline] handle_irq_event+0x105/0x280 kernel/irq/handle.c:213 handle_edge_irq+0x258/0xd20 kernel/irq/chip.c:828 asm_call_irq_on_stack+0xf/0x20 __run_irq_on_irqstack arch/x86/include/asm/irq_stack.h:48 [inline] run_irq_on_irqstack_cond arch/x86/include/asm/irq_stack.h:101 [inline] handle_irq arch/x86/kernel/irq.c:230 [inline] __common_interrupt arch/x86/kernel/irq.c:249 [inline] common_interrupt+0xe2/0x190 arch/x86/kernel/irq.c:239 asm_common_interrupt+0x1e/0x40 arch/x86/include/asm/idtentry.h:626
Fixes: 4ca40c2ce099 ("md/raid10: Allow replacement device to be replace old drive.") Signed-off-by: Li Nan linan122@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com --- drivers/md/raid10.c | 86 +++++++++++++++++++++++++-------------------- drivers/md/raid10.h | 12 +++---- 2 files changed, 53 insertions(+), 45 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 38f4d427a919..878e7b07fc48 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -441,47 +441,52 @@ static void raid10_end_write_request(struct bio *bio)
dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
- if (repl) - rdev = conf->mirrors[dev].replacement; - if (!rdev) { - smp_rmb(); - repl = 0; - rdev = conf->mirrors[dev].rdev; + if (repl) { + rdev = r10_bio->devs[slot].replacement; + if (rdev == conf->mirrors[dev].replacement) { + if (bio->bi_status && !discard_error) { + /* + * Never record new bad blocks to replacement, + * just fail it. + */ + md_error(rdev->mddev, rdev); + goto out; + } + } else { + WARN_ON_ONCE(rdev != conf->mirrors[dev].rdev); + } + } else { + rdev = r10_bio->devs[slot].rdev; } /* * this branch is our 'one mirror IO has finished' event handler: */ if (bio->bi_status && !discard_error) { - if (repl) - /* Never record new bad blocks to replacement, - * just fail it. - */ - md_error(rdev->mddev, rdev); - else { - set_bit(WriteErrorSeen, &rdev->flags); - if (!test_and_set_bit(WantReplacement, &rdev->flags)) - set_bit(MD_RECOVERY_NEEDED, - &rdev->mddev->recovery); + set_bit(WriteErrorSeen, &rdev->flags); + if (!test_and_set_bit(WantReplacement, &rdev->flags)) + set_bit(MD_RECOVERY_NEEDED, + &rdev->mddev->recovery);
- dec_rdev = 0; - if (test_bit(FailFast, &rdev->flags) && - (bio->bi_opf & MD_FAILFAST)) { - md_error(rdev->mddev, rdev); - } + dec_rdev = 0; + if (test_bit(FailFast, &rdev->flags) && + (bio->bi_opf & MD_FAILFAST)) + md_error(rdev->mddev, rdev);
- /* - * When the device is faulty, it is not necessary to - * handle write error. - */ - if (!test_bit(Faulty, &rdev->flags)) - set_bit(R10BIO_WriteError, &r10_bio->state); - else { - /* Fail the request */ - set_bit(R10BIO_Degraded, &r10_bio->state); + /* + * When the device is faulty, it is not necessary to + * handle write error. + */ + if (!test_bit(Faulty, &rdev->flags)) { + set_bit(R10BIO_WriteError, &r10_bio->state); + } else { + /* Fail the request */ + set_bit(R10BIO_Degraded, &r10_bio->state); + if (repl) + r10_bio->devs[slot].repl_bio = NULL; + else r10_bio->devs[slot].bio = NULL; - to_put = bio; - dec_rdev = 1; - } + to_put = bio; + dec_rdev = 1; } } else { /* @@ -513,16 +518,17 @@ static void raid10_end_write_request(struct bio *bio) r10_bio->devs[slot].addr, r10_bio->sectors, &first_bad, &bad_sectors) && !discard_error) { - bio_put(bio); if (repl) r10_bio->devs[slot].repl_bio = IO_MADE_GOOD; else r10_bio->devs[slot].bio = IO_MADE_GOOD; + bio_put(bio); dec_rdev = 0; set_bit(R10BIO_MadeGood, &r10_bio->state); } }
+out: /* * * Let's see if all mirrored write operations have finished @@ -1259,10 +1265,13 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, rdev = conf->mirrors[devnum].rdev;
mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set); - if (replacement) + if (replacement) { r10_bio->devs[n_copy].repl_bio = mbio; - else + r10_bio->devs[n_copy].replacement = rdev; + } else { r10_bio->devs[n_copy].bio = mbio; + r10_bio->devs[n_copy].rdev = rdev; + }
mbio->bi_iter.bi_sector = (r10_bio->devs[n_copy].addr + choose_data_offset(r10_bio, rdev)); @@ -2703,9 +2712,8 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) } else { bool fail = false; for (m = 0; m < conf->copies; m++) { - int dev = r10_bio->devs[m].devnum; struct bio *bio = r10_bio->devs[m].bio; - rdev = conf->mirrors[dev].rdev; + rdev = r10_bio->devs[m].rdev; if (bio == IO_MADE_GOOD) { rdev_clear_badblocks( rdev, @@ -2722,7 +2730,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) rdev_dec_pending(rdev, conf->mddev); } bio = r10_bio->devs[m].repl_bio; - rdev = conf->mirrors[dev].replacement; + rdev = r10_bio->devs[m].replacement; if (rdev && bio == IO_MADE_GOOD) { rdev_clear_badblocks( rdev, diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index 5420250d4bd6..73d243e12363 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h @@ -146,12 +146,12 @@ struct r10bio { */ struct r10dev { struct bio *bio; - union { - struct bio *repl_bio; /* used for resync and - * writes */ - struct md_rdev *rdev; /* used for reads - * (read_slot >= 0) */ - }; + /* Currently just used for normal reads and writes */ + struct md_rdev *rdev; + /* used for resync and writes */ + struct bio *repl_bio; + /* Currently just used for normal writes */ + struct md_rdev *replacement; sector_t addr; int devnum; } devs[];