From: Zheng Yejian zhengyejian1@huawei.com
hulk inclusion category: bugfix bugzilla: 187209, https://gitee.com/openeuler/kernel/issues/I5GWFT CVE: NA
--------------------------------
Syzkaller report a softlockup problem, see following logs: [ 41.463870] watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [ksoftirqd/0:9] [ 41.509763] Modules linked in: [ 41.512295] CPU: 0 PID: 9 Comm: ksoftirqd/0 Not tainted 4.19.90 #13 [ 41.516134] Hardware name: linux,dummy-virt (DT) [ 41.519182] pstate: 80c00005 (Nzcv daif +PAN +UAO) [ 41.522415] pc : perf_trace_buf_alloc+0x138/0x238 [ 41.525583] lr : perf_trace_buf_alloc+0x138/0x238 [ 41.528656] sp : ffff8000c137e880 [ 41.531050] x29: ffff8000c137e880 x28: ffff20000850ced0 [ 41.534759] x27: 0000000000000000 x26: ffff8000c137e9c0 [ 41.538456] x25: ffff8000ce5c2ae0 x24: ffff200008358b08 [ 41.542151] x23: 0000000000000000 x22: ffff2000084a50ac [ 41.545834] x21: ffff8000c137e880 x20: 000000000000001c [ 41.549516] x19: ffff7dffbfdf88e8 x18: 0000000000000000 [ 41.553202] x17: 0000000000000000 x16: 0000000000000000 [ 41.556892] x15: 1ffff00036e07805 x14: 0000000000000000 [ 41.560592] x13: 0000000000000004 x12: 0000000000000000 [ 41.564315] x11: 1fffefbff7fbf120 x10: ffff0fbff7fbf120 [ 41.568003] x9 : dfff200000000000 x8 : ffff7dffbfdf8904 [ 41.571699] x7 : 0000000000000000 x6 : ffff0fbff7fbf121 [ 41.575398] x5 : ffff0fbff7fbf121 x4 : ffff0fbff7fbf121 [ 41.579086] x3 : ffff20000850cdc8 x2 : 0000000000000008 [ 41.582773] x1 : ffff8000c1376000 x0 : 0000000000000100 [ 41.586495] Call trace: [ 41.588922] perf_trace_buf_alloc+0x138/0x238 [ 41.591912] perf_ftrace_function_call+0x1ac/0x248 [ 41.595123] ftrace_ops_no_ops+0x3a4/0x488 [ 41.597998] ftrace_graph_call+0x0/0xc [ 41.600715] rcu_dynticks_curr_cpu_in_eqs+0x14/0x70 [ 41.603962] rcu_is_watching+0xc/0x20 [ 41.606635] ftrace_ops_no_ops+0x240/0x488 [ 41.609530] ftrace_graph_call+0x0/0xc [ 41.612249] __read_once_size_nocheck.constprop.0+0x1c/0x38 [ 41.615905] unwind_frame+0x140/0x358 [ 41.618597] walk_stackframe+0x34/0x60 [ 41.621359] __save_stack_trace+0x204/0x3b8 [ 41.624328] save_stack_trace+0x2c/0x38 [ 41.627112] __kasan_slab_free+0x120/0x228 [ 41.630018] 
kasan_slab_free+0x10/0x18 [ 41.632752] kfree+0x84/0x250 [ 41.635107] skb_free_head+0x70/0xb0 [ 41.637772] skb_release_data+0x3f8/0x730 [ 41.640626] skb_release_all+0x50/0x68 [ 41.643350] kfree_skb+0x84/0x278 [ 41.645890] kfree_skb_list+0x4c/0x78 [ 41.648595] __dev_queue_xmit+0x1a4c/0x23a0 [ 41.651541] dev_queue_xmit+0x28/0x38 [ 41.654254] ip6_finish_output2+0xeb0/0x1630 [ 41.657261] ip6_finish_output+0x2d8/0x7f8 [ 41.660174] ip6_output+0x19c/0x348 [ 41.663850] mld_sendpack+0x560/0x9e0 [ 41.666564] mld_ifc_timer_expire+0x484/0x8a8 [ 41.669624] call_timer_fn+0x68/0x4b0 [ 41.672355] expire_timers+0x168/0x498 [ 41.675126] run_timer_softirq+0x230/0x7a8 [ 41.678052] __do_softirq+0x2d0/0xba0 [ 41.680763] run_ksoftirqd+0x110/0x1a0 [ 41.683512] smpboot_thread_fn+0x31c/0x620 [ 41.686429] kthread+0x2c8/0x348 [ 41.688927] ret_from_fork+0x10/0x18
Looking into the above call stack, we found a recursive call in 'ftrace_graph_call'; see this snippet: __read_once_size_nocheck.constprop.0 ftrace_graph_call ...... rcu_dynticks_curr_cpu_in_eqs ftrace_graph_call
Our analysis shows that 'rcu_dynticks_curr_cpu_in_eqs' should not be traceable, and we verified that marking the related functions as 'notrace' avoids the problem.
Comparing with the mainline kernel, we find that commit ff5c4f5cad33 ("rcu/tree: Mark the idle relevant functions noinstr") marks the related functions as 'noinstr', which implies notrace and noinline and places them in the .noinstr.text section. Link: https://lore.kernel.org/all/20200416114706.625340212@infradead.org/
Currently the 'noinstr' mechanism has not been introduced, so we do not backport that commit directly (otherwise more changes would have to be introduced). Instead, we mark as 'notrace' the functions that are marked 'noinstr' in that commit.
Signed-off-by: Zheng Yejian zhengyejian1@huawei.com Reviewed-by: Zhen Lei thunder.leizhen@huawei.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- kernel/rcu/tree.c | 22 +++++++++++----------- kernel/rcu/tree_plugin.h | 4 ++-- 2 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 594d6ea99024..ea05c59096a2 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -275,7 +275,7 @@ static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { * Record entry into an extended quiescent state. This is only to be * called when not already in an extended quiescent state. */ -static void rcu_dynticks_eqs_enter(void) +static notrace void rcu_dynticks_eqs_enter(void) { struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); int seq; @@ -298,7 +298,7 @@ static void rcu_dynticks_eqs_enter(void) * Record exit from an extended quiescent state. This is only to be * called from an extended quiescent state. */ -static void rcu_dynticks_eqs_exit(void) +static notrace void rcu_dynticks_eqs_exit(void) { struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); int seq; @@ -343,7 +343,7 @@ static void rcu_dynticks_eqs_online(void) * * No ordering, as we are sampling CPU-local information. */ -bool rcu_dynticks_curr_cpu_in_eqs(void) +static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void) { struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
@@ -706,7 +706,7 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp) * the possibility of usermode upcalls having messed up our count * of interrupt nesting level during the prior busy period. */ -static void rcu_eqs_enter(bool user) +static notrace void rcu_eqs_enter(bool user) { struct rcu_state *rsp; struct rcu_data *rdp; @@ -763,7 +763,7 @@ void rcu_idle_enter(void) * If you add or remove a call to rcu_user_enter(), be sure to test with * CONFIG_RCU_EQS_DEBUG=y. */ -void rcu_user_enter(void) +notrace void rcu_user_enter(void) { lockdep_assert_irqs_disabled(); rcu_eqs_enter(true); @@ -781,7 +781,7 @@ void rcu_user_enter(void) * If you add or remove a call to rcu_nmi_exit(), be sure to test * with CONFIG_RCU_EQS_DEBUG=y. */ -void rcu_nmi_exit(void) +notrace void rcu_nmi_exit(void) { struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
@@ -829,7 +829,7 @@ void rcu_nmi_exit(void) * If you add or remove a call to rcu_irq_exit(), be sure to test with * CONFIG_RCU_EQS_DEBUG=y. */ -void rcu_irq_exit(void) +notrace void rcu_irq_exit(void) { struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
@@ -864,7 +864,7 @@ void rcu_irq_exit_irqson(void) * allow for the possibility of usermode upcalls messing up our count of * interrupt nesting level during the busy period that is just now starting. */ -static void rcu_eqs_exit(bool user) +static notrace void rcu_eqs_exit(bool user) { struct rcu_dynticks *rdtp; long oldval; @@ -914,7 +914,7 @@ void rcu_idle_exit(void) * If you add or remove a call to rcu_user_exit(), be sure to test with * CONFIG_RCU_EQS_DEBUG=y. */ -void rcu_user_exit(void) +void notrace rcu_user_exit(void) { rcu_eqs_exit(1); } @@ -932,7 +932,7 @@ void rcu_user_exit(void) * If you add or remove a call to rcu_nmi_enter(), be sure to test * with CONFIG_RCU_EQS_DEBUG=y. */ -void rcu_nmi_enter(void) +notrace void rcu_nmi_enter(void) { struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); long incby = 2; @@ -982,7 +982,7 @@ void rcu_nmi_enter(void) * If you add or remove a call to rcu_irq_enter(), be sure to test with * CONFIG_RCU_EQS_DEBUG=y. */ -void rcu_irq_enter(void) +notrace void rcu_irq_enter(void) { struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 5f6de49dc78e..568818bef28f 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -2677,7 +2677,7 @@ static void rcu_bind_gp_kthread(void) }
/* Record the current task on dyntick-idle entry. */ -static void rcu_dynticks_task_enter(void) +static notrace void rcu_dynticks_task_enter(void) { #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id()); @@ -2685,7 +2685,7 @@ static void rcu_dynticks_task_enter(void) }
/* Record no current task on dyntick-idle exit. */ -static void rcu_dynticks_task_exit(void) +static notrace void rcu_dynticks_task_exit(void) { #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
From: Yu Kuai yukuai3@huawei.com
hulk inclusion category: bugfix bugzilla: 187190, https://gitee.com/src-openeuler/kernel/issues/I5GWOV CVE: NA
--------------------------------
This reverts commit 64ba823fd4272f7d624bbfb4bd6e3c89aa6f423c.
The patches that broke kabi will be reverted together.
Signed-off-by: Yu Kuai yukuai3@huawei.com Reviewed-by: Jason Yan yanaijie@huawei.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- include/linux/fs.h | 1 - include/linux/genhd.h | 7 +++---- 2 files changed, 3 insertions(+), 5 deletions(-)
diff --git a/include/linux/fs.h b/include/linux/fs.h index bcd2131ca06c..025b98fbab05 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -479,7 +479,6 @@ struct block_device { struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ unsigned bd_part_count; - int bd_invalidated; struct gendisk * bd_disk; struct request_queue * bd_queue; struct backing_dev_info *bd_bdi; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 50c76a59d7e2..f3fc01f0e84c 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -180,8 +180,6 @@ struct blk_integrity {
#endif /* CONFIG_BLK_DEV_INTEGRITY */
-#define GD_NEED_PART_SCAN 0 - struct gendisk { /* major, first_minor and minors are input parameters only, * don't use directly. Use disk_devt() and disk_max_parts(). @@ -210,6 +208,8 @@ struct gendisk { void *private_data;
int flags; + unsigned long state; +#define GD_NEED_PART_SCAN 0 struct rw_semaphore lookup_sem; struct kobject *slave_dir;
@@ -226,12 +226,11 @@ struct gendisk { #ifndef __GENKSYMS__ unsigned long *user_ro_bitmap; atomic64_t sync_io_sectors; /* RAID */ - unsigned long state; #else KABI_RESERVE(1) KABI_RESERVE(2) - KABI_RESERVE(3) #endif + KABI_RESERVE(3) KABI_RESERVE(4) };
From: Yu Kuai yukuai3@huawei.com
hulk inclusion category: bugfix bugzilla: 187190, https://gitee.com/src-openeuler/kernel/issues/I5GWOV CVE: NA
--------------------------------
This reverts commit b2f0e44fd4aeba9d03a0e152cd50271bff9119b3.
Because it introduces the following problem in the LTP zram tests:
BUG: unable to handle kernel NULL pointer dereference at 0000000000000600 PGD 0 P4D 0 Oops: 0002 [#1] SMP PTI CPU: 28 PID: 172121 Comm: sh Kdump: loaded Tainted: G OE --------- - - 4.18.0+ #2 Hardware name: Huawei RH2288H V3/BC11HGSA0, BIOS 5.15 05/21/2019 RIP: 0010:flush_disk+0x1d/0x50 RSP: 0018:ffffaf14a516fe20 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff899e26bac380 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff899e26bac380 RBP: ffff899e26bac380 R08: 00000000000006a9 R09: 0000000000000004 R10: ffff89cd878ff440 R11: 0000000000000001 R12: 0000000000000000 R13: ffff899e26bac398 R14: ffffaf14a516ff00 R15: ffff89cd8709c3e0 FS: 00007f78d6840740(0000) GS:ffff89fcbf480000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000600 CR3: 000000308afc0002 CR4: 00000000001606e0 Call Trace: revalidate_disk+0x57/0x80 reset_store+0xaf/0x120 [zram] kernfs_fop_write+0x10f/0x190 vfs_write+0xad/0x1a0 ksys_write+0x52/0xc0 do_syscall_64+0x5d/0x1d0 entry_SYSCALL_64_after_hwframe+0x65/0xca
This is because "bdev->bd_disk" is not guaranteed to exist, so simply converting "set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags)" to "set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state)" is wrong.
The reason it was backported is that commit 2a57456c8973 ("block: Fix warning in bd_link_disk_holder()") has a regression in which part scan is disabled in device_add_disk(); this problem will be fixed in a later patch.
Signed-off-by: Yu Kuai yukuai3@huawei.com Reviewed-by: Jason Yan yanaijie@huawei.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- block/genhd.c | 2 +- block/partition-generic.c | 6 +++--- drivers/block/nbd.c | 8 ++++---- fs/block_dev.c | 11 ++++++----- include/linux/fs.h | 3 +++ include/linux/genhd.h | 2 -- 6 files changed, 17 insertions(+), 15 deletions(-)
diff --git a/block/genhd.c b/block/genhd.c index 94b0fe34a755..4211722e1c85 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -646,7 +646,7 @@ static void register_disk(struct device *parent, struct gendisk *disk) if (!bdev) goto exit;
- set_bit(GD_NEED_PART_SCAN, &disk->state); + set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); err = blkdev_get(bdev, FMODE_READ, NULL); if (err < 0) goto exit; diff --git a/block/partition-generic.c b/block/partition-generic.c index b8481079c58a..ae3761fed854 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -546,7 +546,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) if (disk->fops->revalidate_disk) disk->fops->revalidate_disk(disk); check_disk_size_change(disk, bdev, true); - clear_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); + clear_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) return 0; if (IS_ERR(state)) { @@ -662,7 +662,7 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev) { int res;
- if (!test_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state)) + if (!test_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags)) return 0;
res = drop_partitions(disk, bdev); @@ -671,7 +671,7 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
set_capacity(disk, 0); check_disk_size_change(disk, bdev, false); - clear_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); + clear_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); /* tell userspace that the media / partition table may have changed */ kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 833525f41005..b0a45edddafd 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -317,7 +317,7 @@ static void nbd_size_update(struct nbd_device *nbd, bool start) if (start) set_blocksize(bdev, config->blksize); } else - set_bit(GD_NEED_PART_SCAN, &nbd->disk->state); + set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); bdput(bdev); } kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); @@ -1343,7 +1343,7 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b return ret;
if (max_part) - set_bit(GD_NEED_PART_SCAN, &nbd->disk->state); + set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); mutex_unlock(&nbd->config_lock); ret = wait_event_interruptible(config->recv_wq, atomic_read(&config->recv_threads) == 0); @@ -1524,9 +1524,9 @@ static int nbd_open(struct block_device *bdev, fmode_t mode) refcount_set(&nbd->config_refs, 1); refcount_inc(&nbd->refs); mutex_unlock(&nbd->config_lock); - set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); + set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); } else if (nbd_disconnected(nbd->config)) { - set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); + set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); } out: mutex_unlock(&nbd_index_mutex); diff --git a/fs/block_dev.c b/fs/block_dev.c index e255c3b65224..f521b7cf907f 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -922,6 +922,7 @@ struct block_device *bdget(dev_t dev) bdev->bd_inode = inode; bdev->bd_block_size = i_blocksize(inode); bdev->bd_part_count = 0; + bdev->bd_flags = 0; inode->i_mode = S_IFBLK; inode->i_rdev = dev; inode->i_bdev = bdev; @@ -1403,7 +1404,7 @@ static void flush_disk(struct block_device *bdev, bool kill_dirty) "resized disk %s\n", bdev->bd_disk ? bdev->bd_disk->disk_name : ""); } - set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); + set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); }
/** @@ -1456,7 +1457,7 @@ int revalidate_disk(struct gendisk *disk)
mutex_lock(&bdev->bd_mutex); check_disk_size_change(disk, bdev, ret == 0); - clear_bit(GD_NEED_PART_SCAN, &disk->state); + clear_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); mutex_unlock(&bdev->bd_mutex); bdput(bdev); return ret; @@ -1519,7 +1520,7 @@ static void bdev_disk_changed(struct block_device *bdev, bool invalidate) up_read(&disk->lookup_sem); } else { check_disk_size_change(bdev->bd_disk, bdev, !invalidate); - clear_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); + clear_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); } }
@@ -1604,7 +1605,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) * The latter is necessary to prevent ghost * partitions on a removed medium. */ - if (test_bit(GD_NEED_PART_SCAN, &disk->state) && + if (test_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags) && (!ret || ret == -ENOMEDIUM)) bdev_disk_changed(bdev, ret == -ENOMEDIUM);
@@ -1641,7 +1642,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) if (bdev->bd_disk->fops->open) ret = bdev->bd_disk->fops->open(bdev, mode); /* the same as first opener case, read comment there */ - if (test_bit(GD_NEED_PART_SCAN, &disk->state) && + if (test_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags) && (!ret || ret == -ENOMEDIUM)) bdev_disk_changed(bdev, ret == -ENOMEDIUM); if (ret) diff --git a/include/linux/fs.h b/include/linux/fs.h index 025b98fbab05..480936c2d938 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -459,6 +459,8 @@ struct address_space { */ struct request_queue;
+#define BDEV_NEED_PART_SCAN 0 + struct block_device { dev_t bd_dev; /* not a kdev_t - it's a search key */ int bd_openers; @@ -479,6 +481,7 @@ struct block_device { struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ unsigned bd_part_count; + unsigned long bd_flags; struct gendisk * bd_disk; struct request_queue * bd_queue; struct backing_dev_info *bd_bdi; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index f3fc01f0e84c..58a819484fb4 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -208,8 +208,6 @@ struct gendisk { void *private_data;
int flags; - unsigned long state; -#define GD_NEED_PART_SCAN 0 struct rw_semaphore lookup_sem; struct kobject *slave_dir;
From: Yu Kuai yukuai3@huawei.com
hulk inclusion category: bugfix bugzilla: 187190, https://gitee.com/src-openeuler/kernel/issues/I5GWOV CVE: NA
--------------------------------
This reverts commit b6113052c914f6035a8dbc5a15dcd529eeef0ace.
Signed-off-by: Yu Kuai yukuai3@huawei.com Reviewed-by: Jason Yan yanaijie@huawei.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- block/genhd.c | 2 +- block/partition-generic.c | 6 +++--- drivers/block/nbd.c | 8 ++++---- fs/block_dev.c | 12 ++++++------ include/linux/fs.h | 4 +--- 5 files changed, 15 insertions(+), 17 deletions(-)
diff --git a/block/genhd.c b/block/genhd.c index 4211722e1c85..a158298b759f 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -646,7 +646,7 @@ static void register_disk(struct device *parent, struct gendisk *disk) if (!bdev) goto exit;
- set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); + bdev->bd_invalidated = 1; err = blkdev_get(bdev, FMODE_READ, NULL); if (err < 0) goto exit; diff --git a/block/partition-generic.c b/block/partition-generic.c index ae3761fed854..2261566741f4 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -546,7 +546,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) if (disk->fops->revalidate_disk) disk->fops->revalidate_disk(disk); check_disk_size_change(disk, bdev, true); - clear_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); + bdev->bd_invalidated = 0; if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) return 0; if (IS_ERR(state)) { @@ -662,7 +662,7 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev) { int res;
- if (!test_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags)) + if (!bdev->bd_invalidated) return 0;
res = drop_partitions(disk, bdev); @@ -671,7 +671,7 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
set_capacity(disk, 0); check_disk_size_change(disk, bdev, false); - clear_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); + bdev->bd_invalidated = 0; /* tell userspace that the media / partition table may have changed */ kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index b0a45edddafd..c0a4a8c123b0 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -317,7 +317,7 @@ static void nbd_size_update(struct nbd_device *nbd, bool start) if (start) set_blocksize(bdev, config->blksize); } else - set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); + bdev->bd_invalidated = 1; bdput(bdev); } kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); @@ -1343,7 +1343,7 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b return ret;
if (max_part) - set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); + bdev->bd_invalidated = 1; mutex_unlock(&nbd->config_lock); ret = wait_event_interruptible(config->recv_wq, atomic_read(&config->recv_threads) == 0); @@ -1524,9 +1524,9 @@ static int nbd_open(struct block_device *bdev, fmode_t mode) refcount_set(&nbd->config_refs, 1); refcount_inc(&nbd->refs); mutex_unlock(&nbd->config_lock); - set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); + bdev->bd_invalidated = 1; } else if (nbd_disconnected(nbd->config)) { - set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); + bdev->bd_invalidated = 1; } out: mutex_unlock(&nbd_index_mutex); diff --git a/fs/block_dev.c b/fs/block_dev.c index f521b7cf907f..9868b21b8ef9 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -922,7 +922,7 @@ struct block_device *bdget(dev_t dev) bdev->bd_inode = inode; bdev->bd_block_size = i_blocksize(inode); bdev->bd_part_count = 0; - bdev->bd_flags = 0; + bdev->bd_invalidated = 0; inode->i_mode = S_IFBLK; inode->i_rdev = dev; inode->i_bdev = bdev; @@ -1404,7 +1404,7 @@ static void flush_disk(struct block_device *bdev, bool kill_dirty) "resized disk %s\n", bdev->bd_disk ? bdev->bd_disk->disk_name : ""); } - set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); + bdev->bd_invalidated = 1; }
/** @@ -1457,7 +1457,7 @@ int revalidate_disk(struct gendisk *disk)
mutex_lock(&bdev->bd_mutex); check_disk_size_change(disk, bdev, ret == 0); - clear_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); + bdev->bd_invalidated = 0; mutex_unlock(&bdev->bd_mutex); bdput(bdev); return ret; @@ -1520,7 +1520,7 @@ static void bdev_disk_changed(struct block_device *bdev, bool invalidate) up_read(&disk->lookup_sem); } else { check_disk_size_change(bdev->bd_disk, bdev, !invalidate); - clear_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); + bdev->bd_invalidated = 0; } }
@@ -1605,7 +1605,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) * The latter is necessary to prevent ghost * partitions on a removed medium. */ - if (test_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags) && + if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM)) bdev_disk_changed(bdev, ret == -ENOMEDIUM);
@@ -1642,7 +1642,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) if (bdev->bd_disk->fops->open) ret = bdev->bd_disk->fops->open(bdev, mode); /* the same as first opener case, read comment there */ - if (test_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags) && + if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM)) bdev_disk_changed(bdev, ret == -ENOMEDIUM); if (ret) diff --git a/include/linux/fs.h b/include/linux/fs.h index 480936c2d938..bcd2131ca06c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -459,8 +459,6 @@ struct address_space { */ struct request_queue;
-#define BDEV_NEED_PART_SCAN 0 - struct block_device { dev_t bd_dev; /* not a kdev_t - it's a search key */ int bd_openers; @@ -481,7 +479,7 @@ struct block_device { struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ unsigned bd_part_count; - unsigned long bd_flags; + int bd_invalidated; struct gendisk * bd_disk; struct request_queue * bd_queue; struct backing_dev_info *bd_bdi;
From: Yu Kuai yukuai3@huawei.com
hulk inclusion category: bugfix bugzilla: 187190, https://gitee.com/src-openeuler/kernel/issues/I5GWOV CVE: NA
--------------------------------
Commit f20a726bedfc ("block: Fix warning in bd_link_disk_holder()") moves the setting of flag 'GENHD_FL_UP' to after blkdev_get(), which disables part scan:
device_add_disk register_disk blkdev_get __blkdev_get bdev_get_gendisk get_gendisk -> failed because 'GENHD_FL_UP' is not set
And this will cause tests block/017, block/018 and scsi/004 to fail.
Fix the problem by moving part scan as well.
Fixes: f20a726bedfc ("block: Fix warning in bd_link_disk_holder()") Signed-off-by: Yu Kuai yukuai3@huawei.com Reviewed-by: Jason Yan yanaijie@huawei.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- block/blk-sysfs.c | 37 +++++++++++++++++++++++++++++++++++++ block/genhd.c | 32 -------------------------------- 2 files changed, 37 insertions(+), 32 deletions(-)
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 50b61a92b08d..90fab2253367 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -884,6 +884,42 @@ struct kobj_type blk_queue_ktype = { .release = blk_release_queue, };
+static void disk_init_partition(struct gendisk *disk) +{ + struct device *ddev = disk_to_dev(disk); + struct block_device *bdev; + struct disk_part_iter piter; + struct hd_struct *part; + + /* No minors to use for partitions */ + if (!disk_part_scan_enabled(disk)) + goto exit; + + /* No such device (e.g., media were just removed) */ + if (!get_capacity(disk)) + goto exit; + + bdev = bdget_disk(disk, 0); + if (!bdev) + goto exit; + + bdev->bd_invalidated = 1; + if (blkdev_get(bdev, FMODE_READ, NULL)) + goto exit; + blkdev_put(bdev, FMODE_READ); + +exit: + /* announce disk after possible partitions are created */ + dev_set_uevent_suppress(ddev, 0); + kobject_uevent(&ddev->kobj, KOBJ_ADD); + + /* announce possible partitions */ + disk_part_iter_init(&piter, disk, 0); + while ((part = disk_part_iter_next(&piter))) + kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD); + disk_part_iter_exit(&piter); +} + /** * blk_register_queue - register a block layer queue with sysfs * @disk: Disk of which the request queue should be registered with sysfs. @@ -944,6 +980,7 @@ int blk_register_queue(struct gendisk *disk) * before it's registration is done. */ disk->flags |= GENHD_FL_UP; + disk_init_partition(disk); ret = 0; unlock: mutex_unlock(&q->sysfs_lock); diff --git a/block/genhd.c b/block/genhd.c index a158298b759f..124f8d94584c 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -598,9 +598,6 @@ static int exact_lock(dev_t devt, void *data) static void register_disk(struct device *parent, struct gendisk *disk) { struct device *ddev = disk_to_dev(disk); - struct block_device *bdev; - struct disk_part_iter piter; - struct hd_struct *part; int err;
ddev->parent = parent; @@ -634,35 +631,6 @@ static void register_disk(struct device *parent, struct gendisk *disk) if (disk->flags & GENHD_FL_HIDDEN) return;
- /* No minors to use for partitions */ - if (!disk_part_scan_enabled(disk)) - goto exit; - - /* No such device (e.g., media were just removed) */ - if (!get_capacity(disk)) - goto exit; - - bdev = bdget_disk(disk, 0); - if (!bdev) - goto exit; - - bdev->bd_invalidated = 1; - err = blkdev_get(bdev, FMODE_READ, NULL); - if (err < 0) - goto exit; - blkdev_put(bdev, FMODE_READ); - -exit: - /* announce disk after possible partitions are created */ - dev_set_uevent_suppress(ddev, 0); - kobject_uevent(&ddev->kobj, KOBJ_ADD); - - /* announce possible partitions */ - disk_part_iter_init(&piter, disk, 0); - while ((part = disk_part_iter_next(&piter))) - kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD); - disk_part_iter_exit(&piter); - if (disk->queue->backing_dev_info->dev) { err = sysfs_create_link(&ddev->kobj, &disk->queue->backing_dev_info->dev->kobj,