
mainline inclusion from mainline-v6.17-rc1 commit 7d345aa1fac4c2ec9584fbd6f389f2c2368671d5 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICN2MB Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- The grp->bb_largest_free_order is updated regardless of whether mb_optimize_scan is enabled. This can lead to inconsistencies between grp->bb_largest_free_order and the actual s_mb_largest_free_orders list index when mb_optimize_scan is repeatedly enabled and disabled via remount. For example, if mb_optimize_scan is initially enabled, largest free order is 3, and the group is in s_mb_largest_free_orders[3]. Then, mb_optimize_scan is disabled via remount, block allocations occur, updating largest free order to 2. Finally, mb_optimize_scan is re-enabled via remount, more block allocations update largest free order to 1. At this point, the group would be removed from s_mb_largest_free_orders[3] under the protection of s_mb_largest_free_orders_locks[2]. This lock mismatch can lead to list corruption. To fix this, whenever grp->bb_largest_free_order changes, we now always attempt to remove the group from its old order list. However, we only insert the group into the new order list if `mb_optimize_scan` is enabled. This approach helps prevent lock inconsistencies and ensures the data in the order lists remains reliable. Fixes: 196e402adf2e ("ext4: improve cr 0 / cr 1 group scanning") CC: stable@vger.kernel.org Suggested-by: Jan Kara <jack@suse.cz> Signed-off-by: Baokun Li <libaokun1@huawei.com> Reviewed-by: Zhang Yi <yi.zhang@huawei.com> Link: https://patch.msgid.link/20250714130327.1830534-12-libaokun1@huawei.com Signed-off-by: Theodore Ts'o <tytso@mit.edu> Signed-off-by: Baokun Li <libaokun1@huawei.com> --- fs/ext4/mballoc.c | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 1075a658c6c5..dcad2d88df6b 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1155,33 +1155,28 @@ static void mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp) { struct ext4_sb_info *sbi = EXT4_SB(sb); - int i; + int new, old = grp->bb_largest_free_order; - for (i = MB_NUM_ORDERS(sb) - 1; i >= 0; i--) - if (grp->bb_counters[i] > 0) + for (new = MB_NUM_ORDERS(sb) - 1; new >= 0; new--) + if (grp->bb_counters[new] > 0) break; + /* No need to move between order lists? */ - if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || - i == grp->bb_largest_free_order) { - grp->bb_largest_free_order = i; + if (new == old) return; - } - if (grp->bb_largest_free_order >= 0) { - write_lock(&sbi->s_mb_largest_free_orders_locks[ - grp->bb_largest_free_order]); + if (old >= 0 && !list_empty(&grp->bb_largest_free_order_node)) { + write_lock(&sbi->s_mb_largest_free_orders_locks[old]); list_del_init(&grp->bb_largest_free_order_node); - write_unlock(&sbi->s_mb_largest_free_orders_locks[ - grp->bb_largest_free_order]); + write_unlock(&sbi->s_mb_largest_free_orders_locks[old]); } - grp->bb_largest_free_order = i; - if (grp->bb_largest_free_order >= 0 && grp->bb_free) { - write_lock(&sbi->s_mb_largest_free_orders_locks[ - grp->bb_largest_free_order]); + + grp->bb_largest_free_order = new; + if (test_opt2(sb, MB_OPTIMIZE_SCAN) && new >= 0 && grp->bb_free) { + write_lock(&sbi->s_mb_largest_free_orders_locks[new]); list_add_tail(&grp->bb_largest_free_order_node, - &sbi->s_mb_largest_free_orders[grp->bb_largest_free_order]); - write_unlock(&sbi->s_mb_largest_free_orders_locks[ - grp->bb_largest_free_order]); + &sbi->s_mb_largest_free_orders[new]); + write_unlock(&sbi->s_mb_largest_free_orders_locks[new]); } } -- 2.46.1