hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I7CBCS
--------------------------------
The following steps can make ext4_writepages fall into a dead loop:
1. Consume free_clusters until free_clusters > 2 * sbi->s_resv_clusters,
   and free_clusters > EXT4_FREECLUSTERS_WATERMARK.
   // e.g. free_clusters = 1422, sbi->s_resv_clusters = 512
   // nr_cpus = 4, EXT4_FREECLUSTERS_WATERMARK = 512
2. umount && mount. // dirty_clusters = 0
3. Run free_clusters tasks concurrently to write different files; many tasks
   write (append) 4K of data via the da_write method. Each inode will consume
   one data block and one extent block in map_block.
   // There are (free_clusters - EXT4_FREECLUSTERS_WATERMARK = 910)
   // tasks choosing the da_write method; the remaining 512 tasks choose the
   // write_begin method. If the tasks that choose the da_write path run first:
   // dirty_clusters = 910, free_clusters = 1422
   // Tasks that choose the write_begin path will get ENOSPC:
   // free_clusters < (nclusters + dirty_clusters + resv_clusters)
   // 1422 < (1 + 910 + 512)
4. After a certain number of map_block iterations in ext4_writepages:
   // free_clusters = 0,
   // dirty_clusters = 910 - (1422 / 2) = 199
5. Delete one 4K file. // free_clusters = 1
6. ext4_writepages falls into a dead loop:
   mpage_map_and_submit_extent
    mpage_map_one_extent // ret = ENOSPC
     ext4_map_blocks -> ext4_ext_map_blocks -> ext4_mb_new_blocks ->
     ext4_claim_free_clusters:
      if (free_clusters >= (nclusters + dirty_clusters)) // false
    if (err == -ENOSPC && ext4_count_free_clusters(sb)) // true
     return err
    *give_up_on_write = true // won't be executed
Fix it by terminating ext4_writepages when no free blocks will be generated.
Signed-off-by: Zhihao Cheng chengzhihao1@huawei.com --- fs/ext4/ext4.h | 1 + fs/ext4/extents.c | 4 +++- fs/ext4/inode.c | 2 +- fs/ext4/mballoc.c | 5 +++-- 4 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 5d5ae6f44510..48d02bea4984 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -700,6 +700,7 @@ enum { #define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010 #define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020 #define EXT4_FREE_BLOCKS_RERESERVE_CLUSTER 0x0040 +#define EXT4_FREE_BLOCKS_DONT_WAIT_JOURNAL 0x0080
/* * ioctl commands diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e42a78170109..b1eb4ea2c68a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1294,7 +1294,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, if (!ablocks[i]) continue; ext4_free_blocks(handle, inode, NULL, ablocks[i], 1, - EXT4_FREE_BLOCKS_METADATA); + EXT4_FREE_BLOCKS_METADATA | + EXT4_FREE_BLOCKS_DONT_WAIT_JOURNAL); } } kfree(ablocks); @@ -4329,6 +4330,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, ext4_discard_preallocations(inode, 0); if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) fb_flags = EXT4_FREE_BLOCKS_NO_QUOT_UPDATE; + fb_flags |= EXT4_FREE_BLOCKS_DONT_WAIT_JOURNAL; ext4_free_blocks(handle, inode, NULL, newblock, EXT4_C2B(sbi, allocated_clusters), fb_flags); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 44018d1da27e..ebb794fed3a7 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2479,7 +2479,7 @@ static int mpage_map_and_submit_extent(handle_t *handle, * is non-zero, a commit should free up blocks. */ if ((err == -ENOMEM) || - (err == -ENOSPC && ext4_count_free_clusters(sb))) { + (err == -ENOSPC && EXT4_SB(sb)->s_mb_free_pending)) { if (progress) goto update_disksize; return err; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index fd8464aadf00..3870dff5f2e7 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5498,8 +5498,9 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, * consistency guarantees. */ if (ext4_handle_valid(handle) && - ((flags & EXT4_FREE_BLOCKS_METADATA) || - !ext4_should_writeback_data(inode))) { + (((flags & EXT4_FREE_BLOCKS_METADATA) || + !ext4_should_writeback_data(inode)) && + !(flags & EXT4_FREE_BLOCKS_DONT_WAIT_JOURNAL))) { struct ext4_free_data *new_entry; /* * We use __GFP_NOFAIL because ext4_free_blocks() is not allowed