From: Jan Kara jack@suse.cz
mainline inclusion from mainline-v5.11-rc1 commit c92dc856848f32781e37b88c1b7f875e274f5efb category: bugfix bugzilla: 46758 CVE: NA
-----------------------------------------------
When filesystem inconsistency is detected with group locked, we currently try to modify superblock to store error there without blocking. However this can cause superblock checksum failures (or DIF/DIX failure) when the superblock is just being written out.
Make error handling code just store error information in ext4_sb_info structure and copy it to on-disk superblock only in ext4_commit_super(). In case of error happening with group locked, we just postpone the superblock flushing to a workqueue.
[ Added fixup so that s_first_error_* does not get updated after the file system is remounted. Also added fix for syzbot failure. - Ted ]
Signed-off-by: Jan Kara jack@suse.cz Link: https://lore.kernel.org/r/20201127113405.26867-8-jack@suse.cz Signed-off-by: Theodore Ts'o tytso@mit.edu Cc: Hillf Danton hdanton@sina.com Reported-by: syzbot+9043030c040ce1849a60@syzkaller.appspotmail.com
conflicts: fs/ext4/ext4.h fs/ext4/super.c
Signed-off-by: Ye Bin yebin10@huawei.com Reviewed-by: zhangyi (F) yi.zhang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/ext4/ext4.h | 21 +++++++++ fs/ext4/super.c | 120 +++++++++++++++++++++++++++++++++--------------- 2 files changed, 104 insertions(+), 37 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 73cd56bfd9307..2a63c69bd19e2 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1545,6 +1545,27 @@ struct ext4_sb_info { /* Record the errseq of the backing block device */ errseq_t s_bdev_wb_err; spinlock_t s_bdev_wb_lock; + + /* Information about errors that happened during this mount */ + spinlock_t s_error_lock; + int s_add_error_count; + int s_first_error_code; + __u32 s_first_error_line; + __u32 s_first_error_ino; + __u64 s_first_error_block; + const char *s_first_error_func; + time64_t s_first_error_time; + int s_last_error_code; + __u32 s_last_error_line; + __u32 s_last_error_ino; + __u64 s_last_error_block; + const char *s_last_error_func; + time64_t s_last_error_time; + /* + * If we are in a context where we cannot update error information in + * the on-disk superblock, we queue this work to do it. + */ + struct work_struct s_error_work; };
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d6e604ca9f0e5..9ae5a84c80c91 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -344,10 +344,8 @@ void ext4_itable_unused_set(struct super_block *sb, bg->bg_itable_unused_hi = cpu_to_le16(count >> 16); }
-static void __ext4_update_tstamp(__le32 *lo, __u8 *hi) +static void __ext4_update_tstamp(__le32 *lo, __u8 *hi, time64_t now) { - time64_t now = ktime_get_real_seconds(); - now = clamp_val(now, 0, (1ull << 40) - 1);
*lo = cpu_to_le32(lower_32_bits(now)); @@ -359,7 +357,8 @@ static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi) return ((time64_t)(*hi) << 32) + le32_to_cpu(*lo); } #define ext4_update_tstamp(es, tstamp) \ - __ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi) + __ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi, \ + ktime_get_real_seconds()) #define ext4_get_tstamp(es, tstamp) \ __ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
@@ -489,7 +488,7 @@ static void __save_error_info(struct super_block *sb, int error, __u32 ino, __u64 block, const char *func, unsigned int line) { - struct ext4_super_block *es = EXT4_SB(sb)->s_es; + struct ext4_sb_info *sbi = EXT4_SB(sb);
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; if (bdev_read_only(sb->s_bdev)) @@ -497,30 +496,24 @@ static void __save_error_info(struct super_block *sb, int error, /* We default to EFSCORRUPTED error... */ if (error == 0) error = EFSCORRUPTED; - es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - ext4_update_tstamp(es, s_last_error_time); - strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); - es->s_last_error_line = cpu_to_le32(line); - es->s_last_error_ino = cpu_to_le32(ino); - es->s_last_error_block = cpu_to_le64(block); - es->s_last_error_errcode = ext4_errno_to_code(error); - if (!es->s_first_error_time) { - es->s_first_error_time = es->s_last_error_time; - es->s_first_error_time_hi = es->s_last_error_time_hi; - strncpy(es->s_first_error_func, func, - sizeof(es->s_first_error_func)); - es->s_first_error_line = cpu_to_le32(line); - es->s_first_error_ino = es->s_last_error_ino; - es->s_first_error_block = es->s_last_error_block; - es->s_first_error_errcode = es->s_last_error_errcode; - } - /* - * Start the daily error reporting function if it hasn't been - * started already - */ - if (!es->s_error_count) - mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ); - le32_add_cpu(&es->s_error_count, 1); + + spin_lock(&sbi->s_error_lock); + sbi->s_add_error_count++; + sbi->s_last_error_code = error; + sbi->s_last_error_line = line; + sbi->s_last_error_ino = ino; + sbi->s_last_error_block = block; + sbi->s_last_error_func = func; + sbi->s_last_error_time = ktime_get_real_seconds(); + if (!sbi->s_first_error_time) { + sbi->s_first_error_code = error; + sbi->s_first_error_line = line; + sbi->s_first_error_ino = ino; + sbi->s_first_error_block = block; + sbi->s_first_error_func = func; + sbi->s_first_error_time = sbi->s_last_error_time; + } + spin_unlock(&sbi->s_error_lock); }
static void save_error_info(struct super_block *sb, int error, @@ -590,6 +583,14 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro) ext4_netlink_send_info(sb, 1); }
+static void flush_stashed_error_work(struct work_struct *work) +{ + struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info, + s_error_work); + + ext4_commit_super(sbi->s_sb, 1); +} + #define ext4_error_ratelimit(sb) \ ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state), \ "EXT4-fs error") @@ -828,8 +829,6 @@ __acquires(bitlock) return;
trace_ext4_error(sb, function, line); - __save_error_info(sb, EFSCORRUPTED, ino, block, function, line); - if (ext4_error_ratelimit(sb)) { va_start(args, fmt); vaf.fmt = fmt; @@ -845,16 +844,15 @@ __acquires(bitlock) va_end(args); }
- if (test_opt(sb, WARN_ON_ERROR)) - WARN_ON_ONCE(1); - if (test_opt(sb, ERRORS_CONT)) { - ext4_commit_super(sb, 0); + if (test_opt(sb, WARN_ON_ERROR)) + WARN_ON_ONCE(1); + __save_error_info(sb, EFSCORRUPTED, ino, block, function, line); + schedule_work(&EXT4_SB(sb)->s_error_work); return; } - ext4_unlock_group(sb, grp); - ext4_commit_super(sb, 1); + save_error_info(sb, EFSCORRUPTED, ino, block, function, line); ext4_handle_error(sb, false); /* * We only get here in the ERRORS_RO case; relocking the group @@ -1027,6 +1025,7 @@ static void ext4_put_super(struct super_block *sb) ext4_unregister_li_request(sb); ext4_quota_off_umount(sb);
+ flush_work(&sbi->s_error_work); destroy_workqueue(sbi->rsv_conversion_wq);
if (sbi->s_journal) { @@ -4303,6 +4302,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) }
timer_setup(&sbi->s_err_report, print_daily_error_info, 0); + spin_lock_init(&sbi->s_error_lock); + INIT_WORK(&sbi->s_error_work, flush_stashed_error_work);
/* Register extent status tree shrinker */ if (ext4_es_register_shrinker(sbi)) @@ -4702,6 +4703,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_es_unregister_shrinker(sbi); failed_mount3: del_timer_sync(&sbi->s_err_report); + flush_work(&sbi->s_error_work); if (sbi->s_mmp_tsk) kthread_stop(sbi->s_mmp_tsk); failed_mount2: @@ -5023,6 +5025,7 @@ static int ext4_load_journal(struct super_block *sb,
static int ext4_commit_super(struct super_block *sb, int sync) { + struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = EXT4_SB(sb)->s_es; struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; int error = 0; @@ -5059,6 +5062,46 @@ static int ext4_commit_super(struct super_block *sb, int sync) es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( &EXT4_SB(sb)->s_freeinodes_counter)); + /* Copy error information to the on-disk superblock */ + spin_lock(&sbi->s_error_lock); + if (sbi->s_add_error_count > 0) { + es->s_state |= cpu_to_le16(EXT4_ERROR_FS); + if (!es->s_first_error_time && !es->s_first_error_time_hi) { + __ext4_update_tstamp(&es->s_first_error_time, + &es->s_first_error_time_hi, + sbi->s_first_error_time); + strncpy(es->s_first_error_func, sbi->s_first_error_func, + sizeof(es->s_first_error_func)); + es->s_first_error_line = + cpu_to_le32(sbi->s_first_error_line); + es->s_first_error_ino = + cpu_to_le32(sbi->s_first_error_ino); + es->s_first_error_block = + cpu_to_le64(sbi->s_first_error_block); + es->s_first_error_errcode = + ext4_errno_to_code(sbi->s_first_error_code); + } + __ext4_update_tstamp(&es->s_last_error_time, + &es->s_last_error_time_hi, + sbi->s_last_error_time); + strncpy(es->s_last_error_func, sbi->s_last_error_func, + sizeof(es->s_last_error_func)); + es->s_last_error_line = cpu_to_le32(sbi->s_last_error_line); + es->s_last_error_ino = cpu_to_le32(sbi->s_last_error_ino); + es->s_last_error_block = cpu_to_le64(sbi->s_last_error_block); + es->s_last_error_errcode = + ext4_errno_to_code(sbi->s_last_error_code); + /* + * Start the daily error reporting function if it hasn't been + * started already + */ + if (!es->s_error_count) + mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); + le32_add_cpu(&es->s_error_count, sbi->s_add_error_count); + sbi->s_add_error_count = 0; + } + spin_unlock(&sbi->s_error_lock); + BUFFER_TRACE(sbh, "marking dirty"); ext4_superblock_csum_set(sb); if (sync) @@ -5412,6 +5455,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); }
+ /* Flush outstanding errors before changing fs state */ + flush_work(&sbi->s_error_work); + if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) { if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { err = -EROFS;