Jan Kara (1): init: Initialize noop_backing_dev_info early
Jchao Sun (1): writeback: Fix inode->i_io_list not be protected by inode->i_lock error
drivers/base/init.c | 2 ++ fs/fs-writeback.c | 37 ++++++++++++++++++++++++++++--------- fs/inode.c | 2 +- include/linux/backing-dev.h | 2 ++ mm/backing-dev.c | 11 ++--------- 5 files changed, 35 insertions(+), 19 deletions(-)
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/12614 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/H...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/12614 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/H...
From: Jchao Sun sunjunchao2870@gmail.com
mainline inclusion from mainline-v5.19-rc2 commit 10e14073107dd0b6d97d9516a02845a8e501c2c9 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IAZJPL
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Commit b35250c0816c ("writeback: Protect inode->i_io_list with inode->i_lock") made inode->i_io_list not only protected by wb->list_lock but also inode->i_lock, but inode_io_list_move_locked() was missed. Add lock there and also update comment describing things protected by inode->i_lock. This also fixes a race where __mark_inode_dirty() could move inode under flush worker's hands and thus sync(2) could miss writing some inodes.
Fixes: b35250c0816c ("writeback: Protect inode->i_io_list with inode->i_lock") Link: https://lore.kernel.org/r/20220524150540.12552-1-sunjunchao2870@gmail.com CC: stable@vger.kernel.org Signed-off-by: Jchao Sun sunjunchao2870@gmail.com Signed-off-by: Jan Kara jack@suse.cz
Conflicts: fs/fs-writeback.c [Context conflict caused by commit 35d14f278e53 ("fs: clean up __mark_inode_dirty() a bit") not being backported.] Signed-off-by: Baokun Li libaokun1@huawei.com --- fs/fs-writeback.c | 37 ++++++++++++++++++++++++++++--------- fs/inode.c | 2 +- 2 files changed, 29 insertions(+), 10 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index dd53d00a2583..d34054fcb702 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -136,6 +136,7 @@ static bool inode_io_list_move_locked(struct inode *inode, struct list_head *head) { assert_spin_locked(&wb->list_lock); + assert_spin_locked(&inode->i_lock);
list_move(&inode->i_io_list, head);
@@ -1178,9 +1179,9 @@ static int move_expired_inodes(struct list_head *delaying_queue, inode = wb_inode(delaying_queue->prev); if (inode_dirtied_after(inode, dirtied_before)) break; + spin_lock(&inode->i_lock); list_move(&inode->i_io_list, &tmp); moved++; - spin_lock(&inode->i_lock); inode->i_state |= I_SYNC_QUEUED; spin_unlock(&inode->i_lock); if (sb_is_blkdev_sb(inode->i_sb)) @@ -1196,7 +1197,12 @@ static int move_expired_inodes(struct list_head *delaying_queue, goto out; }
- /* Move inodes from one superblock together */ + /* + * Although inode's i_io_list is moved from 'tmp' to 'dispatch_queue', + * we don't take inode->i_lock here because it is just a pointless overhead. + * Inode is already marked as I_SYNC_QUEUED so writeback list handling is + * fully under our control. + */ while (!list_empty(&tmp)) { sb = wb_inode(tmp.prev)->i_sb; list_for_each_prev_safe(pos, node, &tmp) { @@ -1618,8 +1624,8 @@ static long writeback_sb_inodes(struct super_block *sb, * We'll have another go at writing back this inode * when we completed a full scan of b_io. */ - spin_unlock(&inode->i_lock); requeue_io(inode, wb); + spin_unlock(&inode->i_lock); trace_writeback_sb_inodes_requeue(inode); continue; } @@ -2158,6 +2164,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) { struct super_block *sb = inode->i_sb; int dirtytime; + struct bdi_writeback *wb = NULL;
trace_writeback_mark_inode_dirty(inode, flags);
@@ -2199,6 +2206,17 @@ void __mark_inode_dirty(struct inode *inode, int flags) inode->i_state &= ~I_DIRTY_TIME; inode->i_state |= flags;
+ /* + * Grab inode's wb early because it requires dropping i_lock and we + * need to make sure following checks happen atomically with dirty + * list handling so that we don't move inodes under flush worker's + * hands. + */ + if (!was_dirty) { + wb = locked_inode_to_wb_and_lock_list(inode); + spin_lock(&inode->i_lock); + } + /* * If the inode is queued for writeback by flush worker, just * update its dirty state. Once the flush worker is done with @@ -2206,7 +2224,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) * list, based upon its state. */ if (inode->i_state & I_SYNC_QUEUED) - goto out_unlock_inode; + goto out_unlock;
/* * Only add valid (hashed) inodes to the superblock's @@ -2214,22 +2232,19 @@ void __mark_inode_dirty(struct inode *inode, int flags) */ if (!S_ISBLK(inode->i_mode)) { if (inode_unhashed(inode)) - goto out_unlock_inode; + goto out_unlock; } if (inode->i_state & I_FREEING) - goto out_unlock_inode; + goto out_unlock;
/* * If the inode was already on b_dirty/b_io/b_more_io, don't * reposition it (that would break b_dirty time-ordering). */ if (!was_dirty) { - struct bdi_writeback *wb; struct list_head *dirty_list; bool wakeup_bdi = false;
- wb = locked_inode_to_wb_and_lock_list(inode); - inode->dirtied_when = jiffies; if (dirtytime) inode->dirtied_time_when = jiffies; @@ -2243,6 +2258,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) dirty_list);
spin_unlock(&wb->list_lock); + spin_unlock(&inode->i_lock); trace_writeback_dirty_inode_enqueue(inode);
/* @@ -2256,6 +2272,9 @@ void __mark_inode_dirty(struct inode *inode, int flags) return; } } +out_unlock: + if (wb) + spin_unlock(&wb->list_lock); out_unlock_inode: spin_unlock(&inode->i_lock); } diff --git a/fs/inode.c b/fs/inode.c index 5df2e8ee23ed..ae9d7b84caf3 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -26,7 +26,7 @@ * Inode locking rules: * * inode->i_lock protects: - * inode->i_state, inode->i_hash, __iget() + * inode->i_state, inode->i_hash, __iget(), inode->i_io_list * Inode LRU list locks protect: * inode->i_sb->s_inode_lru, inode->i_lru * inode->i_sb->s_inode_list_lock protects:
From: Jan Kara jack@suse.cz
mainline inclusion from mainline-v5.19-rc3 commit 4bca7e80b6455772b4bf3f536dcbc19aac424d6a category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IAZJPL
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
noop_backing_dev_info is used by superblocks of various pseudofilesystems such as kdevtmpfs. After commit 10e14073107d ("writeback: Fix inode->i_io_list not be protected by inode->i_lock error") this broke because __mark_inode_dirty() started to access more fields from noop_backing_dev_info and this led to crashes inside locked_inode_to_wb_and_lock_list() called from __mark_inode_dirty(). Fix the problem by initializing noop_backing_dev_info before the filesystems get mounted.
Fixes: 10e14073107d ("writeback: Fix inode->i_io_list not be protected by inode->i_lock error") Reported-and-tested-by: Suzuki K Poulose suzuki.poulose@arm.com Reported-and-tested-by: Alexandru Elisei alexandru.elisei@arm.com Reported-and-tested-by: Guenter Roeck linux@roeck-us.net Reviewed-by: Christoph Hellwig hch@lst.de Signed-off-by: Jan Kara jack@suse.cz Signed-off-by: Baokun Li libaokun1@huawei.com --- drivers/base/init.c | 2 ++ include/linux/backing-dev.h | 2 ++ mm/backing-dev.c | 11 ++--------- 3 files changed, 6 insertions(+), 9 deletions(-)
diff --git a/drivers/base/init.c b/drivers/base/init.c index 908e6520e804..e086e0e441c7 100644 --- a/drivers/base/init.c +++ b/drivers/base/init.c @@ -8,6 +8,7 @@ #include <linux/init.h> #include <linux/memory.h> #include <linux/of.h> +#include <linux/backing-dev.h>
#include "base.h"
@@ -20,6 +21,7 @@ void __init driver_init(void) { /* These are the core pieces */ + bdi_init(&noop_backing_dev_info); devtmpfs_init(); devices_init(); buses_init(); diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index ec3ca0b197dd..c67b9f6a6b1b 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -142,6 +142,8 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
extern struct backing_dev_info noop_backing_dev_info;
+int bdi_init(struct backing_dev_info *bdi); + /** * writeback_in_progress - determine whether there is writeback in progress * @wb: bdi_writeback of interest diff --git a/mm/backing-dev.c b/mm/backing-dev.c index c5caa7e91935..a395eba85670 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -243,20 +243,13 @@ static __init int bdi_class_init(void) } postcore_initcall(bdi_class_init);
-static int bdi_init(struct backing_dev_info *bdi); - static int __init default_bdi_init(void) { - int err; - bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_SYSFS, 0); if (!bdi_wq) return -ENOMEM; - - err = bdi_init(&noop_backing_dev_info); - - return err; + return 0; } subsys_initcall(default_bdi_init);
@@ -844,7 +837,7 @@ static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb)
#endif /* CONFIG_CGROUP_WRITEBACK */
-static int bdi_init(struct backing_dev_info *bdi) +int bdi_init(struct backing_dev_info *bdi) { int ret;