From: Dan Carpenter <dan.carpenter(a)oracle.com>
mainline inclusion
from mainline-v5.13-rc1
commit 3e9bf43f7f7a46f21ec071cb47be92d0874c48da
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I94JKH
CVE: CVE-2021-47049
---------------------------
The "open_info" variable is added to the &vmbus_connection.chn_msg_list,
but the error handling frees "open_info" without removing it from the
list. This will result in a use after free. First remove it from the
list, and then free it.
Fixes: 6f3d791f3006 ("Drivers: hv: vmbus: Fix rescind handling issues")
Signed-off-by: Dan Carpenter <dan.carpenter(a)oracle.com>
Reviewed-by: Andrea Parri <parri.andrea(a)gmail.com>
Link: https://lore.kernel.org/r/YHV3XLCot6xBS44r@mwanda
Signed-off-by: Wei Liu <wei.liu(a)kernel.org>
Conflicts:
drivers/hv/channel.c
Signed-off-by: Zhang Xiaoxu <zhangxiaoxu5(a)huawei.com>
---
drivers/hv/channel.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 5e515533e9cd..756d2fff50dc 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -194,7 +194,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
if (newchannel->rescind) {
err = -ENODEV;
- goto error_free_gpadl;
+ goto error_clean_msglist;
}
ret = vmbus_post_msg(open_msg,
--
2.34.1
From: Linus Torvalds <torvalds(a)linuxfoundation.org>
stable inclusion
from stable-v5.10.210
commit db896bbe4a9c67cee377e5f6a743350d3ae4acf6
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I93SNV
CVE: CVE-2024-26602
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
--------------------------------
commit 944d5fe50f3f03daacfea16300e656a1691c4a23 upstream.
On some systems, sys_membarrier can be very expensive, causing overall
slowdowns for everything. So put a lock on the path in order to
serialize the accesses to prevent the ability for this to be called at
too high of a frequency and saturate the machine.
Reviewed-and-tested-by: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Acked-by: Borislav Petkov <bp(a)alien8.de>
Fixes: 22e4ebb97582 ("membarrier: Provide expedited private command")
Fixes: c5f58bd58f43 ("membarrier: Provide GLOBAL_EXPEDITED command")
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
[ converted to explicit mutex_*() calls - cleanup.h is not in this stable
branch - gregkh ]
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Zhao Wenhui <zhaowenhui8(a)huawei.com>
---
kernel/sched/membarrier.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index 1757074be994..3ceb582ef9d7 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -34,6 +34,8 @@
| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED \
| MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK)
+static DEFINE_MUTEX(membarrier_ipi_mutex);
+
static void ipi_mb(void *info)
{
smp_mb(); /* IPIs should be serializing but paranoid. */
@@ -89,6 +91,7 @@ static int membarrier_global_expedited(void)
if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
return -ENOMEM;
+ mutex_lock(&membarrier_ipi_mutex);
cpus_read_lock();
rcu_read_lock();
for_each_online_cpu(cpu) {
@@ -135,6 +138,8 @@ static int membarrier_global_expedited(void)
* rq->curr modification in scheduler.
*/
smp_mb(); /* exit from system call is not a mb */
+ mutex_unlock(&membarrier_ipi_mutex);
+
return 0;
}
@@ -168,6 +173,7 @@ static int membarrier_private_expedited(int flags)
if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
return -ENOMEM;
+ mutex_lock(&membarrier_ipi_mutex);
cpus_read_lock();
rcu_read_lock();
for_each_online_cpu(cpu) {
@@ -202,6 +208,7 @@ static int membarrier_private_expedited(int flags)
* rq->curr modification in scheduler.
*/
smp_mb(); /* exit from system call is not a mb */
+ mutex_unlock(&membarrier_ipi_mutex);
return 0;
}
@@ -243,6 +250,7 @@ static int sync_runqueues_membarrier_state(struct mm_struct *mm)
* between threads which are users of @mm has its membarrier state
* updated.
*/
+ mutex_lock(&membarrier_ipi_mutex);
cpus_read_lock();
rcu_read_lock();
for_each_online_cpu(cpu) {
@@ -259,6 +267,7 @@ static int sync_runqueues_membarrier_state(struct mm_struct *mm)
free_cpumask_var(tmpmask);
cpus_read_unlock();
+ mutex_unlock(&membarrier_ipi_mutex);
return 0;
}
--
2.34.1
tree: https://gitee.com/openeuler/kernel.git OLK-6.6
head: 3016126ef302b602feaa8a7b99e4f85987536d65
commit: fa75636857993e25030c3db31444d2aa01d5d9de [3620/3684] RAS: Report ARM processor information to userspace
config: x86_64-defconfig (https://download.01.org/0day-ci/archive/20240301/202403010924.KlNHxAf6-lkp@…)
compiler: gcc-11 (Debian 11.3.0-12) 11.3.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240301/202403010924.KlNHxAf6-lkp@…)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp(a)intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202403010924.KlNHxAf6-lkp@intel.com/
All warnings (new ones prefixed by >>):
In file included from arch/x86/kernel/cpu/mce/core.c:36:
include/linux/ras.h:45:1: error: return type defaults to 'int' [-Werror=implicit-int]
45 | log_arm_hw_error(struct cper_sec_proc_arm *err) { return; }
| ^~~~~~~~~~~~~~~~
>> include/linux/ras.h:45:1: warning: no previous prototype for 'log_arm_hw_error' [-Wmissing-prototypes]
include/linux/ras.h: In function 'log_arm_hw_error':
>> include/linux/ras.h:45:51: warning: 'return' with no value, in function returning non-void [-Wreturn-type]
45 | log_arm_hw_error(struct cper_sec_proc_arm *err) { return; }
| ^~~~~~
include/linux/ras.h:45:1: note: declared here
45 | log_arm_hw_error(struct cper_sec_proc_arm *err) { return; }
| ^~~~~~~~~~~~~~~~
cc1: some warnings being treated as errors
vim +/log_arm_hw_error +45 include/linux/ras.h
fa75636857993e Shengwei Luo 2022-02-23 40
fa75636857993e Shengwei Luo 2022-02-23 41 #ifdef CONFIG_RAS_ARM_EVENT_INFO
0607512d0a8d7f Arnd Bergmann 2017-06-27 42 static inline void
fa75636857993e Shengwei Luo 2022-02-23 43 log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev) { return; }
fa75636857993e Shengwei Luo 2022-02-23 44 #else
0607512d0a8d7f Arnd Bergmann 2017-06-27 @45 log_arm_hw_error(struct cper_sec_proc_arm *err) { return; }
297b64c74385fc Tyler Baicar 2017-06-21 46 #endif
297b64c74385fc Tyler Baicar 2017-06-21 47
:::::: The code at line 45 was first introduced by commit
:::::: 0607512d0a8d7fac86667466b884095e04b10a59 ras: mark stub functions as 'inline'
:::::: TO: Arnd Bergmann <arnd(a)arndb.de>
:::::: CC: Will Deacon <will.deacon(a)arm.com>
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
From: Filipe Manana <fdmanana(a)suse.com>
stable inclusion
from stable-v5.12.5
commit 96157707c0420e3d3edfe046f1cc797fee117ade
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I94K22
CVE: CVE-2021-46987
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
---------------------------
[ Upstream commit f9baa501b4fd6962257853d46ddffbc21f27e344 ]
There are a few exceptional cases where cloning an inline extent needs to
copy the inline extent data into a page of the destination inode.
When this happens, we end up starting a transaction while having a dirty
page for the destination inode and while having the range locked in the
destination's inode iotree too. Because when reserving metadata space
for a transaction we may need to flush existing delalloc in case there is
not enough free space, we have a mechanism in place to prevent a deadlock,
which was introduced in commit 3d45f221ce627d ("btrfs: fix deadlock when
cloning inline extent and low on free metadata space").
However when using qgroups, a transaction also reserves metadata qgroup
space, which can also result in flushing delalloc in case there is not
enough available space at the moment. When this happens we deadlock, since
flushing delalloc requires locking the file range in the inode's iotree
and the range was already locked at the very beginning of the clone
operation, before attempting to start the transaction.
When this issue happens, stack traces like the following are reported:
[72747.556262] task:kworker/u81:9 state:D stack: 0 pid: 225 ppid: 2 flags:0x00004000
[72747.556268] Workqueue: writeback wb_workfn (flush-btrfs-1142)
[72747.556271] Call Trace:
[72747.556273] __schedule+0x296/0x760
[72747.556277] schedule+0x3c/0xa0
[72747.556279] io_schedule+0x12/0x40
[72747.556284] __lock_page+0x13c/0x280
[72747.556287] ? generic_file_readonly_mmap+0x70/0x70
[72747.556325] extent_write_cache_pages+0x22a/0x440 [btrfs]
[72747.556331] ? __set_page_dirty_nobuffers+0xe7/0x160
[72747.556358] ? set_extent_buffer_dirty+0x5e/0x80 [btrfs]
[72747.556362] ? update_group_capacity+0x25/0x210
[72747.556366] ? cpumask_next_and+0x1a/0x20
[72747.556391] extent_writepages+0x44/0xa0 [btrfs]
[72747.556394] do_writepages+0x41/0xd0
[72747.556398] __writeback_single_inode+0x39/0x2a0
[72747.556403] writeback_sb_inodes+0x1ea/0x440
[72747.556407] __writeback_inodes_wb+0x5f/0xc0
[72747.556410] wb_writeback+0x235/0x2b0
[72747.556414] ? get_nr_inodes+0x35/0x50
[72747.556417] wb_workfn+0x354/0x490
[72747.556420] ? newidle_balance+0x2c5/0x3e0
[72747.556424] process_one_work+0x1aa/0x340
[72747.556426] worker_thread+0x30/0x390
[72747.556429] ? create_worker+0x1a0/0x1a0
[72747.556432] kthread+0x116/0x130
[72747.556435] ? kthread_park+0x80/0x80
[72747.556438] ret_from_fork+0x1f/0x30
[72747.566958] Workqueue: btrfs-flush_delalloc btrfs_work_helper [btrfs]
[72747.566961] Call Trace:
[72747.566964] __schedule+0x296/0x760
[72747.566968] ? finish_wait+0x80/0x80
[72747.566970] schedule+0x3c/0xa0
[72747.566995] wait_extent_bit.constprop.68+0x13b/0x1c0 [btrfs]
[72747.566999] ? finish_wait+0x80/0x80
[72747.567024] lock_extent_bits+0x37/0x90 [btrfs]
[72747.567047] btrfs_invalidatepage+0x299/0x2c0 [btrfs]
[72747.567051] ? find_get_pages_range_tag+0x2cd/0x380
[72747.567076] __extent_writepage+0x203/0x320 [btrfs]
[72747.567102] extent_write_cache_pages+0x2bb/0x440 [btrfs]
[72747.567106] ? update_load_avg+0x7e/0x5f0
[72747.567109] ? enqueue_entity+0xf4/0x6f0
[72747.567134] extent_writepages+0x44/0xa0 [btrfs]
[72747.567137] ? enqueue_task_fair+0x93/0x6f0
[72747.567140] do_writepages+0x41/0xd0
[72747.567144] __filemap_fdatawrite_range+0xc7/0x100
[72747.567167] btrfs_run_delalloc_work+0x17/0x40 [btrfs]
[72747.567195] btrfs_work_helper+0xc2/0x300 [btrfs]
[72747.567200] process_one_work+0x1aa/0x340
[72747.567202] worker_thread+0x30/0x390
[72747.567205] ? create_worker+0x1a0/0x1a0
[72747.567208] kthread+0x116/0x130
[72747.567211] ? kthread_park+0x80/0x80
[72747.567214] ret_from_fork+0x1f/0x30
[72747.569686] task:fsstress state:D stack: 0 pid:841421 ppid:841417 flags:0x00000000
[72747.569689] Call Trace:
[72747.569691] __schedule+0x296/0x760
[72747.569694] schedule+0x3c/0xa0
[72747.569721] try_flush_qgroup+0x95/0x140 [btrfs]
[72747.569725] ? finish_wait+0x80/0x80
[72747.569753] btrfs_qgroup_reserve_data+0x34/0x50 [btrfs]
[72747.569781] btrfs_check_data_free_space+0x5f/0xa0 [btrfs]
[72747.569804] btrfs_buffered_write+0x1f7/0x7f0 [btrfs]
[72747.569810] ? path_lookupat.isra.48+0x97/0x140
[72747.569833] btrfs_file_write_iter+0x81/0x410 [btrfs]
[72747.569836] ? __kmalloc+0x16a/0x2c0
[72747.569839] do_iter_readv_writev+0x160/0x1c0
[72747.569843] do_iter_write+0x80/0x1b0
[72747.569847] vfs_writev+0x84/0x140
[72747.569869] ? btrfs_file_llseek+0x38/0x270 [btrfs]
[72747.569873] do_writev+0x65/0x100
[72747.569876] do_syscall_64+0x33/0x40
[72747.569879] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[72747.569899] task:fsstress state:D stack: 0 pid:841424 ppid:841417 flags:0x00004000
[72747.569903] Call Trace:
[72747.569906] __schedule+0x296/0x760
[72747.569909] schedule+0x3c/0xa0
[72747.569936] try_flush_qgroup+0x95/0x140 [btrfs]
[72747.569940] ? finish_wait+0x80/0x80
[72747.569967] __btrfs_qgroup_reserve_meta+0x36/0x50 [btrfs]
[72747.569989] start_transaction+0x279/0x580 [btrfs]
[72747.570014] clone_copy_inline_extent+0x332/0x490 [btrfs]
[72747.570041] btrfs_clone+0x5b7/0x7a0 [btrfs]
[72747.570068] ? lock_extent_bits+0x64/0x90 [btrfs]
[72747.570095] btrfs_clone_files+0xfc/0x150 [btrfs]
[72747.570122] btrfs_remap_file_range+0x3d8/0x4a0 [btrfs]
[72747.570126] do_clone_file_range+0xed/0x200
[72747.570131] vfs_clone_file_range+0x37/0x110
[72747.570134] ioctl_file_clone+0x7d/0xb0
[72747.570137] do_vfs_ioctl+0x138/0x630
[72747.570140] __x64_sys_ioctl+0x62/0xc0
[72747.570143] do_syscall_64+0x33/0x40
[72747.570146] entry_SYSCALL_64_after_hwframe+0x44/0xa9
So fix this by skipping the flush of delalloc for an inode that is
flagged with BTRFS_INODE_NO_DELALLOC_FLUSH, meaning it is currently under
such a special case of cloning an inline extent, when flushing delalloc
during qgroup metadata reservation.
The special cases for cloning inline extents were added in kernel 5.7 by
by commit 05a5a7621ce66c ("Btrfs: implement full reflink support for
inline extents"), while having qgroup metadata space reservation flushing
delalloc when low on space was added in kernel 5.9 by commit
c53e9653605dbf ("btrfs: qgroup: try to flush qgroup space when we get
-EDQUOT"). So use a "Fixes:" tag for the later commit to ease stable
kernel backports.
Reported-by: Wang Yugui <wangyugui(a)e16-tech.com>
Link: https://lore.kernel.org/linux-btrfs/20210421083137.31E3.409509F4@e16-tech.c…
Fixes: c53e9653605dbf ("btrfs: qgroup: try to flush qgroup space when we get -EDQUOT")
CC: stable(a)vger.kernel.org # 5.9+
Reviewed-by: Qu Wenruo <wqu(a)suse.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: Yifan Qiao <qiaoyifan4(a)huawei.com>
---
fs/btrfs/ctree.h | 2 +-
fs/btrfs/inode.c | 4 ++--
fs/btrfs/ioctl.c | 2 +-
fs/btrfs/qgroup.c | 2 +-
fs/btrfs/send.c | 4 ++--
fs/btrfs/transaction.c | 2 +-
6 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index bcc6848bb6d6..92ecb4562584 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3008,7 +3008,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
struct inode *inode, u64 new_size,
u32 min_type);
-int btrfs_start_delalloc_snapshot(struct btrfs_root *root);
+int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context);
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr,
bool in_reclaim_context);
int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c900a39666e3..d87a53613e34 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9526,7 +9526,7 @@ static int start_delalloc_inodes(struct btrfs_root *root,
return ret;
}
-int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
+int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context)
{
struct writeback_control wbc = {
.nr_to_write = LONG_MAX,
@@ -9539,7 +9539,7 @@ int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
return -EROFS;
- return start_delalloc_inodes(root, &wbc, true, false);
+ return start_delalloc_inodes(root, &wbc, true, in_reclaim_context);
}
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 63bf68e0b006..e71c80b45bb1 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1030,7 +1030,7 @@ static noinline int btrfs_mksnapshot(const struct path *parent,
*/
btrfs_drew_read_lock(&root->snapshot_lock);
- ret = btrfs_start_delalloc_snapshot(root);
+ ret = btrfs_start_delalloc_snapshot(root, false);
if (ret)
goto out;
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index a67323c2d41f..506d12ef0d16 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3695,7 +3695,7 @@ static int try_flush_qgroup(struct btrfs_root *root)
return 0;
}
- ret = btrfs_start_delalloc_snapshot(root);
+ ret = btrfs_start_delalloc_snapshot(root, true);
if (ret < 0)
goto out;
btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index b081b61e97c8..ff3114e652b3 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -7196,7 +7196,7 @@ static int flush_delalloc_roots(struct send_ctx *sctx)
int i;
if (root) {
- ret = btrfs_start_delalloc_snapshot(root);
+ ret = btrfs_start_delalloc_snapshot(root, false);
if (ret)
return ret;
btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
@@ -7204,7 +7204,7 @@ static int flush_delalloc_roots(struct send_ctx *sctx)
for (i = 0; i < sctx->clone_roots_cnt; i++) {
root = sctx->clone_roots[i].root;
- ret = btrfs_start_delalloc_snapshot(root);
+ ret = btrfs_start_delalloc_snapshot(root, false);
if (ret)
return ret;
btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index abd67f984fbc..6b1bdb710123 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -2044,7 +2044,7 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_trans_handle *trans)
list_for_each_entry(pending, head, list) {
int ret;
- ret = btrfs_start_delalloc_snapshot(pending->root);
+ ret = btrfs_start_delalloc_snapshot(pending->root, false);
if (ret)
return ret;
}
--
2.39.2
From: Filipe Manana <fdmanana(a)suse.com>
stable inclusion
from stable-v5.12.5
commit 96157707c0420e3d3edfe046f1cc797fee117ade
bugzilla: 189580
CVE: CVE-2021-46987
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
---------------------------
[ Upstream commit f9baa501b4fd6962257853d46ddffbc21f27e344 ]
There are a few exceptional cases where cloning an inline extent needs to
copy the inline extent data into a page of the destination inode.
When this happens, we end up starting a transaction while having a dirty
page for the destination inode and while having the range locked in the
destination's inode iotree too. Because when reserving metadata space
for a transaction we may need to flush existing delalloc in case there is
not enough free space, we have a mechanism in place to prevent a deadlock,
which was introduced in commit 3d45f221ce627d ("btrfs: fix deadlock when
cloning inline extent and low on free metadata space").
However when using qgroups, a transaction also reserves metadata qgroup
space, which can also result in flushing delalloc in case there is not
enough available space at the moment. When this happens we deadlock, since
flushing delalloc requires locking the file range in the inode's iotree
and the range was already locked at the very beginning of the clone
operation, before attempting to start the transaction.
When this issue happens, stack traces like the following are reported:
[72747.556262] task:kworker/u81:9 state:D stack: 0 pid: 225 ppid: 2 flags:0x00004000
[72747.556268] Workqueue: writeback wb_workfn (flush-btrfs-1142)
[72747.556271] Call Trace:
[72747.556273] __schedule+0x296/0x760
[72747.556277] schedule+0x3c/0xa0
[72747.556279] io_schedule+0x12/0x40
[72747.556284] __lock_page+0x13c/0x280
[72747.556287] ? generic_file_readonly_mmap+0x70/0x70
[72747.556325] extent_write_cache_pages+0x22a/0x440 [btrfs]
[72747.556331] ? __set_page_dirty_nobuffers+0xe7/0x160
[72747.556358] ? set_extent_buffer_dirty+0x5e/0x80 [btrfs]
[72747.556362] ? update_group_capacity+0x25/0x210
[72747.556366] ? cpumask_next_and+0x1a/0x20
[72747.556391] extent_writepages+0x44/0xa0 [btrfs]
[72747.556394] do_writepages+0x41/0xd0
[72747.556398] __writeback_single_inode+0x39/0x2a0
[72747.556403] writeback_sb_inodes+0x1ea/0x440
[72747.556407] __writeback_inodes_wb+0x5f/0xc0
[72747.556410] wb_writeback+0x235/0x2b0
[72747.556414] ? get_nr_inodes+0x35/0x50
[72747.556417] wb_workfn+0x354/0x490
[72747.556420] ? newidle_balance+0x2c5/0x3e0
[72747.556424] process_one_work+0x1aa/0x340
[72747.556426] worker_thread+0x30/0x390
[72747.556429] ? create_worker+0x1a0/0x1a0
[72747.556432] kthread+0x116/0x130
[72747.556435] ? kthread_park+0x80/0x80
[72747.556438] ret_from_fork+0x1f/0x30
[72747.566958] Workqueue: btrfs-flush_delalloc btrfs_work_helper [btrfs]
[72747.566961] Call Trace:
[72747.566964] __schedule+0x296/0x760
[72747.566968] ? finish_wait+0x80/0x80
[72747.566970] schedule+0x3c/0xa0
[72747.566995] wait_extent_bit.constprop.68+0x13b/0x1c0 [btrfs]
[72747.566999] ? finish_wait+0x80/0x80
[72747.567024] lock_extent_bits+0x37/0x90 [btrfs]
[72747.567047] btrfs_invalidatepage+0x299/0x2c0 [btrfs]
[72747.567051] ? find_get_pages_range_tag+0x2cd/0x380
[72747.567076] __extent_writepage+0x203/0x320 [btrfs]
[72747.567102] extent_write_cache_pages+0x2bb/0x440 [btrfs]
[72747.567106] ? update_load_avg+0x7e/0x5f0
[72747.567109] ? enqueue_entity+0xf4/0x6f0
[72747.567134] extent_writepages+0x44/0xa0 [btrfs]
[72747.567137] ? enqueue_task_fair+0x93/0x6f0
[72747.567140] do_writepages+0x41/0xd0
[72747.567144] __filemap_fdatawrite_range+0xc7/0x100
[72747.567167] btrfs_run_delalloc_work+0x17/0x40 [btrfs]
[72747.567195] btrfs_work_helper+0xc2/0x300 [btrfs]
[72747.567200] process_one_work+0x1aa/0x340
[72747.567202] worker_thread+0x30/0x390
[72747.567205] ? create_worker+0x1a0/0x1a0
[72747.567208] kthread+0x116/0x130
[72747.567211] ? kthread_park+0x80/0x80
[72747.567214] ret_from_fork+0x1f/0x30
[72747.569686] task:fsstress state:D stack: 0 pid:841421 ppid:841417 flags:0x00000000
[72747.569689] Call Trace:
[72747.569691] __schedule+0x296/0x760
[72747.569694] schedule+0x3c/0xa0
[72747.569721] try_flush_qgroup+0x95/0x140 [btrfs]
[72747.569725] ? finish_wait+0x80/0x80
[72747.569753] btrfs_qgroup_reserve_data+0x34/0x50 [btrfs]
[72747.569781] btrfs_check_data_free_space+0x5f/0xa0 [btrfs]
[72747.569804] btrfs_buffered_write+0x1f7/0x7f0 [btrfs]
[72747.569810] ? path_lookupat.isra.48+0x97/0x140
[72747.569833] btrfs_file_write_iter+0x81/0x410 [btrfs]
[72747.569836] ? __kmalloc+0x16a/0x2c0
[72747.569839] do_iter_readv_writev+0x160/0x1c0
[72747.569843] do_iter_write+0x80/0x1b0
[72747.569847] vfs_writev+0x84/0x140
[72747.569869] ? btrfs_file_llseek+0x38/0x270 [btrfs]
[72747.569873] do_writev+0x65/0x100
[72747.569876] do_syscall_64+0x33/0x40
[72747.569879] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[72747.569899] task:fsstress state:D stack: 0 pid:841424 ppid:841417 flags:0x00004000
[72747.569903] Call Trace:
[72747.569906] __schedule+0x296/0x760
[72747.569909] schedule+0x3c/0xa0
[72747.569936] try_flush_qgroup+0x95/0x140 [btrfs]
[72747.569940] ? finish_wait+0x80/0x80
[72747.569967] __btrfs_qgroup_reserve_meta+0x36/0x50 [btrfs]
[72747.569989] start_transaction+0x279/0x580 [btrfs]
[72747.570014] clone_copy_inline_extent+0x332/0x490 [btrfs]
[72747.570041] btrfs_clone+0x5b7/0x7a0 [btrfs]
[72747.570068] ? lock_extent_bits+0x64/0x90 [btrfs]
[72747.570095] btrfs_clone_files+0xfc/0x150 [btrfs]
[72747.570122] btrfs_remap_file_range+0x3d8/0x4a0 [btrfs]
[72747.570126] do_clone_file_range+0xed/0x200
[72747.570131] vfs_clone_file_range+0x37/0x110
[72747.570134] ioctl_file_clone+0x7d/0xb0
[72747.570137] do_vfs_ioctl+0x138/0x630
[72747.570140] __x64_sys_ioctl+0x62/0xc0
[72747.570143] do_syscall_64+0x33/0x40
[72747.570146] entry_SYSCALL_64_after_hwframe+0x44/0xa9
So fix this by skipping the flush of delalloc for an inode that is
flagged with BTRFS_INODE_NO_DELALLOC_FLUSH, meaning it is currently under
such a special case of cloning an inline extent, when flushing delalloc
during qgroup metadata reservation.
The special cases for cloning inline extents were added in kernel 5.7 by
by commit 05a5a7621ce66c ("Btrfs: implement full reflink support for
inline extents"), while having qgroup metadata space reservation flushing
delalloc when low on space was added in kernel 5.9 by commit
c53e9653605dbf ("btrfs: qgroup: try to flush qgroup space when we get
-EDQUOT"). So use a "Fixes:" tag for the later commit to ease stable
kernel backports.
Reported-by: Wang Yugui <wangyugui(a)e16-tech.com>
Link: https://lore.kernel.org/linux-btrfs/20210421083137.31E3.409509F4@e16-tech.c…
Fixes: c53e9653605dbf ("btrfs: qgroup: try to flush qgroup space when we get -EDQUOT")
CC: stable(a)vger.kernel.org # 5.9+
Reviewed-by: Qu Wenruo <wqu(a)suse.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: Yifan Qiao <qiaoyifan4(a)huawei.com>
---
fs/btrfs/ctree.h | 2 +-
fs/btrfs/inode.c | 4 ++--
fs/btrfs/ioctl.c | 2 +-
fs/btrfs/qgroup.c | 2 +-
fs/btrfs/send.c | 4 ++--
fs/btrfs/transaction.c | 2 +-
6 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index bcc6848bb6d6..92ecb4562584 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3008,7 +3008,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
struct inode *inode, u64 new_size,
u32 min_type);
-int btrfs_start_delalloc_snapshot(struct btrfs_root *root);
+int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context);
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr,
bool in_reclaim_context);
int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c900a39666e3..d87a53613e34 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9526,7 +9526,7 @@ static int start_delalloc_inodes(struct btrfs_root *root,
return ret;
}
-int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
+int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context)
{
struct writeback_control wbc = {
.nr_to_write = LONG_MAX,
@@ -9539,7 +9539,7 @@ int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
return -EROFS;
- return start_delalloc_inodes(root, &wbc, true, false);
+ return start_delalloc_inodes(root, &wbc, true, in_reclaim_context);
}
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 63bf68e0b006..e71c80b45bb1 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1030,7 +1030,7 @@ static noinline int btrfs_mksnapshot(const struct path *parent,
*/
btrfs_drew_read_lock(&root->snapshot_lock);
- ret = btrfs_start_delalloc_snapshot(root);
+ ret = btrfs_start_delalloc_snapshot(root, false);
if (ret)
goto out;
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index a67323c2d41f..506d12ef0d16 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3695,7 +3695,7 @@ static int try_flush_qgroup(struct btrfs_root *root)
return 0;
}
- ret = btrfs_start_delalloc_snapshot(root);
+ ret = btrfs_start_delalloc_snapshot(root, true);
if (ret < 0)
goto out;
btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index b081b61e97c8..ff3114e652b3 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -7196,7 +7196,7 @@ static int flush_delalloc_roots(struct send_ctx *sctx)
int i;
if (root) {
- ret = btrfs_start_delalloc_snapshot(root);
+ ret = btrfs_start_delalloc_snapshot(root, false);
if (ret)
return ret;
btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
@@ -7204,7 +7204,7 @@ static int flush_delalloc_roots(struct send_ctx *sctx)
for (i = 0; i < sctx->clone_roots_cnt; i++) {
root = sctx->clone_roots[i].root;
- ret = btrfs_start_delalloc_snapshot(root);
+ ret = btrfs_start_delalloc_snapshot(root, false);
if (ret)
return ret;
btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index abd67f984fbc..6b1bdb710123 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -2044,7 +2044,7 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_trans_handle *trans)
list_for_each_entry(pending, head, list) {
int ret;
- ret = btrfs_start_delalloc_snapshot(pending->root);
+ ret = btrfs_start_delalloc_snapshot(pending->root, false);
if (ret)
return ret;
}
--
2.39.2