tree: https://gitee.com/openeuler/kernel.git OLK-5.10
head: 8bba4b17284e774544bd01d9ea5190546e93a849
commit: 33ba25cc4869bab81ad31784e7bbb25e5da2a2ad [2610/2610] anolis: fscache,cachefiles: add fscache_prepare_read() helper
config: arm64-randconfig-004-20241230 (https://download.01.org/0day-ci/archive/20241231/202412310433.hkwF4RYM-lkp@…)
compiler: aarch64-linux-gcc (GCC) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241231/202412310433.hkwF4RYM-lkp@…)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp(a)intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202412310433.hkwF4RYM-lkp@intel.com/
All warnings (new ones prefixed by >>):
>> fs/cachefiles/rdwr.c:860:6: warning: no previous prototype for 'cachefiles_readpages_work_func' [-Wmissing-prototypes]
860 | void cachefiles_readpages_work_func(struct work_struct *work)
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
vim +/cachefiles_readpages_work_func +860 fs/cachefiles/rdwr.c
859
> 860 void cachefiles_readpages_work_func(struct work_struct *work)
861 {
862 struct cachefiles_kiocb *ki = container_of(work, struct cachefiles_kiocb, work);
863 int ret;
864
865 ret = vfs_iocb_iter_read(ki->iocb.ki_filp, &ki->iocb, &ki->iter);
866 /* complete the request if there's any progress or error occurred */
867 if (ret != -EIOCBQUEUED) {
868 struct fscache_retrieval *op = ki->op;
869 unsigned int nr_pages = atomic_read(&op->n_pages);
870 unsigned int done_pages = 0;
871 int i, error;
872
873 if (ret > 0)
874 done_pages = ret / PAGE_SIZE;
875
876 for (i = 0; i < nr_pages; i++) {
877 error = i < done_pages ? 0 : -EIO;
878 fscache_end_io(op, ki->bvs[i].bv_page, error);
879 }
880
881 fscache_retrieval_complete(op, nr_pages);
882 fscache_put_retrieval(op);
883 kfree(ki);
884 }
885 }
886
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Hi Zhao,
FYI, the error/warning still remains.
tree: https://gitee.com/openeuler/kernel.git openEuler-1.0-LTS
head: 4dc4cec05b40921a3db85d24f97f1142272e4abf
commit: 6636f4434a9c5c9c645694db206188ee5a6626dd [1356/1356] ext4: report error to userspace by netlink
config: x86_64-buildonly-randconfig-003-20241228 (https://download.01.org/0day-ci/archive/20241231/202412310428.apoNWdMg-lkp@…)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241231/202412310428.apoNWdMg-lkp@…)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp(a)intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202412310428.apoNWdMg-lkp@intel.com/
All errors (new ones prefixed by >>):
>> ERROR: "netlink_kernel_release" [fs/ext4/ext4.ko] undefined!
>> ERROR: "init_net" [fs/ext4/ext4.ko] undefined!
>> ERROR: "__alloc_skb" [fs/ext4/ext4.ko] undefined!
>> ERROR: "netlink_broadcast" [fs/ext4/ext4.ko] undefined!
>> ERROR: "kfree_skb" [fs/ext4/ext4.ko] undefined!
>> ERROR: "__netlink_kernel_create" [fs/ext4/ext4.ko] undefined!
>> ERROR: "__nlmsg_put" [fs/ext4/ext4.ko] undefined!
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
tree: https://gitee.com/openeuler/kernel.git OLK-5.10
head: 8bba4b17284e774544bd01d9ea5190546e93a849
commit: 33ba25cc4869bab81ad31784e7bbb25e5da2a2ad [2610/2610] anolis: fscache,cachefiles: add fscache_prepare_read() helper
config: x86_64-buildonly-randconfig-001-20241230 (https://download.01.org/0day-ci/archive/20241231/202412310255.QS4rGSOC-lkp@…)
compiler: clang version 19.1.3 (https://github.com/llvm/llvm-project ab51eccf88f5321e7c60591c5546b254b6afab99)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241231/202412310255.QS4rGSOC-lkp@…)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp(a)intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202412310255.QS4rGSOC-lkp@intel.com/
All warnings (new ones prefixed by >>):
In file included from fs/cachefiles/rdwr.c:11:
In file included from include/linux/swap.h:10:
In file included from include/linux/memcontrol.h:13:
In file included from include/linux/cgroup.h:28:
In file included from include/linux/cgroup-defs.h:22:
In file included from include/linux/bpf-cgroup.h:5:
In file included from include/linux/bpf.h:21:
In file included from include/linux/kallsyms.h:12:
In file included from include/linux/mm.h:1587:
include/linux/vmstat.h:431:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
431 | return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
| ~~~~~~~~~~~ ^ ~~~
>> fs/cachefiles/rdwr.c:860:6: warning: no previous prototype for function 'cachefiles_readpages_work_func' [-Wmissing-prototypes]
860 | void cachefiles_readpages_work_func(struct work_struct *work)
| ^
fs/cachefiles/rdwr.c:860:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
860 | void cachefiles_readpages_work_func(struct work_struct *work)
| ^
| static
2 warnings generated.
vim +/cachefiles_readpages_work_func +860 fs/cachefiles/rdwr.c
859
> 860 void cachefiles_readpages_work_func(struct work_struct *work)
861 {
862 struct cachefiles_kiocb *ki = container_of(work, struct cachefiles_kiocb, work);
863 int ret;
864
865 ret = vfs_iocb_iter_read(ki->iocb.ki_filp, &ki->iocb, &ki->iter);
866 /* complete the request if there's any progress or error occurred */
867 if (ret != -EIOCBQUEUED) {
868 struct fscache_retrieval *op = ki->op;
869 unsigned int nr_pages = atomic_read(&op->n_pages);
870 unsigned int done_pages = 0;
871 int i, error;
872
873 if (ret > 0)
874 done_pages = ret / PAGE_SIZE;
875
876 for (i = 0; i < nr_pages; i++) {
877 error = i < done_pages ? 0 : -EIO;
878 fscache_end_io(op, ki->bvs[i].bv_page, error);
879 }
880
881 fscache_retrieval_complete(op, nr_pages);
882 fscache_put_retrieval(op);
883 kfree(ki);
884 }
885 }
886
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
tree: https://gitee.com/openeuler/kernel.git OLK-5.10
head: 8bba4b17284e774544bd01d9ea5190546e93a849
commit: 2937cd5f8c58bd8e7895f6b2698057721442248e [2610/2610] cachefiles: notify the user daemon when looking up cookie
config: arm64-randconfig-004-20241230 (https://download.01.org/0day-ci/archive/20241231/202412310256.l8FuLcbO-lkp@…)
compiler: aarch64-linux-gcc (GCC) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241231/202412310256.l8FuLcbO-lkp@…)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp(a)intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202412310256.l8FuLcbO-lkp@intel.com/
All warnings (new ones prefixed by >>):
fs/cachefiles/ondemand.c: In function 'cachefiles_ondemand_fd_release':
>> fs/cachefiles/ondemand.c:23:25: warning: implicit conversion from 'enum cachefiles_obj_ref_trace' to 'enum fscache_obj_ref_trace' [-Wenum-conversion]
23 | cachefiles_obj_put_ondemand_fd);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
fs/cachefiles/ondemand.c: In function 'cachefiles_ondemand_get_fd':
fs/cachefiles/ondemand.c:145:25: warning: implicit conversion from 'enum cachefiles_obj_ref_trace' to 'enum fscache_obj_ref_trace' [-Wenum-conversion]
145 | cachefiles_obj_get_ondemand_fd) ? 0 : -EAGAIN;
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
fs/cachefiles/ondemand.c:191:25: warning: implicit conversion from 'enum cachefiles_obj_ref_trace' to 'enum fscache_obj_ref_trace' [-Wenum-conversion]
191 | cachefiles_obj_put_ondemand_fd);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
vim +23 fs/cachefiles/ondemand.c
7
8 static int cachefiles_ondemand_fd_release(struct inode *inode,
9 struct file *file)
10 {
11 struct cachefiles_object *object = file->private_data;
12 int object_id = object->ondemand_id;
13 struct cachefiles_cache *cache;
14
15 cache = container_of(object->fscache.cache,
16 struct cachefiles_cache, cache);
17
18 object->ondemand_id = CACHEFILES_ONDEMAND_ID_CLOSED;
19 xa_lock(&cache->ondemand_ids.idr_rt);
20 idr_remove(&cache->ondemand_ids, object_id);
21 xa_unlock(&cache->ondemand_ids.idr_rt);
22 object->fscache.cache->ops->put_object(&object->fscache,
> 23 cachefiles_obj_put_ondemand_fd);
24 return 0;
25 }
26
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
tree: https://gitee.com/openeuler/kernel.git OLK-6.6
head: 5672a2cb8a45de45872d2dd25d3dd94348c070eb
commit: 07a18fb6c71ef75cc1205999cd2493b567649466 [1673/1673] KVM: SVM: Prepare memory pool to allocate buffers for KVM_CSV_COMMAND_BATCH
config: x86_64-randconfig-121-20241230 (https://download.01.org/0day-ci/archive/20241231/202412310023.KFzowK2B-lkp@…)
compiler: clang version 19.1.3 (https://github.com/llvm/llvm-project ab51eccf88f5321e7c60591c5546b254b6afab99)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241231/202412310023.KFzowK2B-lkp@…)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp(a)intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202412310023.KFzowK2B-lkp@intel.com/
sparse warnings: (new ones prefixed by >>)
arch/x86/kvm/svm/csv.c:53:5: sparse: sparse: symbol 'csv_vm_attestation' was not declared. Should it be static?
>> arch/x86/kvm/svm/csv.c:129:52: sparse: sparse: Using plain integer as NULL pointer
>> arch/x86/kvm/svm/csv.c:129:6: sparse: sparse: symbol 'g_trans_mempool' was not declared. Should it be static?
vim +129 arch/x86/kvm/svm/csv.c
127
128 static size_t g_mempool_offset;
> 129 void *g_trans_mempool[TRANS_MEMPOOL_BLOCK_NUM] = { 0, };
130
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
From: Hou Tao <houtao1(a)huawei.com>
mainline inclusion
from mainline-v6.13-rc1
commit 41748675c0bf252b3c5f600a95830f0936d366c1
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/IBEAGA
CVE: CVE-2024-53219
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
--------------------------------
When trying to insert a 10MB kernel module kept in a virtio-fs with cache
disabled, the following warning was reported:
------------[ cut here ]------------
WARNING: CPU: 1 PID: 404 at mm/page_alloc.c:4551 ......
Modules linked in:
CPU: 1 PID: 404 Comm: insmod Not tainted 6.9.0-rc5+ #123
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) ......
RIP: 0010:__alloc_pages+0x2bf/0x380
......
Call Trace:
<TASK>
? __warn+0x8e/0x150
? __alloc_pages+0x2bf/0x380
__kmalloc_large_node+0x86/0x160
__kmalloc+0x33c/0x480
virtio_fs_enqueue_req+0x240/0x6d0
virtio_fs_wake_pending_and_unlock+0x7f/0x190
queue_request_and_unlock+0x55/0x60
fuse_simple_request+0x152/0x2b0
fuse_direct_io+0x5d2/0x8c0
fuse_file_read_iter+0x121/0x160
__kernel_read+0x151/0x2d0
kernel_read+0x45/0x50
kernel_read_file+0x1a9/0x2a0
init_module_from_file+0x6a/0xe0
idempotent_init_module+0x175/0x230
__x64_sys_finit_module+0x5d/0xb0
x64_sys_call+0x1c3/0x9e0
do_syscall_64+0x3d/0xc0
entry_SYSCALL_64_after_hwframe+0x4b/0x53
......
</TASK>
---[ end trace 0000000000000000 ]---
The warning is triggered as follows:
1) syscall finit_module() handles the module insertion and it invokes
kernel_read_file() to read the content of the module first.
2) kernel_read_file() allocates a 10MB buffer by using vmalloc() and
passes it to kernel_read(). kernel_read() constructs a kvec iter by
using iov_iter_kvec() and passes it to fuse_file_read_iter().
3) virtio-fs disables the cache, so fuse_file_read_iter() invokes
fuse_direct_io(). As for now, the maximal read size for kvec iter is
only limited by fc->max_read. For virtio-fs, max_read is UINT_MAX, so
fuse_direct_io() doesn't split the 10MB buffer. It saves the address and
the size of the 10MB-sized buffer in out_args[0] of a fuse request and
passes the fuse request to virtio_fs_wake_pending_and_unlock().
4) virtio_fs_wake_pending_and_unlock() uses virtio_fs_enqueue_req() to
queue the request. Because virtiofs needs a DMA-able address,
virtio_fs_enqueue_req() uses kmalloc() to allocate a bounce buffer for
all fuse args, copies these args into the bounce buffer and passes the
physical address of the bounce buffer to virtiofsd. The total length of
these fuse args for the passed fuse request is about 10MB, so
copy_args_to_argbuf() invokes kmalloc() with a 10MB size parameter and
it triggers the warning in __alloc_pages():
if (WARN_ON_ONCE_GFP(order > MAX_PAGE_ORDER, gfp))
return NULL;
5) virtio_fs_enqueue_req() will retry the memory allocation in a
kworker, but it won't help, because kmalloc() will always return NULL
due to the abnormal size and finit_module() will hang forever.
A feasible solution is to limit the value of max_read for virtio-fs, so
the length passed to kmalloc() will be limited. However, it would affect
the maximal read size for normal reads. Writes initiated from the kernel
on virtio-fs have a similar problem, but currently there is no way to
limit fc->max_write in the kernel.
So instead of limiting the values of both max_read and max_write in the
kernel, introduce use_pages_for_kvec_io in fuse_conn and set it to
true in virtiofs. When use_pages_for_kvec_io is enabled, fuse will use
pages instead of a pointer to pass the KVEC_IO data.
After switching to pages for KVEC_IO data, these pages will be used for
DMA through virtio-fs. If these pages are backed by vmalloc(),
{flush|invalidate}_kernel_vmap_range() are necessary to flush or
invalidate the cache before the DMA operation. So add two new fields in
fuse_args to record the base address of the vmalloc area and the
condition indicating whether invalidation is needed. Perform the flush
in fuse_get_user_pages() for write operations and the invalidation in
fuse_release_user_pages() for read operations.
It may seem necessary to introduce another field in fuse_conn to
indicate that these KVEC_IO pages are used for DMA. However, considering
that virtio-fs is currently the only user of use_pages_for_kvec_io, just
reuse use_pages_for_kvec_io to indicate that these pages will be used
for DMA.
Fixes: a62a8ef9d97d ("virtio-fs: add virtiofs filesystem")
Signed-off-by: Hou Tao <houtao1(a)huawei.com>
Tested-by: Jingbo Xu <jefflexu(a)linux.alibaba.com>
Signed-off-by: Miklos Szeredi <mszeredi(a)redhat.com>
Signed-off-by: Yifan Qiao <qiaoyifan4(a)huawei.com>
---
fs/fuse/fuse_i.h | 6 +++++
fs/fuse/file.c | 62 +++++++++++++++++++++++++++++++--------------
fs/fuse/virtio_fs.c | 1 +
3 files changed, 50 insertions(+), 19 deletions(-)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 3d5734ed99cf..1bb136bcbe9e 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -286,9 +286,12 @@ struct fuse_args {
bool page_replace:1;
bool may_block:1;
bool is_ext:1;
+ bool invalidate_vmap:1;
struct fuse_in_arg in_args[3];
struct fuse_arg out_args[2];
void (*end)(struct fuse_mount *fm, struct fuse_args *args, int error);
+ /* Used for kvec iter backed by vmalloc address */
+ void *vmap_base;
};
struct fuse_args_pages {
@@ -823,6 +826,9 @@ struct fuse_conn {
/* Is statx not implemented by fs? */
unsigned int no_statx:1;
+ /* Use pages instead of pointer for kernel I/O */
+ unsigned int use_pages_for_kvec_io:1;
+
/** The number of requests waiting for completion */
atomic_t num_waiting;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index ceb9f7d23038..fca2be898336 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -625,7 +625,7 @@ void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
args->out_args[0].size = count;
}
-static void fuse_release_user_pages(struct fuse_args_pages *ap,
+static void fuse_release_user_pages(struct fuse_args_pages *ap, ssize_t nres,
bool should_dirty)
{
unsigned int i;
@@ -635,6 +635,9 @@ static void fuse_release_user_pages(struct fuse_args_pages *ap,
set_page_dirty_lock(ap->pages[i]);
put_page(ap->pages[i]);
}
+
+ if (nres > 0 && ap->args.invalidate_vmap)
+ invalidate_kernel_vmap_range(ap->args.vmap_base, nres);
}
static void fuse_io_release(struct kref *kref)
@@ -733,25 +736,29 @@ static void fuse_aio_complete_req(struct fuse_mount *fm, struct fuse_args *args,
struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args);
struct fuse_io_priv *io = ia->io;
ssize_t pos = -1;
-
- fuse_release_user_pages(&ia->ap, io->should_dirty);
+ size_t nres;
if (err) {
/* Nothing */
} else if (io->write) {
if (ia->write.out.size > ia->write.in.size) {
err = -EIO;
- } else if (ia->write.in.size != ia->write.out.size) {
- pos = ia->write.in.offset - io->offset +
- ia->write.out.size;
+ } else {
+ nres = ia->write.out.size;
+ if (ia->write.in.size != ia->write.out.size)
+ pos = ia->write.in.offset - io->offset +
+ ia->write.out.size;
}
} else {
u32 outsize = args->out_args[0].size;
+ nres = outsize;
if (ia->read.in.size != outsize)
pos = ia->read.in.offset - io->offset + outsize;
}
+ fuse_release_user_pages(&ia->ap, err ?: nres, io->should_dirty);
+
fuse_aio_complete(io, err, pos);
fuse_io_free(ia);
}
@@ -1368,24 +1375,37 @@ static inline size_t fuse_get_frag_size(const struct iov_iter *ii,
static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
size_t *nbytesp, int write,
- unsigned int max_pages)
+ unsigned int max_pages,
+ bool use_pages_for_kvec_io)
{
+ bool flush_or_invalidate = false;
size_t nbytes = 0; /* # bytes already packed in req */
ssize_t ret = 0;
- /* Special case for kernel I/O: can copy directly into the buffer */
+ /* Special case for kernel I/O: can copy directly into the buffer.
+ * However if the implementation of fuse_conn requires pages instead of
+ * pointer (e.g., virtio-fs), use iov_iter_extract_pages() instead.
+ */
if (iov_iter_is_kvec(ii)) {
- unsigned long user_addr = fuse_get_user_addr(ii);
- size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
+ void *user_addr = (void *)fuse_get_user_addr(ii);
- if (write)
- ap->args.in_args[1].value = (void *) user_addr;
- else
- ap->args.out_args[0].value = (void *) user_addr;
+ if (!use_pages_for_kvec_io) {
+ size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
- iov_iter_advance(ii, frag_size);
- *nbytesp = frag_size;
- return 0;
+ if (write)
+ ap->args.in_args[1].value = user_addr;
+ else
+ ap->args.out_args[0].value = user_addr;
+
+ iov_iter_advance(ii, frag_size);
+ *nbytesp = frag_size;
+ return 0;
+ }
+
+ if (is_vmalloc_addr(user_addr)) {
+ ap->args.vmap_base = user_addr;
+ flush_or_invalidate = true;
+ }
}
while (nbytes < *nbytesp && ap->num_pages < max_pages) {
@@ -1411,6 +1431,10 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
(PAGE_SIZE - ret) & (PAGE_SIZE - 1);
}
+ if (write && flush_or_invalidate)
+ flush_kernel_vmap_range(ap->args.vmap_base, nbytes);
+
+ ap->args.invalidate_vmap = !write && flush_or_invalidate;
ap->args.user_pages = true;
if (write)
ap->args.in_pages = true;
@@ -1478,7 +1502,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
size_t nbytes = min(count, nmax);
err = fuse_get_user_pages(&ia->ap, iter, &nbytes, write,
- max_pages);
+ max_pages, fc->use_pages_for_kvec_io);
if (err && !nbytes)
break;
@@ -1492,7 +1516,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
}
if (!io->async || nres < 0) {
- fuse_release_user_pages(&ia->ap, io->should_dirty);
+ fuse_release_user_pages(&ia->ap, nres, io->should_dirty);
fuse_io_free(ia);
}
ia = NULL;
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index d84dacbdce2c..5779c7ba1e3d 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -1458,6 +1458,7 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
fc->delete_stale = true;
fc->auto_submounts = true;
fc->sync_fs = true;
+ fc->use_pages_for_kvec_io = true;
/* Tell FUSE to split requests that exceed the virtqueue's size */
fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit,
--
2.39.2
From: Pavel Begunkov <asml.silence(a)gmail.com>
stable inclusion
from stable-v6.6.68
commit 2ca94c8de36091067b9ce7527ae8db3812d38781
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/IBEG43
CVE: CVE-2024-56709
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
--------------------------------
commit dbd2ca9367eb19bc5e269b8c58b0b1514ada9156 upstream.
task work can be executed after the task has gone through io_uring
termination, whether it's the final task_work run or the fallback path.
In this case, task work will find ->io_wq being already killed and
null'ed, which is a problem if it then tries to forward the request to
io_queue_iowq(). Make io_queue_iowq() fail requests in this case.
Note that it also checks PF_KTHREAD, because the user can first close
a DEFER_TASKRUN ring and kill the task shortly afterwards, in which case
the ->io_wq check would race.
Cc: stable(a)vger.kernel.org
Fixes: 50c52250e2d74 ("block: implement async io_uring discard cmd")
Fixes: 773af69121ecc ("io_uring: always reissue from task_work context")
Reported-by: Will <willsroot(a)protonmail.com>
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Link: https://lore.kernel.org/r/63312b4a2c2bb67ad67b857d17a300e1d3b078e8.17346379…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Yifan Qiao <qiaoyifan4(a)huawei.com>
---
io_uring/io_uring.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 4d69fb4cf803..bb37e8f08ae5 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1090,6 +1090,7 @@ static struct file *io_file_get(struct io_ring_ctx *ctx,
unsigned int issue_flags);
static void __io_queue_sqe(struct io_kiocb *req);
static void io_rsrc_put_work(struct work_struct *work);
+static void io_req_task_queue_fail(struct io_kiocb *req, int ret);
static void io_req_task_queue(struct io_kiocb *req);
static void io_submit_flush_completions(struct io_ring_ctx *ctx);
@@ -1459,7 +1460,11 @@ static void io_queue_async_work(struct io_kiocb *req, bool *locked)
locked = NULL;
BUG_ON(!tctx);
- BUG_ON(!tctx->io_wq);
+
+ if ((current->flags & PF_KTHREAD) || !tctx->io_wq) {
+ io_req_task_queue_fail(req, -ECANCELED);
+ return;
+ }
/* init ->work of the whole link before punting */
io_prep_async_link(req);
--
2.39.2
From: Hou Tao <houtao1(a)huawei.com>
mainline inclusion
from mainline-v6.13-rc1
commit 41748675c0bf252b3c5f600a95830f0936d366c1
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/IBEAGA
CVE: CVE-2024-53219
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
--------------------------------
When trying to insert a 10MB kernel module kept in a virtio-fs with cache
disabled, the following warning was reported:
------------[ cut here ]------------
WARNING: CPU: 1 PID: 404 at mm/page_alloc.c:4551 ......
Modules linked in:
CPU: 1 PID: 404 Comm: insmod Not tainted 6.9.0-rc5+ #123
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) ......
RIP: 0010:__alloc_pages+0x2bf/0x380
......
Call Trace:
<TASK>
? __warn+0x8e/0x150
? __alloc_pages+0x2bf/0x380
__kmalloc_large_node+0x86/0x160
__kmalloc+0x33c/0x480
virtio_fs_enqueue_req+0x240/0x6d0
virtio_fs_wake_pending_and_unlock+0x7f/0x190
queue_request_and_unlock+0x55/0x60
fuse_simple_request+0x152/0x2b0
fuse_direct_io+0x5d2/0x8c0
fuse_file_read_iter+0x121/0x160
__kernel_read+0x151/0x2d0
kernel_read+0x45/0x50
kernel_read_file+0x1a9/0x2a0
init_module_from_file+0x6a/0xe0
idempotent_init_module+0x175/0x230
__x64_sys_finit_module+0x5d/0xb0
x64_sys_call+0x1c3/0x9e0
do_syscall_64+0x3d/0xc0
entry_SYSCALL_64_after_hwframe+0x4b/0x53
......
</TASK>
---[ end trace 0000000000000000 ]---
The warning is triggered as follows:
1) syscall finit_module() handles the module insertion and it invokes
kernel_read_file() to read the content of the module first.
2) kernel_read_file() allocates a 10MB buffer by using vmalloc() and
passes it to kernel_read(). kernel_read() constructs a kvec iter by
using iov_iter_kvec() and passes it to fuse_file_read_iter().
3) virtio-fs disables the cache, so fuse_file_read_iter() invokes
fuse_direct_io(). As for now, the maximal read size for kvec iter is
only limited by fc->max_read. For virtio-fs, max_read is UINT_MAX, so
fuse_direct_io() doesn't split the 10MB buffer. It saves the address and
the size of the 10MB-sized buffer in out_args[0] of a fuse request and
passes the fuse request to virtio_fs_wake_pending_and_unlock().
4) virtio_fs_wake_pending_and_unlock() uses virtio_fs_enqueue_req() to
queue the request. Because virtiofs needs a DMA-able address,
virtio_fs_enqueue_req() uses kmalloc() to allocate a bounce buffer for
all fuse args, copies these args into the bounce buffer and passes the
physical address of the bounce buffer to virtiofsd. The total length of
these fuse args for the passed fuse request is about 10MB, so
copy_args_to_argbuf() invokes kmalloc() with a 10MB size parameter and
it triggers the warning in __alloc_pages():
if (WARN_ON_ONCE_GFP(order > MAX_PAGE_ORDER, gfp))
return NULL;
5) virtio_fs_enqueue_req() will retry the memory allocation in a
kworker, but it won't help, because kmalloc() will always return NULL
due to the abnormal size and finit_module() will hang forever.
A feasible solution is to limit the value of max_read for virtio-fs, so
the length passed to kmalloc() will be limited. However, it would affect
the maximal read size for normal reads. Writes initiated from the kernel
on virtio-fs have a similar problem, but currently there is no way to
limit fc->max_write in the kernel.
So instead of limiting the values of both max_read and max_write in the
kernel, introduce use_pages_for_kvec_io in fuse_conn and set it to
true in virtiofs. When use_pages_for_kvec_io is enabled, fuse will use
pages instead of a pointer to pass the KVEC_IO data.
After switching to pages for KVEC_IO data, these pages will be used for
DMA through virtio-fs. If these pages are backed by vmalloc(),
{flush|invalidate}_kernel_vmap_range() are necessary to flush or
invalidate the cache before the DMA operation. So add two new fields in
fuse_args to record the base address of the vmalloc area and the
condition indicating whether invalidation is needed. Perform the flush
in fuse_get_user_pages() for write operations and the invalidation in
fuse_release_user_pages() for read operations.
It may seem necessary to introduce another field in fuse_conn to
indicate that these KVEC_IO pages are used for DMA. However, considering
that virtio-fs is currently the only user of use_pages_for_kvec_io, just
reuse use_pages_for_kvec_io to indicate that these pages will be used
for DMA.
Fixes: a62a8ef9d97d ("virtio-fs: add virtiofs filesystem")
Signed-off-by: Hou Tao <houtao1(a)huawei.com>
Tested-by: Jingbo Xu <jefflexu(a)linux.alibaba.com>
Signed-off-by: Miklos Szeredi <mszeredi(a)redhat.com>
Signed-off-by: Yifan Qiao <qiaoyifan4(a)huawei.com>
---
fs/fuse/fuse_i.h | 6 +++++
fs/fuse/file.c | 62 +++++++++++++++++++++++++++++++--------------
fs/fuse/virtio_fs.c | 1 +
3 files changed, 50 insertions(+), 19 deletions(-)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 029f8e382c97..4ad05b0c72b7 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -273,9 +273,12 @@ struct fuse_args {
#ifndef __GENKSYMS__
bool user_pages:1;
#endif
+ bool invalidate_vmap:1;
struct fuse_in_arg in_args[3];
struct fuse_arg out_args[2];
void (*end)(struct fuse_mount *fm, struct fuse_args *args, int error);
+ /* Used for kvec iter backed by vmalloc address */
+ void *vmap_base;
};
struct fuse_args_pages {
@@ -763,6 +766,9 @@ struct fuse_conn {
/* Auto-mount submounts announced by the server */
unsigned int auto_submounts:1;
+ /* Use pages instead of pointer for kernel I/O */
+ unsigned int use_pages_for_kvec_io:1;
+
/** The number of requests waiting for completion */
atomic_t num_waiting;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 86f2e1c2ad0f..da6d8e241e8f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -610,7 +610,7 @@ void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
args->out_args[0].size = count;
}
-static void fuse_release_user_pages(struct fuse_args_pages *ap,
+static void fuse_release_user_pages(struct fuse_args_pages *ap, ssize_t nres,
bool should_dirty)
{
unsigned int i;
@@ -620,6 +620,9 @@ static void fuse_release_user_pages(struct fuse_args_pages *ap,
set_page_dirty_lock(ap->pages[i]);
put_page(ap->pages[i]);
}
+
+ if (nres > 0 && ap->args.invalidate_vmap)
+ invalidate_kernel_vmap_range(ap->args.vmap_base, nres);
}
static void fuse_io_release(struct kref *kref)
@@ -718,25 +721,29 @@ static void fuse_aio_complete_req(struct fuse_mount *fm, struct fuse_args *args,
struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args);
struct fuse_io_priv *io = ia->io;
ssize_t pos = -1;
-
- fuse_release_user_pages(&ia->ap, io->should_dirty);
+ size_t nres;
if (err) {
/* Nothing */
} else if (io->write) {
if (ia->write.out.size > ia->write.in.size) {
err = -EIO;
- } else if (ia->write.in.size != ia->write.out.size) {
- pos = ia->write.in.offset - io->offset +
- ia->write.out.size;
+ } else {
+ nres = ia->write.out.size;
+ if (ia->write.in.size != ia->write.out.size)
+ pos = ia->write.in.offset - io->offset +
+ ia->write.out.size;
}
} else {
u32 outsize = args->out_args[0].size;
+ nres = outsize;
if (ia->read.in.size != outsize)
pos = ia->read.in.offset - io->offset + outsize;
}
+ fuse_release_user_pages(&ia->ap, err ?: nres, io->should_dirty);
+
fuse_aio_complete(io, err, pos);
fuse_io_free(ia);
}
@@ -1389,24 +1396,37 @@ static inline size_t fuse_get_frag_size(const struct iov_iter *ii,
static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
size_t *nbytesp, int write,
- unsigned int max_pages)
+ unsigned int max_pages,
+ bool use_pages_for_kvec_io)
{
+ bool flush_or_invalidate = false;
size_t nbytes = 0; /* # bytes already packed in req */
ssize_t ret = 0;
- /* Special case for kernel I/O: can copy directly into the buffer */
+ /* Special case for kernel I/O: can copy directly into the buffer.
+ * However if the implementation of fuse_conn requires pages instead of
+ * pointer (e.g., virtio-fs), use iov_iter_extract_pages() instead.
+ */
if (iov_iter_is_kvec(ii)) {
- unsigned long user_addr = fuse_get_user_addr(ii);
- size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
+ void *user_addr = (void *)fuse_get_user_addr(ii);
- if (write)
- ap->args.in_args[1].value = (void *) user_addr;
- else
- ap->args.out_args[0].value = (void *) user_addr;
+ if (!use_pages_for_kvec_io) {
+ size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
- iov_iter_advance(ii, frag_size);
- *nbytesp = frag_size;
- return 0;
+ if (write)
+ ap->args.in_args[1].value = user_addr;
+ else
+ ap->args.out_args[0].value = user_addr;
+
+ iov_iter_advance(ii, frag_size);
+ *nbytesp = frag_size;
+ return 0;
+ }
+
+ if (is_vmalloc_addr(user_addr)) {
+ ap->args.vmap_base = user_addr;
+ flush_or_invalidate = true;
+ }
}
while (nbytes < *nbytesp && ap->num_pages < max_pages) {
@@ -1433,6 +1453,10 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
(PAGE_SIZE - ret) & (PAGE_SIZE - 1);
}
+ if (write && flush_or_invalidate)
+ flush_kernel_vmap_range(ap->args.vmap_base, nbytes);
+
+ ap->args.invalidate_vmap = !write && flush_or_invalidate;
ap->args.user_pages = true;
if (write)
ap->args.in_pages = true;
@@ -1484,7 +1508,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
size_t nbytes = min(count, nmax);
err = fuse_get_user_pages(&ia->ap, iter, &nbytes, write,
- max_pages);
+ max_pages, fc->use_pages_for_kvec_io);
if (err && !nbytes)
break;
@@ -1498,7 +1522,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
}
if (!io->async || nres < 0) {
- fuse_release_user_pages(&ia->ap, io->should_dirty);
+ fuse_release_user_pages(&ia->ap, nres, io->should_dirty);
fuse_io_free(ia);
}
ia = NULL;
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 77338f139cb2..da5e1d1826ff 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -1461,6 +1461,7 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
fc->release = fuse_free_conn;
fc->delete_stale = true;
fc->auto_submounts = true;
+ fc->use_pages_for_kvec_io = true;
fsc->s_fs_info = fm;
sb = sget_fc(fsc, virtio_fs_test_super, virtio_fs_set_super);
--
2.39.2