tree: https://gitee.com/openeuler/kernel.git openEuler-1.0-LTS
head: 765b132216f73bb52fced28360c98a386ee20152
commit: 41298197ead9e85ee2ec1d52122f03fd1863cff2 [1343/1343] ext4: convert BUG_ON's to WARN_ON's in mballoc.c
config: x86_64-randconfig-101-20241223 (https://download.01.org/0day-ci/archive/20241224/202412240236.czAsLPLH-lkp@…)
compiler: clang version 19.1.3 (https://github.com/llvm/llvm-project ab51eccf88f5321e7c60591c5546b254b6afab99)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241224/202412240236.czAsLPLH-lkp@…)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp(a)intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202412240236.czAsLPLH-lkp@intel.com/
All warnings (new ones prefixed by >>):
In file included from fs/ext4/mballoc.c:12:
In file included from fs/ext4/ext4_jbd2.h:16:
In file included from include/linux/jbd2.h:26:
In file included from include/linux/buffer_head.h:14:
include/linux/pagemap.h:425:21: warning: cast from 'int (*)(struct file *, struct page *)' to 'filler_t *' (aka 'int (*)(void *, struct page *)') converts to incompatible function type [-Wcast-function-type-strict]
425 | filler_t *filler = (filler_t *)mapping->a_ops->readpage;
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1 warning generated.
fs/ext4/mballoc.c:4734: warning: Function parameter or member 'bh' not described in 'ext4_free_blocks'
>> fs/ext4/mballoc.o: warning: objtool: ext4_mb_complex_scan_group()+0x11a4: unreachable instruction
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
From: Philipp Stanner <pstanner(a)redhat.com>
mainline inclusion
from mainline-v6.6.63
commit d372dd09cfbf1324f54cbffd81fcaf6cdf3e608e
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/IB956Q
CVE: CVE-2024-53126
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
--------------------------------
commit 0b364cf53b20204e92bac7c6ebd1ee7d3ec62931 upstream.
In psnet_open_pf_bar() and snet_open_vf_bar() a string later passed to
pcim_iomap_regions() is placed on the stack. Neither
pcim_iomap_regions() nor the functions it calls copy that string.
Should the string ever be used later, this causes undefined behavior,
since the stack frame will by then have disappeared.
Fix the bug by allocating the strings on the heap through
devm_kasprintf().
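As a minimal sketch of the resulting pattern (the helper name below is
hypothetical, not the driver's actual code): the string is allocated with
device-managed lifetime, so it stays valid for as long as devres may
reference it.

	/* Sketch only: demo_open_bar() is a hypothetical helper. */
	static int demo_open_bar(struct pci_dev *pdev, int mask)
	{
		char *name;

		/* Heap allocation tied to the device's lifetime; freed
		 * automatically on driver detach, unlike a stack buffer.
		 */
		name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
				      "demo[%s]-bars", pci_name(pdev));
		if (!name)
			return -ENOMEM;

		return pcim_iomap_regions(pdev, mask, name);
	}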
Cc: stable(a)vger.kernel.org # v6.3
Fixes: 51a8f9d7f587 ("virtio: vdpa: new SolidNET DPU driver.")
Reported-by: Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
Closes: https://lore.kernel.org/all/74e9109a-ac59-49e2-9b1d-d825c9c9f891@wanadoo.fr/
Suggested-by: Andy Shevchenko <andy(a)kernel.org>
Signed-off-by: Philipp Stanner <pstanner(a)redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare(a)redhat.com>
Message-Id: <20241028074357.9104-3-pstanner(a)redhat.com>
Signed-off-by: Michael S. Tsirkin <mst(a)redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Rui Xiang <rui.xiang(a)huawei.com>
Reviewed-by: Weilong Chen <chenweilong(a)huawei.com>
Signed-off-by: Yongqiang Liu <liuyongqiang13(a)huawei.com>
---
drivers/vdpa/solidrun/snet_main.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/vdpa/solidrun/snet_main.c b/drivers/vdpa/solidrun/snet_main.c
index c0ddeb9ded11..6acf09461689 100644
--- a/drivers/vdpa/solidrun/snet_main.c
+++ b/drivers/vdpa/solidrun/snet_main.c
@@ -555,7 +555,7 @@ static const struct vdpa_config_ops snet_config_ops = {
static int psnet_open_pf_bar(struct pci_dev *pdev, struct psnet *psnet)
{
- char name[50];
+ char *name;
int ret, i, mask = 0;
/* We don't know which BAR will be used to communicate..
* We will map every bar with len > 0.
@@ -573,7 +573,10 @@ static int psnet_open_pf_bar(struct pci_dev *pdev, struct psnet *psnet)
return -ENODEV;
}
- snprintf(name, sizeof(name), "psnet[%s]-bars", pci_name(pdev));
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "psnet[%s]-bars", pci_name(pdev));
+ if (!name)
+ return -ENOMEM;
+
ret = pcim_iomap_regions(pdev, mask, name);
if (ret) {
SNET_ERR(pdev, "Failed to request and map PCI BARs\n");
@@ -590,10 +593,13 @@ static int psnet_open_pf_bar(struct pci_dev *pdev, struct psnet *psnet)
static int snet_open_vf_bar(struct pci_dev *pdev, struct snet *snet)
{
- char name[50];
+ char *name;
int ret;
- snprintf(name, sizeof(name), "snet[%s]-bar", pci_name(pdev));
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "snet[%s]-bars", pci_name(pdev));
+ if (!name)
+ return -ENOMEM;
+
/* Request and map BAR */
ret = pcim_iomap_regions(pdev, BIT(snet->psnet->cfg.vf_bar), name);
if (ret) {
--
2.34.1
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IB8UVJ
CVE: NA
--------------------------------
During concurrent append writes to an XFS filesystem, zero padding data
may appear in the file after a power failure. This happens due to imprecise
disk size updates when handling write completion.

Consider this scenario with concurrent append writes to the same file:
  Thread 1:                  Thread 2:
  ------------               -----------
  write [A, A+B]
  update inode size to A+B
  submit I/O [A, A+BS]
                             write [A+B, A+B+C]
                             update inode size to A+B+C
  <I/O completes, updates disk size to min(A+B+C, A+BS)>
  <power failure>

After reboot:
1) with A+B+C < A+BS, the file has zero padding in range [A+B, A+B+C]

  |<       Block Size (BS)        >|
  |DDDDDDDDDDDDDDDD0000000000000000|
  ^                ^               ^
  A               A+B            A+B+C
                                  (EOF)

2) with A+B+C > A+BS, the file has zero padding in range [A+B, A+BS]

  |<       Block Size (BS)        >|<       Block Size (BS)        >|
  |DDDDDDDDDDDDDDDD0000000000000000|00000000000000000000000000000000|
  ^                ^               ^                                ^
  A               A+B             A+BS                            A+B+C
                                                                   (EOF)

D = Valid Data
0 = Zero Padding
The issue stems from the disk size being set to min(io_offset + io_size,
inode->i_size) at I/O completion. Since io_offset + io_size is rounded to
block size granularity, it may exceed the actual amount of valid file data.
In the case of concurrent append writes, inode->i_size may also be larger
than the range of valid file data that has actually been written to disk,
leading to inaccurate disk size updates.
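As a worked example (made-up numbers, matching case 1 above): take a
4096-byte block size, A = 0, B = 100 and C = 50. The ioend for the first
write covers [0, 4096), so io_offset + io_size = 4096, while only 100 bytes
of it hold data that was actually submitted. At completion the disk size is
set to min(4096, i_size = 150) = 150, and a power failure before the second
write reaches the disk leaves 50 bytes of zero padding in [100, 150).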
This patch modifies the meaning of io_size to represent the size of
valid data within EOF in an ioend. If the ioend spans beyond i_size,
io_size will be trimmed to provide the file with more accurate size
information. This is particularly useful for on-disk size updates
at completion time.
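Conceptually, the completion-time update then behaves like the sketch below
(a paraphrase for illustration, not the verbatim XFS code; the helper name
is made up):

	/* Hypothetical helper: new on-disk size at ioend completion.
	 * With io_size trimmed to the in-core EOF, the min() can no
	 * longer pick up an i_size that was advanced by a later append
	 * whose data has not been written back yet.
	 */
	static loff_t new_ondisk_size(const struct iomap_ioend *ioend,
				      loff_t isize, loff_t old_disk_size)
	{
		loff_t end = ioend->io_offset + ioend->io_size;

		return max(old_disk_size, min(end, isize));
	}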
After this change, ioends that span i_size will not grow or merge with
other ioends in concurrent scenarios. However, the cases that need
growth/merging rarely occur, and there seems to be no noticeable
performance impact.
Although rounding up io_size could enable ioend growth/merging in these
scenarios, we decided to keep the code simple after discussion [1].
Another benefit is that it makes the xfs_ioend_is_append() check more
accurate, which can reduce unnecessary end bio callbacks of xfs_end_bio()
in certain scenarios, such as repeated writes at the file tail without
extending the file size.
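That check is roughly of the following shape (paraphrased for illustration;
the helper name is hypothetical, not the real xfs_ioend_is_append()):

	/* A completion only needs the on-disk size-update path when the
	 * ioend extends past the current on-disk size. With a
	 * byte-accurate io_size, rewrites at the file tail that do not
	 * extend the file no longer take this path.
	 */
	static bool ioend_extends_ondisk_size(const struct iomap_ioend *ioend,
					      loff_t disk_size)
	{
		return ioend->io_offset + ioend->io_size > disk_size;
	}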
Fixes: ae259a9c8593 ("fs: introduce iomap infrastructure") # goes further back than this
Link [1]: https://patchwork.kernel.org/project/xfs/patch/20241113091907.56937-1-leo.l…
Signed-off-by: Long Li <leo.lilong(a)huawei.com>
---
fs/iomap/buffered-io.c | 48 ++++++++++++++++++++++++++++++++++++++++++
include/linux/iomap.h | 2 +-
2 files changed, 49 insertions(+), 1 deletion(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 33d882c2ad4d..ef8a8345ca9a 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1846,6 +1846,7 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
{
struct iomap_folio_state *ifs = folio->private;
size_t poff = offset_in_folio(folio, pos);
+ loff_t isize = i_size_read(inode);
int error;
if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, pos)) {
@@ -1861,7 +1862,54 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
if (ifs)
atomic_add(len, &ifs->write_bytes_pending);
+
+ /*
+ * Clamp io_offset and io_size to the incore EOF so that ondisk
+ * file size updates in the ioend completion are byte-accurate.
+ * This avoids recovering files with zeroed tail regions when
+ * writeback races with appending writes:
+ *
+ *   Thread 1:                  Thread 2:
+ *   ------------               -----------
+ *   write [A, A+B]
+ *   update inode size to A+B
+ *   submit I/O [A, A+BS]
+ *                              write [A+B, A+B+C]
+ *                              update inode size to A+B+C
+ *   <I/O completes, updates disk size to min(A+B+C, A+BS)>
+ *   <power failure>
+ *
+ * After reboot:
+ * 1) with A+B+C < A+BS, the file has zero padding in range
+ *    [A+B, A+B+C]
+ *
+ *   |<    Block Size (BS)    >|
+ *   |DDDDDDDDDDDD0000000000000|
+ *   ^            ^           ^
+ *   A           A+B        A+B+C
+ *                           (EOF)
+ *
+ * 2) with A+B+C > A+BS, the file has zero padding in range
+ *    [A+B, A+BS]
+ *
+ *   |<    Block Size (BS)    >|<    Block Size (BS)     >|
+ *   |DDDDDDDDDDDD0000000000000|00000000000000000000000000|
+ *   ^            ^           ^                          ^
+ *   A           A+B         A+BS                      A+B+C
+ *                                                      (EOF)
+ *
+ * D = Valid Data
+ * 0 = Zero Padding
+ *
+ * Note that this defeats the ability to chain the ioends of
+ * appending writes. Writeback beyond EOF block may occur in
+ * concurrent scenarios (e.g. racing with truncate) and io_size
+ * should not be trimmed in such cases.
+ */
wpc->ioend->io_size += len;
+ if (pos < isize && pos + len > isize)
+ wpc->ioend->io_size = isize - wpc->ioend->io_offset;
+
wbc_account_cgroup_owner(wbc, &folio->page, len);
return 0;
}
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index fd584156253f..6bd7ed98cf1f 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -318,7 +318,7 @@ struct iomap_ioend {
u16 io_type;
u16 io_flags; /* IOMAP_F_* */
struct inode *io_inode; /* file being written to */
- size_t io_size; /* size of the extent */
+ size_t io_size; /* size of data within eof */
loff_t io_offset; /* offset in the file */
sector_t io_sector; /* start sector of ioend */
struct bio io_bio; /* MUST BE LAST! */
--
2.39.2
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IB3O3K
CVE: NA
--------------------------------
During concurrent append writes to an XFS filesystem, zero padding data
may appear in the file after a power failure. This happens due to imprecise
disk size updates when handling write completion.

Consider this scenario with concurrent append writes to the same file:
  Thread 1:                  Thread 2:
  ------------               -----------
  write [A, A+B]
  update inode size to A+B
  submit I/O [A, A+BS]
                             write [A+B, A+B+C]
                             update inode size to A+B+C
  <I/O completes, updates disk size to min(A+B+C, A+BS)>
  <power failure>

After reboot:
1) with A+B+C < A+BS, the file has zero padding in range [A+B, A+B+C]

  |<       Block Size (BS)        >|
  |DDDDDDDDDDDDDDDD0000000000000000|
  ^                ^               ^
  A               A+B            A+B+C
                                  (EOF)

2) with A+B+C > A+BS, the file has zero padding in range [A+B, A+BS]

  |<       Block Size (BS)        >|<       Block Size (BS)        >|
  |DDDDDDDDDDDDDDDD0000000000000000|00000000000000000000000000000000|
  ^                ^               ^                                ^
  A               A+B             A+BS                            A+B+C
                                                                   (EOF)

D = Valid Data
0 = Zero Padding
The issue stems from the disk size being set to min(io_offset + io_size,
inode->i_size) at I/O completion. Since io_offset + io_size is rounded to
block size granularity, it may exceed the actual amount of valid file data.
In the case of concurrent append writes, inode->i_size may also be larger
than the range of valid file data that has actually been written to disk,
leading to inaccurate disk size updates.
This patch modifies the meaning of io_size to represent the size of
valid data within EOF in an ioend. If the ioend spans beyond i_size,
io_size will be trimmed to provide the file with more accurate size
information. This is particularly useful for on-disk size updates
at completion time.
After this change, ioends that span i_size will not grow or merge with
other ioends in concurrent scenarios. However, the cases that need
growth/merging rarely occur, and there seems to be no noticeable
performance impact.
Although rounding up io_size could enable ioend growth/merging in these
scenarios, we decided to keep the code simple after discussion [1].
Another benefit is that it makes the xfs_ioend_is_append() check more
accurate, which can reduce unnecessary end bio callbacks of xfs_end_bio()
in certain scenarios, such as repeated writes at the file tail without
extending the file size.
Fixes: ae259a9c8593 ("fs: introduce iomap infrastructure") # goes further back than this
Link [1]: https://patchwork.kernel.org/project/xfs/patch/20241113091907.56937-1-leo.l…
Signed-off-by: Long Li <leo.lilong(a)huawei.com>
---
fs/iomap/buffered-io.c | 47 ++++++++++++++++++++++++++++++++++++++++++
include/linux/iomap.h | 2 +-
2 files changed, 48 insertions(+), 1 deletion(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 45471ee7e919..e07c2d334fcb 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1409,6 +1409,7 @@ iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page,
unsigned len = i_blocksize(inode);
unsigned poff = offset & (PAGE_SIZE - 1);
bool merged, same_page = false;
+ loff_t isize = i_size_read(inode);
if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, offset, sector)) {
if (wpc->ioend)
@@ -1429,7 +1430,53 @@ iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page,
bio_add_page(wpc->ioend->io_bio, page, len, poff);
}
+ /*
+ * Clamp io_offset and io_size to the incore EOF so that ondisk
+ * file size updates in the ioend completion are byte-accurate.
+ * This avoids recovering files with zeroed tail regions when
+ * writeback races with appending writes:
+ *
+ *   Thread 1:                  Thread 2:
+ *   ------------               -----------
+ *   write [A, A+B]
+ *   update inode size to A+B
+ *   submit I/O [A, A+BS]
+ *                              write [A+B, A+B+C]
+ *                              update inode size to A+B+C
+ *   <I/O completes, updates disk size to min(A+B+C, A+BS)>
+ *   <power failure>
+ *
+ * After reboot:
+ * 1) with A+B+C < A+BS, the file has zero padding in range
+ *    [A+B, A+B+C]
+ *
+ *   |<    Block Size (BS)    >|
+ *   |DDDDDDDDDDDD0000000000000|
+ *   ^            ^           ^
+ *   A           A+B        A+B+C
+ *                           (EOF)
+ *
+ * 2) with A+B+C > A+BS, the file has zero padding in range
+ *    [A+B, A+BS]
+ *
+ *   |<    Block Size (BS)    >|<    Block Size (BS)     >|
+ *   |DDDDDDDDDDDD0000000000000|00000000000000000000000000|
+ *   ^            ^           ^                          ^
+ *   A           A+B         A+BS                      A+B+C
+ *                                                      (EOF)
+ *
+ * D = Valid Data
+ * 0 = Zero Padding
+ *
+ * Note that this defeats the ability to chain the ioends of
+ * appending writes. Writeback beyond EOF block may occur in
+ * concurrent scenarios (e.g. racing with truncate) and io_size
+ * should not be trimmed in such cases.
+ */
wpc->ioend->io_size += len;
+ if (offset < isize && offset + len > isize)
+ wpc->ioend->io_size = isize - wpc->ioend->io_offset;
+
wbc_account_cgroup_owner(wbc, page, len);
}
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 0965d5f12858..789cae57a27f 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -228,7 +228,7 @@ struct iomap_ioend {
u16 io_flags; /* IOMAP_F_* */
u32 io_folios; /* folios added to ioend */
struct inode *io_inode; /* file being written to */
- size_t io_size; /* size of the extent */
+ size_t io_size; /* size of data within eof */
loff_t io_offset; /* offset in the file */
void *io_private; /* file system private data */
sector_t io_sector; /* start sector of ioend */
--
2.39.2
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IB3O3K
CVE: NA
--------------------------------
During concurrent append writes to an XFS filesystem, zero padding data
may appear in the file after a power failure. This happens due to imprecise
disk size updates when handling write completion.

Consider this scenario with concurrent append writes to the same file:
  Thread 1:                  Thread 2:
  ------------               -----------
  write [A, A+B]
  update inode size to A+B
  submit I/O [A, A+BS]
                             write [A+B, A+B+C]
                             update inode size to A+B+C
  <I/O completes, updates disk size to min(A+B+C, A+BS)>
  <power failure>

After reboot:
1) with A+B+C < A+BS, the file has zero padding in range [A+B, A+B+C]

  |<       Block Size (BS)        >|
  |DDDDDDDDDDDDDDDD0000000000000000|
  ^                ^               ^
  A               A+B            A+B+C
                                  (EOF)

2) with A+B+C > A+BS, the file has zero padding in range [A+B, A+BS]

  |<       Block Size (BS)        >|<       Block Size (BS)        >|
  |DDDDDDDDDDDDDDDD0000000000000000|00000000000000000000000000000000|
  ^                ^               ^                                ^
  A               A+B             A+BS                            A+B+C
                                                                   (EOF)

D = Valid Data
0 = Zero Padding
The issue stems from the disk size being set to min(io_offset + io_size,
inode->i_size) at I/O completion. Since io_offset + io_size is rounded to
block size granularity, it may exceed the actual amount of valid file data.
In the case of concurrent append writes, inode->i_size may also be larger
than the range of valid file data that has actually been written to disk,
leading to inaccurate disk size updates.
This patch modifies the meaning of io_size to represent the size of
valid data within EOF in an ioend. If the ioend spans beyond i_size,
io_size will be trimmed to provide the file with more accurate size
information. This is particularly useful for on-disk size updates
at completion time.
After this change, ioends that span i_size will not grow or merge with
other ioends in concurrent scenarios. However, the cases that need
growth/merging rarely occur, and there seems to be no noticeable
performance impact.
Although rounding up io_size could enable ioend growth/merging in these
scenarios, we decided to keep the code simple after discussion [1].
Another benefit is that it makes the xfs_ioend_is_append() check more
accurate, which can reduce unnecessary end bio callbacks of xfs_end_bio()
in certain scenarios, such as repeated writes at the file tail without
extending the file size.
Fixes: ae259a9c8593 ("fs: introduce iomap infrastructure") # goes further back than this
Link [1]: https://patchwork.kernel.org/project/xfs/patch/20241113091907.56937-1-leo.l…
Signed-off-by: Long Li <leo.lilong(a)huawei.com>
---
fs/iomap/buffered-io.c | 47 ++++++++++++++++++++++++++++++++++++++++++
include/linux/iomap.h | 2 +-
2 files changed, 48 insertions(+), 1 deletion(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 0bb3257cba42..95e787f9e694 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1409,6 +1409,7 @@ iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page,
unsigned len = i_blocksize(inode);
unsigned poff = offset & (PAGE_SIZE - 1);
bool merged, same_page = false;
+ loff_t isize = i_size_read(inode);
if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, offset, sector)) {
if (wpc->ioend)
@@ -1429,7 +1430,53 @@ iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page,
bio_add_page(wpc->ioend->io_bio, page, len, poff);
}
+ /*
+ * Clamp io_offset and io_size to the incore EOF so that ondisk
+ * file size updates in the ioend completion are byte-accurate.
+ * This avoids recovering files with zeroed tail regions when
+ * writeback races with appending writes:
+ *
+ *   Thread 1:                  Thread 2:
+ *   ------------               -----------
+ *   write [A, A+B]
+ *   update inode size to A+B
+ *   submit I/O [A, A+BS]
+ *                              write [A+B, A+B+C]
+ *                              update inode size to A+B+C
+ *   <I/O completes, updates disk size to min(A+B+C, A+BS)>
+ *   <power failure>
+ *
+ * After reboot:
+ * 1) with A+B+C < A+BS, the file has zero padding in range
+ *    [A+B, A+B+C]
+ *
+ *   |<    Block Size (BS)    >|
+ *   |DDDDDDDDDDDD0000000000000|
+ *   ^            ^           ^
+ *   A           A+B        A+B+C
+ *                           (EOF)
+ *
+ * 2) with A+B+C > A+BS, the file has zero padding in range
+ *    [A+B, A+BS]
+ *
+ *   |<    Block Size (BS)    >|<    Block Size (BS)     >|
+ *   |DDDDDDDDDDDD0000000000000|00000000000000000000000000|
+ *   ^            ^           ^                          ^
+ *   A           A+B         A+BS                      A+B+C
+ *                                                      (EOF)
+ *
+ * D = Valid Data
+ * 0 = Zero Padding
+ *
+ * Note that this defeats the ability to chain the ioends of
+ * appending writes. Writeback beyond EOF block may occur in
+ * concurrent scenarios (e.g. racing with truncate) and io_size
+ * should not be trimmed in such cases.
+ */
wpc->ioend->io_size += len;
+ if (offset < isize && offset + len > isize)
+ wpc->ioend->io_size = isize - wpc->ioend->io_offset;
+
wbc_account_cgroup_owner(wbc, page, len);
}
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 1b6e22741d43..ff3473c134b3 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -229,7 +229,7 @@ struct iomap_ioend {
u16 io_flags; /* IOMAP_F_* */
u32 io_folios; /* folios added to ioend */
struct inode *io_inode; /* file being written to */
- size_t io_size; /* size of the extent */
+ size_t io_size; /* size of data within eof */
loff_t io_offset; /* offset in the file */
void *io_private; /* file system private data */
sector_t io_sector; /* start sector of ioend */
--
2.39.2