- Kernel - mailweb.openeuler.org

[PATCH OLK-6.6] f2fs: fix NULL pointer dereference in f2fs_submit_page_write()
by Yifan Qiao 22 Apr '24

22 Apr '24

From: Wenjie Qi <qwjhust(a)gmail.com> mainline inclusion from mainline-v6.9-rc1 commit c2034ef6192a65a986a45c2aa2ed05824fdc0e9f category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I9HKBD CVE: CVE-2024-26871 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?… -------------------------------- BUG: kernel NULL pointer dereference, address: 0000000000000014 RIP: 0010:f2fs_submit_page_write+0x6cf/0x780 [f2fs] Call Trace: <TASK> ? show_regs+0x6e/0x80 ? __die+0x29/0x70 ? page_fault_oops+0x154/0x4a0 ? prb_read_valid+0x20/0x30 ? __irq_work_queue_local+0x39/0xd0 ? irq_work_queue+0x36/0x70 ? do_user_addr_fault+0x314/0x6c0 ? exc_page_fault+0x7d/0x190 ? asm_exc_page_fault+0x2b/0x30 ? f2fs_submit_page_write+0x6cf/0x780 [f2fs] ? f2fs_submit_page_write+0x736/0x780 [f2fs] do_write_page+0x50/0x170 [f2fs] f2fs_outplace_write_data+0x61/0xb0 [f2fs] f2fs_do_write_data_page+0x3f8/0x660 [f2fs] f2fs_write_single_data_page+0x5bb/0x7a0 [f2fs] f2fs_write_cache_pages+0x3da/0xbe0 [f2fs] ... It is possible that other threads have added this fio to io->bio and submitted the io->bio before entering f2fs_submit_page_write(). At this point io->bio = NULL. If is_end_zone_blkaddr(sbi, fio->new_blkaddr) of this fio is true, then a NULL pointer dereference occurs at bio_get(io->bio). The original code for determining zone end was after "out:", which would have missed some fios that are at a zone end. I've moved this code before "skip:" to make sure it's done for each fio. 
Fixes: e067dc3c6b9c ("f2fs: maintain six open zones for zoned devices") Signed-off-by: Wenjie Qi <qwjhust(a)gmail.com> Reviewed-by: Chao Yu <chao(a)kernel.org> Signed-off-by: Jaegeuk Kim <jaegeuk(a)kernel.org> Signed-off-by: Yifan Qiao <qiaoyifan4(a)huawei.com> --- fs/f2fs/data.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f5f33926acf8..8ab19498dcd7 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1080,10 +1080,6 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) io->last_block_in_bio = fio->new_blkaddr; trace_f2fs_submit_page_write(fio->page, fio); -skip: - if (fio->in_list) - goto next; -out: #ifdef CONFIG_BLK_DEV_ZONED if (f2fs_sb_has_blkzoned(sbi) && btype < META && is_end_zone_blkaddr(sbi, fio->new_blkaddr)) { @@ -1096,6 +1092,10 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) __submit_merged_bio(io); } #endif +skip: + if (fio->in_list) + goto next; +out: if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || !f2fs_is_checkpoint_ready(sbi)) __submit_merged_bio(io); -- 2.39.2

2 1

[openeuler:OLK-6.6 2431/7479] include/linux/fortify-string.h:57:33: warning: '__builtin_memcpy' reading between 65 and 536870912 bytes from a region of size 64
by kernel test robot 22 Apr '24

22 Apr '24

tree: https://gitee.com/openeuler/kernel.git OLK-6.6 head: e95a0071898084171841e9f3695a02fc5f58fd0f commit: f04c0f3eb9b49427c273cd3e4d5a2ff895855b4b [2431/7479] make OPTIMIZE_INLINING config editable config: arm64-randconfig-003-20240422 (https://download.01.org/0day-ci/archive/20240422/202404222001.4hDpVvmd-lkp@…) compiler: aarch64-linux-gcc (GCC) 13.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240422/202404222001.4hDpVvmd-lkp@…) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp(a)intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202404222001.4hDpVvmd-lkp@intel.com/ All warnings (new ones prefixed by >>): In file included from include/linux/string.h:294, from include/linux/bitmap.h:11, from include/linux/cpumask.h:12, from include/linux/smp.h:13, from include/linux/lockdep.h:14, from include/linux/spinlock.h:63, from include/linux/swait.h:7, from include/linux/completion.h:12, from kernel/padata.c:14: In function 'bitmap_copy', inlined from 'cpumask_copy' at include/linux/cpumask.h:740:2, inlined from '__padata_set_cpumasks' at kernel/padata.c:709:2: >> include/linux/fortify-string.h:57:33: warning: '__builtin_memcpy' reading between 65 and 536870912 bytes from a region of size 64 [-Wstringop-overread] 57 | #define __underlying_memcpy __builtin_memcpy | ^ include/linux/fortify-string.h:648:9: note: in expansion of macro '__underlying_memcpy' 648 | __underlying_##op(p, q, __fortify_size); \ | ^~~~~~~~~~~~~ include/linux/fortify-string.h:693:26: note: in expansion of macro '__fortify_memcpy_chk' 693 | #define memcpy(p, q, s) __fortify_memcpy_chk(p, q, s, \ | ^~~~~~~~~~~~~~~~~~~~ include/linux/bitmap.h:268:17: note: in expansion of macro 'memcpy' 268 | memcpy(dst, src, len); | ^~~~~~ kernel/padata.c: In function '__padata_set_cpumasks': kernel/padata.c:692:48: note: source object 'pcpumask' of size [0, 64] 
692 | cpumask_var_t pcpumask, | ~~~~~~~~~~~~~~^~~~~~~~ vim +/__builtin_memcpy +57 include/linux/fortify-string.h 78a498c3a227f2 Alexander Potapenko 2022-10-24 46 78a498c3a227f2 Alexander Potapenko 2022-10-24 47 #if defined(__SANITIZE_MEMORY__) 78a498c3a227f2 Alexander Potapenko 2022-10-24 48 /* 78a498c3a227f2 Alexander Potapenko 2022-10-24 49 * For KMSAN builds all memcpy/memset/memmove calls should be replaced by the 78a498c3a227f2 Alexander Potapenko 2022-10-24 50 * corresponding __msan_XXX functions. 78a498c3a227f2 Alexander Potapenko 2022-10-24 51 */ 78a498c3a227f2 Alexander Potapenko 2022-10-24 52 #include <linux/kmsan_string.h> 78a498c3a227f2 Alexander Potapenko 2022-10-24 53 #define __underlying_memcpy __msan_memcpy 78a498c3a227f2 Alexander Potapenko 2022-10-24 54 #define __underlying_memmove __msan_memmove 78a498c3a227f2 Alexander Potapenko 2022-10-24 55 #define __underlying_memset __msan_memset 78a498c3a227f2 Alexander Potapenko 2022-10-24 56 #else a28a6e860c6cf2 Francis Laniel 2021-02-25 @57 #define __underlying_memcpy __builtin_memcpy a28a6e860c6cf2 Francis Laniel 2021-02-25 58 #define __underlying_memmove __builtin_memmove a28a6e860c6cf2 Francis Laniel 2021-02-25 59 #define __underlying_memset __builtin_memset 78a498c3a227f2 Alexander Potapenko 2022-10-24 60 #endif 78a498c3a227f2 Alexander Potapenko 2022-10-24 61 :::::: The code at line 57 was first introduced by commit :::::: a28a6e860c6cf231cf3c5171c75c342adcd00406 string.h: move fortified functions definitions in a dedicated header. :::::: TO: Francis Laniel <laniel_francis(a)privacyrequired.com> :::::: CC: Linus Torvalds <torvalds(a)linux-foundation.org> -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki

1 0

[PATCH OLK-5.10] scsi: pm80xx: Fix memory leak during rmmod
by Li Lingfeng 22 Apr '24

22 Apr '24

From: Ajish Koshy <Ajish.Koshy(a)microchip.com> mainline inclusion from mainline-v5.16-rc1 commit 51e6ed83bb4ade7c360551fa4ae55c4eacea354b category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I9FNET CVE: CVE-2021-47193 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?… -------------------------------- Driver failed to release all memory allocated. This would lead to memory leak during driver removal. Properly free memory when the module is removed. Link: https://lore.kernel.org/r/20210906170404.5682-5-Ajish.Koshy@microchip.com Acked-by: Jack Wang <jinpu.wang(a)ionos.com> Signed-off-by: Ajish Koshy <Ajish.Koshy(a)microchip.com> Signed-off-by: Viswas G <Viswas.G(a)microchip.com> Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com> Conflict: drivers/scsi/pm8001/pm8001_init.c Commit 27a34943bd89("scsi: pm8001: Remove typecast for pointer returned by kcalloc()") remove typecast for pointer returned by kcalloc(). Signed-off-by: Li Lingfeng <lilingfeng3(a)huawei.com> --- drivers/scsi/pm8001/pm8001_init.c | 11 +++++++++++ drivers/scsi/pm8001/pm8001_sas.h | 1 + 2 files changed, 12 insertions(+) diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c index f40db6f40b1d..45bffa49f876 100644 --- a/drivers/scsi/pm8001/pm8001_init.c +++ b/drivers/scsi/pm8001/pm8001_init.c @@ -1166,6 +1166,7 @@ pm8001_init_ccb_tag(struct pm8001_hba_info *pm8001_ha, struct Scsi_Host *shost, goto err_out; /* Memory region for ccb_info*/ + pm8001_ha->ccb_count = ccb_count; pm8001_ha->ccb_info = (struct pm8001_ccb_info *) kcalloc(ccb_count, sizeof(struct pm8001_ccb_info), GFP_KERNEL); if (!pm8001_ha->ccb_info) { @@ -1226,6 +1227,16 @@ static void pm8001_pci_remove(struct pci_dev *pdev) tasklet_kill(&pm8001_ha->tasklet[j]); #endif scsi_host_put(pm8001_ha->shost); + + for (i = 0; i < pm8001_ha->ccb_count; i++) { + dma_free_coherent(&pm8001_ha->pdev->dev, + sizeof(struct pm8001_prd) * PM8001_MAX_DMA_SG, + 
pm8001_ha->ccb_info[i].buf_prd, + pm8001_ha->ccb_info[i].ccb_dma_handle); + } + kfree(pm8001_ha->ccb_info); + kfree(pm8001_ha->devices); + pm8001_free(pm8001_ha); kfree(sha->sas_phy); kfree(sha->sas_port); diff --git a/drivers/scsi/pm8001/pm8001_sas.h b/drivers/scsi/pm8001/pm8001_sas.h index 5cd6fe6a7d2d..74099d82e436 100644 --- a/drivers/scsi/pm8001/pm8001_sas.h +++ b/drivers/scsi/pm8001/pm8001_sas.h @@ -515,6 +515,7 @@ struct pm8001_hba_info { u32 iomb_size; /* SPC and SPCV IOMB size */ struct pm8001_device *devices; struct pm8001_ccb_info *ccb_info; + u32 ccb_count; #ifdef PM8001_USE_MSIX int number_of_intr;/*will be used in remove()*/ char intr_drvname[PM8001_MAX_MSIX_VEC] -- 2.31.1

2 1

[PATCH openEuler-1.0-LTS] scsi: lpfc: Fix link down processing to address NULL pointer dereference
by dinglongwei 22 Apr '24

22 Apr '24

From: James Smart <jsmart2021(a)gmail.com> mainline inclusion from mainline-v5.16-rc1 commit 1854f53ccd88ad4e7568ddfafafffe71f1ceb0a6 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I9FNFF CVE: CVE-2021-47183 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?… -------------------------------- If an FC link down transition occurs while PLOGIs are outstanding to fabric well known addresses, outstanding ABTS requests may result in a NULL pointer dereference. Driver unload requests may hang with repeated "2878" log messages. The Link down processing results in ABTS requests for outstanding ELS requests. The Abort WQEs are sent for the ELSs before the driver had set the link state to down. Thus the driver is sending the Abort with the expectation that an ABTS will be sent on the wire. The Abort request is stalled waiting for the link to come up. In some conditions the driver may auto-complete the ELSs thus if the link does come up, the Abort completions may reference an invalid structure. Fix by ensuring that the Abort sets the flag to avoid link traffic if issued due to conditions where the link failed. Link: https://lore.kernel.org/r/20211020211417.88754-7-jsmart2021@gmail.com Co-developed-by: Justin Tee <justin.tee(a)broadcom.com> Signed-off-by: Justin Tee <justin.tee(a)broadcom.com> Signed-off-by: James Smart <jsmart2021(a)gmail.com> Signed-off-by: Martin K. 
Petersen <martin.petersen(a)oracle.com> --- drivers/scsi/lpfc/lpfc_sli.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 6c775a8bf..86d31fe32 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -10839,10 +10839,12 @@ lpfc_sli_abort_iotag_issue(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, if (cmdiocb->iocb_flag & LPFC_IO_FOF) abtsiocbp->iocb_flag |= LPFC_IO_FOF; - if (phba->link_state >= LPFC_LINK_UP) - iabt->ulpCommand = CMD_ABORT_XRI_CN; - else + if (phba->link_state < LPFC_LINK_UP || + (phba->sli_rev == LPFC_SLI_REV4 && + phba->sli4_hba.link_state.status == LPFC_FC_LA_TYPE_LINK_DOWN)) iabt->ulpCommand = CMD_CLOSE_XRI_CN; + else + iabt->ulpCommand = CMD_ABORT_XRI_CN; abtsiocbp->iocb_cmpl = lpfc_sli_abort_els_cmpl; abtsiocbp->vport = vport; -- 2.17.1

2 1

[PATCH OLK-6.6 v3 0/2] mm: convert mm's rss stats to use atomic mode
by Peng Zhang 22 Apr '24

22 Apr '24

From: ZhangPeng <zhangpeng362(a)huawei.com> Since commit f1a7941243c1 ("mm: convert mm's rss stats into percpu_counter"), the rss_stats have been converted into percpu_counter, which changes the error margin from (nr_threads * 64) to approximately (nr_cpus ^ 2). However, the new percpu allocation in mm_init() causes a performance regression on fork/exec/shell. Even after commit 14ef95be6f55 ("kernel/fork: group allocation/free of per-cpu counters for mm struct"), the performance of fork/exec/shell is still poor compared to previous kernel versions. To mitigate performance regression, we delay the allocation of percpu memory for rss_stats. Therefore, we convert mm's rss stats to use percpu_counter atomic mode. For single-thread processes, rss_stat is in atomic mode, which reduces the memory consumption and performance regression caused by using percpu. For multiple-thread processes, rss_stat is switched to the percpu mode to reduce the error margin. We convert rss_stats from atomic mode to percpu mode only when the second thread is created. After lmbench test, we can get 2% ~ 4% performance improvement for lmbench fork_proc/exec_proc/shell_proc and 6.7% performance improvement for lmbench page_fault (before batch mode[1]). The test results are as follows: base base+revert base+this patch fork_proc 416.3ms 400.0ms (3.9%) 398.6ms (4.2%) exec_proc 2095.9ms 2061.1ms (1.7%) 2047.7ms (2.3%) shell_proc 3028.2ms 2954.7ms (2.4%) 2961.2ms (2.2%) page_fault 0.3603ms 0.3358ms (6.8%) 0.3361ms (6.7%) [1] https://lore.kernel.org/all/20240412064751.119015-1-wangkefeng.wang@huawei.… ChangeLog: v2->v3: - remove patch 3. v1->v2: - Split patch 2 into two patches. 
ZhangPeng (2): percpu_counter: introduce atomic mode for percpu_counter mm: convert mm's rss stats to use atomic mode include/linux/mm.h | 50 +++++++++++++++++++++++++++++----- include/linux/percpu_counter.h | 48 ++++++++++++++++++++++++++++++-- include/trace/events/kmem.h | 4 +-- kernel/fork.c | 20 ++++++++------ lib/percpu_counter.c | 35 ++++++++++++++++++++++-- 5 files changed, 135 insertions(+), 22 deletions(-) -- 2.25.1

2 3

[PATCH OLK-6.6] dm: call the resume method on internal suspend
by Li Lingfeng 22 Apr '24

22 Apr '24

From: Mikulas Patocka <mpatocka(a)redhat.com> mainline inclusion from mainline-v6.9-rc1 commit 65e8fbde64520001abf1c8d0e573561b4746ef38 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I9HJXV CVE: CVE-2024-26880 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?… -------------------------------- There is this reported crash when experimenting with the lvm2 testsuite. The list corruption is caused by the fact that the postsuspend and resume methods were not paired correctly; there were two consecutive calls to the origin_postsuspend function. The second call attempts to remove the "hash_list" entry from a list, while it was already removed by the first call. Fix __dm_internal_resume so that it calls the preresume and resume methods of the table's targets. If a preresume method of some target fails, we are in a tricky situation. We can't return an error because dm_internal_resume isn't supposed to return errors. We can't return success, because then the "resume" and "postsuspend" methods would not be paired correctly. So, we set the DMF_SUSPENDED flag and we fake normal suspend - it may confuse userspace tools, but it won't cause a kernel crash. ------------[ cut here ]------------ kernel BUG at lib/list_debug.c:56! 
invalid opcode: 0000 [#1] PREEMPT SMP CPU: 1 PID: 8343 Comm: dmsetup Not tainted 6.8.0-rc6 #4 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 RIP: 0010:__list_del_entry_valid_or_report+0x77/0xc0 <snip> RSP: 0018:ffff8881b831bcc0 EFLAGS: 00010282 RAX: 000000000000004e RBX: ffff888143b6eb80 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffffffff819053d0 RDI: 00000000ffffffff RBP: ffff8881b83a3400 R08: 00000000fffeffff R09: 0000000000000058 R10: 0000000000000000 R11: ffffffff81a24080 R12: 0000000000000001 R13: ffff88814538e000 R14: ffff888143bc6dc0 R15: ffffffffa02e4bb0 FS: 00000000f7c0f780(0000) GS:ffff8893f0a40000(0000) knlGS:0000000000000000 CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 CR2: 0000000057fb5000 CR3: 0000000143474000 CR4: 00000000000006b0 Call Trace: <TASK> ? die+0x2d/0x80 ? do_trap+0xeb/0xf0 ? __list_del_entry_valid_or_report+0x77/0xc0 ? do_error_trap+0x60/0x80 ? __list_del_entry_valid_or_report+0x77/0xc0 ? exc_invalid_op+0x49/0x60 ? __list_del_entry_valid_or_report+0x77/0xc0 ? asm_exc_invalid_op+0x16/0x20 ? table_deps+0x1b0/0x1b0 [dm_mod] ? __list_del_entry_valid_or_report+0x77/0xc0 origin_postsuspend+0x1a/0x50 [dm_snapshot] dm_table_postsuspend_targets+0x34/0x50 [dm_mod] dm_suspend+0xd8/0xf0 [dm_mod] dev_suspend+0x1f2/0x2f0 [dm_mod] ? 
table_deps+0x1b0/0x1b0 [dm_mod] ctl_ioctl+0x300/0x5f0 [dm_mod] dm_compat_ctl_ioctl+0x7/0x10 [dm_mod] __x64_compat_sys_ioctl+0x104/0x170 do_syscall_64+0x184/0x1b0 entry_SYSCALL_64_after_hwframe+0x46/0x4e RIP: 0033:0xf7e6aead <snip> ---[ end trace 0000000000000000 ]--- Fixes: ffcc39364160 ("dm: enhance internal suspend and resume interface") Signed-off-by: Mikulas Patocka <mpatocka(a)redhat.com> Signed-off-by: Mike Snitzer <snitzer(a)kernel.org> Signed-off-by: Li Lingfeng <lilingfeng3(a)huawei.com> --- drivers/md/dm.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index f7212e8fc27f..6fa1dfe72ddb 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2920,6 +2920,9 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned int suspend static void __dm_internal_resume(struct mapped_device *md) { + int r; + struct dm_table *map; + BUG_ON(!md->internal_suspend_count); if (--md->internal_suspend_count) @@ -2928,12 +2931,23 @@ static void __dm_internal_resume(struct mapped_device *md) if (dm_suspended_md(md)) goto done; /* resume from nested suspend */ - /* - * NOTE: existing callers don't need to call dm_table_resume_targets - * (which may fail -- so best to avoid it for now by passing NULL map) - */ - (void) __dm_resume(md, NULL); - + map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); + r = __dm_resume(md, map); + if (r) { + /* + * If a preresume method of some target failed, we are in a + * tricky situation. We can't return an error to the caller. We + * can't fake success because then the "resume" and + * "postsuspend" methods would not be paired correctly, and it + * would break various targets, for example it would cause list + * corruption in the "origin" target. + * + * So, we fake normal suspend here, to make sure that the + * "resume" and "postsuspend" methods will be paired correctly. 
+ */ + DMERR("Preresume method failed: %d", r); + set_bit(DMF_SUSPENDED, &md->flags); + } done: clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); smp_mb__after_atomic(); -- 2.31.1

2 1

[PATCH OLK-5.10] dm: call the resume method on internal suspend
by Li Lingfeng 22 Apr '24

22 Apr '24

From: Mikulas Patocka <mpatocka(a)redhat.com> mainline inclusion from mainline-v6.9-rc1 commit 65e8fbde64520001abf1c8d0e573561b4746ef38 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I9HJXV CVE: CVE-2024-26880 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?… -------------------------------- There is this reported crash when experimenting with the lvm2 testsuite. The list corruption is caused by the fact that the postsuspend and resume methods were not paired correctly; there were two consecutive calls to the origin_postsuspend function. The second call attempts to remove the "hash_list" entry from a list, while it was already removed by the first call. Fix __dm_internal_resume so that it calls the preresume and resume methods of the table's targets. If a preresume method of some target fails, we are in a tricky situation. We can't return an error because dm_internal_resume isn't supposed to return errors. We can't return success, because then the "resume" and "postsuspend" methods would not be paired correctly. So, we set the DMF_SUSPENDED flag and we fake normal suspend - it may confuse userspace tools, but it won't cause a kernel crash. ------------[ cut here ]------------ kernel BUG at lib/list_debug.c:56! 
invalid opcode: 0000 [#1] PREEMPT SMP CPU: 1 PID: 8343 Comm: dmsetup Not tainted 6.8.0-rc6 #4 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 RIP: 0010:__list_del_entry_valid_or_report+0x77/0xc0 <snip> RSP: 0018:ffff8881b831bcc0 EFLAGS: 00010282 RAX: 000000000000004e RBX: ffff888143b6eb80 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffffffff819053d0 RDI: 00000000ffffffff RBP: ffff8881b83a3400 R08: 00000000fffeffff R09: 0000000000000058 R10: 0000000000000000 R11: ffffffff81a24080 R12: 0000000000000001 R13: ffff88814538e000 R14: ffff888143bc6dc0 R15: ffffffffa02e4bb0 FS: 00000000f7c0f780(0000) GS:ffff8893f0a40000(0000) knlGS:0000000000000000 CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 CR2: 0000000057fb5000 CR3: 0000000143474000 CR4: 00000000000006b0 Call Trace: <TASK> ? die+0x2d/0x80 ? do_trap+0xeb/0xf0 ? __list_del_entry_valid_or_report+0x77/0xc0 ? do_error_trap+0x60/0x80 ? __list_del_entry_valid_or_report+0x77/0xc0 ? exc_invalid_op+0x49/0x60 ? __list_del_entry_valid_or_report+0x77/0xc0 ? asm_exc_invalid_op+0x16/0x20 ? table_deps+0x1b0/0x1b0 [dm_mod] ? __list_del_entry_valid_or_report+0x77/0xc0 origin_postsuspend+0x1a/0x50 [dm_snapshot] dm_table_postsuspend_targets+0x34/0x50 [dm_mod] dm_suspend+0xd8/0xf0 [dm_mod] dev_suspend+0x1f2/0x2f0 [dm_mod] ? 
table_deps+0x1b0/0x1b0 [dm_mod] ctl_ioctl+0x300/0x5f0 [dm_mod] dm_compat_ctl_ioctl+0x7/0x10 [dm_mod] __x64_compat_sys_ioctl+0x104/0x170 do_syscall_64+0x184/0x1b0 entry_SYSCALL_64_after_hwframe+0x46/0x4e RIP: 0033:0xf7e6aead <snip> ---[ end trace 0000000000000000 ]--- Fixes: ffcc39364160 ("dm: enhance internal suspend and resume interface") Signed-off-by: Mikulas Patocka <mpatocka(a)redhat.com> Signed-off-by: Mike Snitzer <snitzer(a)kernel.org> Signed-off-by: Li Lingfeng <lilingfeng3(a)huawei.com> --- drivers/md/dm.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 1c79eaede4df..e90b3e96fafc 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2707,6 +2707,9 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_fla static void __dm_internal_resume(struct mapped_device *md) { + int r; + struct dm_table *map; + BUG_ON(!md->internal_suspend_count); if (--md->internal_suspend_count) @@ -2715,12 +2718,23 @@ static void __dm_internal_resume(struct mapped_device *md) if (dm_suspended_md(md)) goto done; /* resume from nested suspend */ - /* - * NOTE: existing callers don't need to call dm_table_resume_targets - * (which may fail -- so best to avoid it for now by passing NULL map) - */ - (void) __dm_resume(md, NULL); - + map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); + r = __dm_resume(md, map); + if (r) { + /* + * If a preresume method of some target failed, we are in a + * tricky situation. We can't return an error to the caller. We + * can't fake success because then the "resume" and + * "postsuspend" methods would not be paired correctly, and it + * would break various targets, for example it would cause list + * corruption in the "origin" target. + * + * So, we fake normal suspend here, to make sure that the + * "resume" and "postsuspend" methods will be paired correctly. 
+ */ + DMERR("Preresume method failed: %d", r); + set_bit(DMF_SUSPENDED, &md->flags); + } done: clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); smp_mb__after_atomic(); -- 2.31.1

2 1

[PATCH openEuler-1.0-LTS] dm: call the resume method on internal suspend
by Li Lingfeng 22 Apr '24

22 Apr '24

From: Mikulas Patocka <mpatocka(a)redhat.com> mainline inclusion from mainline-v6.9-rc1 commit 65e8fbde64520001abf1c8d0e573561b4746ef38 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I9HJXV CVE: CVE-2024-26880 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?… -------------------------------- There is this reported crash when experimenting with the lvm2 testsuite. The list corruption is caused by the fact that the postsuspend and resume methods were not paired correctly; there were two consecutive calls to the origin_postsuspend function. The second call attempts to remove the "hash_list" entry from a list, while it was already removed by the first call. Fix __dm_internal_resume so that it calls the preresume and resume methods of the table's targets. If a preresume method of some target fails, we are in a tricky situation. We can't return an error because dm_internal_resume isn't supposed to return errors. We can't return success, because then the "resume" and "postsuspend" methods would not be paired correctly. So, we set the DMF_SUSPENDED flag and we fake normal suspend - it may confuse userspace tools, but it won't cause a kernel crash. ------------[ cut here ]------------ kernel BUG at lib/list_debug.c:56! 
invalid opcode: 0000 [#1] PREEMPT SMP CPU: 1 PID: 8343 Comm: dmsetup Not tainted 6.8.0-rc6 #4 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 RIP: 0010:__list_del_entry_valid_or_report+0x77/0xc0 <snip> RSP: 0018:ffff8881b831bcc0 EFLAGS: 00010282 RAX: 000000000000004e RBX: ffff888143b6eb80 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffffffff819053d0 RDI: 00000000ffffffff RBP: ffff8881b83a3400 R08: 00000000fffeffff R09: 0000000000000058 R10: 0000000000000000 R11: ffffffff81a24080 R12: 0000000000000001 R13: ffff88814538e000 R14: ffff888143bc6dc0 R15: ffffffffa02e4bb0 FS: 00000000f7c0f780(0000) GS:ffff8893f0a40000(0000) knlGS:0000000000000000 CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 CR2: 0000000057fb5000 CR3: 0000000143474000 CR4: 00000000000006b0 Call Trace: <TASK> ? die+0x2d/0x80 ? do_trap+0xeb/0xf0 ? __list_del_entry_valid_or_report+0x77/0xc0 ? do_error_trap+0x60/0x80 ? __list_del_entry_valid_or_report+0x77/0xc0 ? exc_invalid_op+0x49/0x60 ? __list_del_entry_valid_or_report+0x77/0xc0 ? asm_exc_invalid_op+0x16/0x20 ? table_deps+0x1b0/0x1b0 [dm_mod] ? __list_del_entry_valid_or_report+0x77/0xc0 origin_postsuspend+0x1a/0x50 [dm_snapshot] dm_table_postsuspend_targets+0x34/0x50 [dm_mod] dm_suspend+0xd8/0xf0 [dm_mod] dev_suspend+0x1f2/0x2f0 [dm_mod] ? 
table_deps+0x1b0/0x1b0 [dm_mod] ctl_ioctl+0x300/0x5f0 [dm_mod] dm_compat_ctl_ioctl+0x7/0x10 [dm_mod] __x64_compat_sys_ioctl+0x104/0x170 do_syscall_64+0x184/0x1b0 entry_SYSCALL_64_after_hwframe+0x46/0x4e RIP: 0033:0xf7e6aead <snip> ---[ end trace 0000000000000000 ]--- Fixes: ffcc39364160 ("dm: enhance internal suspend and resume interface") Signed-off-by: Mikulas Patocka <mpatocka(a)redhat.com> Signed-off-by: Mike Snitzer <snitzer(a)kernel.org> Signed-off-by: Li Lingfeng <lilingfeng3(a)huawei.com> --- drivers/md/dm.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index f964a0818ddf..469aefecfe0b 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2848,6 +2848,9 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_fla static void __dm_internal_resume(struct mapped_device *md) { + int r; + struct dm_table *map; + BUG_ON(!md->internal_suspend_count); if (--md->internal_suspend_count) @@ -2856,12 +2859,23 @@ static void __dm_internal_resume(struct mapped_device *md) if (dm_suspended_md(md)) goto done; /* resume from nested suspend */ - /* - * NOTE: existing callers don't need to call dm_table_resume_targets - * (which may fail -- so best to avoid it for now by passing NULL map) - */ - (void) __dm_resume(md, NULL); - + map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); + r = __dm_resume(md, map); + if (r) { + /* + * If a preresume method of some target failed, we are in a + * tricky situation. We can't return an error to the caller. We + * can't fake success because then the "resume" and + * "postsuspend" methods would not be paired correctly, and it + * would break various targets, for example it would cause list + * corruption in the "origin" target. + * + * So, we fake normal suspend here, to make sure that the + * "resume" and "postsuspend" methods will be paired correctly. 
+ */ + DMERR("Preresume method failed: %d", r); + set_bit(DMF_SUSPENDED, &md->flags); + } done: clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); smp_mb__after_atomic(); -- 2.31.1

2 1

[PATCH OLK-6.6 v2 0/3] mm: convert mm's rss stats to use atomic mode
by Peng Zhang 22 Apr '24

22 Apr '24

From: ZhangPeng <zhangpeng362(a)huawei.com> Since commit f1a7941243c1 ("mm: convert mm's rss stats into percpu_counter"), the rss_stats have been converted into percpu_counter, which changes the error margin from (nr_threads * 64) to approximately (nr_cpus ^ 2). However, the new percpu allocation in mm_init() causes a performance regression on fork/exec/shell. Even after commit 14ef95be6f55 ("kernel/fork: group allocation/free of per-cpu counters for mm struct"), the performance of fork/exec/shell is still poor compared to previous kernel versions. To mitigate performance regression, we delay the allocation of percpu memory for rss_stats. Therefore, we convert mm's rss stats to use percpu_counter atomic mode. For single-thread processes, rss_stat is in atomic mode, which reduces the memory consumption and performance regression caused by using percpu. For multiple-thread processes, rss_stat is switched to the percpu mode to reduce the error margin. We convert rss_stats from atomic mode to percpu mode only when the second thread is created. After lmbench test, we can get 2% ~ 4% performance improvement for lmbench fork_proc/exec_proc/shell_proc and 6.7% performance improvement for lmbench page_fault (before batch mode[1]). The test results are as follows: base base+revert base+this patch fork_proc 416.3ms 400.0ms (3.9%) 398.6ms (4.2%) exec_proc 2095.9ms 2061.1ms (1.7%) 2047.7ms (2.3%) shell_proc 3028.2ms 2954.7ms (2.4%) 2961.2ms (2.2%) page_fault 0.3603ms 0.3358ms (6.8%) 0.3361ms (6.7%) [1] https://lore.kernel.org/all/20240412064751.119015-1-wangkefeng.wang@huawei.… ChangeLog: v1->v2: - Split patch 2 into two patches. 
ZhangPeng (3): percpu_counter: introduce atomic mode for percpu_counter mm: convert mm's rss stats to use atomic mode mm: introduce cmdline to disable mm counter atomic mode include/linux/mm.h | 50 +++++++++++++++++++++++++++++----- include/linux/percpu_counter.h | 48 ++++++++++++++++++++++++++++++-- include/trace/events/kmem.h | 4 +-- kernel/fork.c | 46 ++++++++++++++++++++++++++++--- lib/percpu_counter.c | 35 ++++++++++++++++++++++-- 5 files changed, 165 insertions(+), 18 deletions(-) -- 2.25.1

2 4

[PATCH OLK-5.10] net/sched: flower: Fix unable to handle page fault bug in fl_init
by Zhengchao Shao 22 Apr '24

22 Apr '24

hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I9IQLI CVE: NA -------------------------------- The tmplt_reoffload function pointer is of the const type, and the value is assigned to the constant in fl_init. As a result, the following issue occurs. BUG: unable to handle page fault for address: ffffffff98715da0 PF: supervisor write access in kernel mode PF: error_code(0x0003) - permissions violation PGD ec0d067 P4D ec0d067 PUD ec0e063 PMD 800000000e0001e1 Oops: 0003 [#1] SMP PTI CPU: 20 PID: 7533 Comm: tc Kdump: loaded Not tainted 5.10.0+ #40 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:fl_init+0xcf/0x100 RSP: 0018:ffffb6e7c0fe7978 EFLAGS: 00010202 RAX: 0000000000000049 RBX: ffff99c6b3580480 RCX: 0000000000000027 RDX: 0000000000000000 RSI: ffffffff98718740 RDI: ffff99c6a359f800 RBP: ffff99c6a359f800 R08: ffff99cfdce1fe50 R09: ffffb6e7c0fe77a0 R10: ffffb6e7c0fe7798 R11: ffffffff9967d5a8 R12: ffff99c6b3580480 R13: ffffb6e7c0fe7b80 R14: 0000000000000001 R15: ffffb6e7c0fe7ab0 FS: 00007fbaef7b1800(0000) GS:ffff99cfdce00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffff98715da0 CR3: 000000011299c000 CR4: 00000000000006e0 Call Trace: tcf_proto_create.cold+0x66/0x9e tc_new_tfilter+0x611/0xa70 rtnetlink_rcv_msg+0x406/0x560 netlink_rcv_skb+0x64/0x180 rtnetlink_rcv+0x19/0x30 netlink_unicast_kernel+0x7b/0x180 netlink_unicast+0x13d/0x230 netlink_sendmsg+0x432/0x610 __sock_sendmsg+0xc6/0xd0 ____sys_sendmsg+0x1f5/0x380 ___sys_sendmsg+0x82/0xe Fixes: fbc634d37f5a ("net/sched: flower: Fix kabi change") Signed-off-by: Zhengchao Shao <shaozhengchao(a)huawei.com> --- net/sched/cls_api.c | 4 ++-- net/sched/cls_flower.c | 5 +---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 7801d8c552c9..b6dd697a3d5f 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1397,8 +1397,8 @@ void tcf_block_put(struct tcf_block *block) 
EXPORT_SYMBOL(tcf_block_put); -void (* const tmplt_reoffload)(struct tcf_chain *chain, bool add, - flow_setup_cb_t *cb, void *cb_priv); +void (*tmplt_reoffload)(struct tcf_chain *chain, bool add, + flow_setup_cb_t *cb, void *cb_priv); EXPORT_SYMBOL(tmplt_reoffload); static void cls_tmplt_reoffload(struct tcf_chain *chain, bool add, diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 3a1c139c426e..d15729328aef 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -356,8 +356,6 @@ static int fl_init(struct tcf_proto *tp) rcu_assign_pointer(tp->root, head); idr_init(&head->handle_idr); - tmplt_reoffload = &fl_tmplt_reoffload; - return rhashtable_init(&head->ht, &mask_ht_params); } @@ -596,8 +594,6 @@ static void fl_destroy(struct tcf_proto *tp, bool rtnl_held, __module_get(THIS_MODULE); tcf_queue_work(&head->rwork, fl_destroy_sleepable); - - tmplt_reoffload = NULL; } static void fl_put(struct tcf_proto *tp, void *arg) @@ -3250,6 +3246,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = { static int __init cls_fl_init(void) { + tmplt_reoffload = &fl_tmplt_reoffload; return register_tcf_proto_ops(&cls_fl_ops); } -- 2.34.1

2 1