December 2020 - Kernel - mailweb.openeuler.org

[PATCH 1/2] ext4: Protect superblock modifications with a buffer lock
by Yang Yingliang 17 Dec '20

17 Dec '20

From: Jan Kara <jack(a)suse.cz> hulk inclusion category: bugfix bugzilla: 46758 CVE: NA --------------------------- Protect all superblock modifications (including checksum computation) with a superblock buffer lock. That way we are sure computed checksum matches current superblock contents (a mismatch could cause checksum failures in nojournal mode or if an unjournalled superblock update races with a journalled one). Also we avoid modifying superblock contents while it is being written out (which can cause DIF/DIX failures if we are running in nojournal mode). Signed-off-by: Jan Kara <jack(a)suse.cz> [backport the 10th patch: https://www.spinics.net/lists/linux-ext4/msg75423.html drop the other lock_buffer besides operation for orphan] Signed-off-by: yangerkun <yangerkun(a)huawei.com> Reviewed-by: zhangyi (F) <yi.zhang(a)huawei.com> Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com> --- fs/ext4/ext4_jbd2.c | 1 - fs/ext4/file.c | 1 + fs/ext4/inode.c | 1 + fs/ext4/namei.c | 6 ++++++ fs/ext4/resize.c | 4 ++++ fs/ext4/xattr.c | 1 + 6 files changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index a589b7f79558..f9ac7dfd93bf 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -361,7 +361,6 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line, struct buffer_head *bh = EXT4_SB(sb)->s_sbh; int err = 0; - ext4_superblock_csum_set(sb); if (ext4_handle_valid(handle)) { err = jbd2_journal_dirty_metadata(handle, bh); if (err) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 52d155b4e733..1703871fa2d0 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -434,6 +434,7 @@ static int ext4_sample_last_mounted(struct super_block *sb, goto out_journal; strlcpy(sbi->s_es->s_last_mounted, cp, sizeof(sbi->s_es->s_last_mounted)); + ext4_superblock_csum_set(sb); ext4_handle_dirty_super(handle, sb); out_journal: ext4_journal_stop(handle); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 
8d7e2cd9ae37..6ee89cd7a408 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5398,6 +5398,7 @@ static int ext4_do_update_inode(handle_t *handle, if (err) goto out_brelse; ext4_set_feature_large_file(sb); + ext4_superblock_csum_set(sb); ext4_handle_sync(handle); err = ext4_handle_dirty_super(handle, sb); } diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index ffc3695eb153..762eb6913240 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2889,7 +2889,10 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) (le32_to_cpu(sbi->s_es->s_inodes_count))) { /* Insert this inode at the head of the on-disk orphan list */ NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan); + lock_buffer(sbi->s_sbh); sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); + ext4_superblock_csum_set(sb); + unlock_buffer(sbi->s_sbh); dirty = true; } list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan); @@ -2972,7 +2975,10 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) mutex_unlock(&sbi->s_orphan_lock); goto out_brelse; } + lock_buffer(sbi->s_sbh); sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); + ext4_superblock_csum_set(inode->i_sb); + unlock_buffer(sbi->s_sbh); mutex_unlock(&sbi->s_orphan_lock); err = ext4_handle_dirty_super(handle, inode->i_sb); } else { diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 6a0c5c880354..c2e007d836e4 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -901,6 +901,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, ext4_kvfree_array_rcu(o_group_desc); le16_add_cpu(&es->s_reserved_gdt_blocks, -1); + ext4_superblock_csum_set(sb); err = ext4_handle_dirty_super(handle, sb); if (err) ext4_std_error(sb, err); @@ -1423,6 +1424,7 @@ static void ext4_update_super(struct super_block *sb, * active. 
*/ ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) + reserved_blocks); + ext4_superblock_csum_set(sb); /* Update the free space counts */ percpu_counter_add(&sbi->s_freeclusters_counter, @@ -1721,6 +1723,7 @@ static int ext4_group_extend_no_check(struct super_block *sb, ext4_blocks_count_set(es, o_blocks_count + add); ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + add); + ext4_superblock_csum_set(sb); ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); /* We add the blocks to the bitmap and set the group need init bit */ @@ -1882,6 +1885,7 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode) ext4_set_feature_meta_bg(sb); sbi->s_es->s_first_meta_bg = cpu_to_le32(num_desc_blocks(sb, sbi->s_groups_count)); + ext4_superblock_csum_set(sb); err = ext4_handle_dirty_super(handle, sb); if (err) { diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 24cf730ba6b0..ae029dccebc1 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -791,6 +791,7 @@ static void ext4_xattr_update_super_block(handle_t *handle, BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) { ext4_set_feature_xattr(sb); + ext4_superblock_csum_set(sb); ext4_handle_dirty_super(handle, sb); } } -- 2.25.1

1 1

[PATCH 01/15] arm64: KVM: Remove some extra semicolon in kvm_target_cpu
by Yang Yingliang 17 Dec '20

17 Dec '20

From: zhong jiang <zhongjiang(a)huawei.com> mainline inclusion from mainline-4.20 commit f0725345e3e127032376e4fcb6b0fc893237fcef category: bugfix bugzilla: NA CVE: NA ------------------------------------------------- There are some extra semicolons in kvm_target_cpu; remove them. Signed-off-by: zhong jiang <zhongjiang(a)huawei.com> Signed-off-by: Marc Zyngier <marc.zyngier(a)arm.com> Signed-off-by: Xiangyou Xie <xiexiangyou(a)huawei.com> Reviewed-by: Ying Fang <fangying1(a)huawei.com> Reviewed-by: Zenghui Yu <yuzenghui(a)huawei.com> Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com> --- arch/arm64/kvm/guest.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index ff9908c5d496..ff00e1c8023a 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -432,15 +432,15 @@ int __attribute_const__ kvm_target_cpu(void) return KVM_ARM_TARGET_CORTEX_A53; case ARM_CPU_PART_CORTEX_A57: return KVM_ARM_TARGET_CORTEX_A57; - }; + } break; case ARM_CPU_IMP_APM: switch (part_number) { case APM_CPU_PART_POTENZA: return KVM_ARM_TARGET_XGENE_POTENZA; - }; + } break; - }; + } /* Return a default generic target */ return KVM_ARM_TARGET_GENERIC_V8; -- 2.25.1

1 14

[PATCH] mm: thp: make the THP mapcount atomic against __split_huge_pmd_locked()
by Yang Yingliang 17 Dec '20

17 Dec '20

From: Andrea Arcangeli <aarcange(a)redhat.com> stable inclusion from linux-4.19.129 commit 453d8a481b127edfa7fb76af1bc7586b7a63bdd2 CVE: CVE-2020-29368 -------------------------------- commit c444eb564fb16645c172d550359cb3d75fe8a040 upstream. Write protect anon page faults require an accurate mapcount to decide if to break the COW or not. This is implemented in the THP path with reuse_swap_page() -> page_trans_huge_map_swapcount()/page_trans_huge_mapcount(). If the COW triggers while the other processes sharing the page are under a huge pmd split, to do an accurate reading, we must ensure the mapcount isn't computed while it's being transferred from the head page to the tail pages. reuse_swap_cache() already runs serialized by the page lock, so it's enough to add the page lock around __split_huge_pmd_locked too, in order to add the missing serialization. Note: the commit in "Fixes" is just to facilitate the backporting, because the code before such commit didn't try to do an accurate THP mapcount calculation and it instead used the page_count() to decide if to COW or not. Both the page_count and the pin_count are THP-wide refcounts, so they're inaccurate if used in reuse_swap_page(). Reverting such commit (besides the unrelated fix to the local anon_vma assignment) would have also opened the window for memory corruption side effects to certain workloads as documented in such commit header. Signed-off-by: Andrea Arcangeli <aarcange(a)redhat.com> Suggested-by: Jann Horn <jannh(a)google.com> Reported-by: Jann Horn <jannh(a)google.com> Acked-by: Kirill A. 
Shutemov <kirill.shutemov(a)linux.intel.com> Fixes: 6d0a07edd17c ("mm: thp: calculate the mapcount correctly for THP pages during WP faults") Cc: stable(a)vger.kernel.org Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> Reviewed-by: Jason Yan <yanaijie(a)huawei.com> Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com> --- mm/huge_memory.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index b60cbf77e902..816a7fd3c6ff 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2286,6 +2286,8 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, spinlock_t *ptl; struct mm_struct *mm = vma->vm_mm; unsigned long haddr = address & HPAGE_PMD_MASK; + bool was_locked = false; + pmd_t _pmd; mmu_notifier_invalidate_range_start(mm, haddr, haddr + HPAGE_PMD_SIZE); ptl = pmd_lock(mm, pmd); @@ -2295,11 +2297,32 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, * pmd against. Otherwise we can end up replacing wrong page. 
*/ VM_BUG_ON(freeze && !page); - if (page && page != pmd_page(*pmd)) - goto out; + if (page) { + VM_WARN_ON_ONCE(!PageLocked(page)); + was_locked = true; + if (page != pmd_page(*pmd)) + goto out; + } +repeat: if (pmd_trans_huge(*pmd)) { - page = pmd_page(*pmd); + if (!page) { + page = pmd_page(*pmd); + if (unlikely(!trylock_page(page))) { + get_page(page); + _pmd = *pmd; + spin_unlock(ptl); + lock_page(page); + spin_lock(ptl); + if (unlikely(!pmd_same(*pmd, _pmd))) { + unlock_page(page); + put_page(page); + page = NULL; + goto repeat; + } + put_page(page); + } + } if (PageMlocked(page)) clear_page_mlock(page); } else if (!(pmd_devmap(*pmd) || is_pmd_migration_entry(*pmd))) @@ -2307,6 +2330,8 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, __split_huge_pmd_locked(vma, pmd, haddr, freeze); out: spin_unlock(ptl); + if (!was_locked && page) + unlock_page(page); /* * No need to double call mmu_notifier->invalidate_range() callback. * They are 3 cases to consider inside __split_huge_pmd_locked(): -- 2.25.1

1 0

[PATCH] romfs: fix uninitialized memory leak in romfs_dev_read()
by Yang Yingliang 17 Dec '20

17 Dec '20

From: Jann Horn <jannh(a)google.com> stable inclusion from linux-4.19.142 commit 9660983738399465fd0e3b1977a61bbd29b2e5be CVE: CVE-2020-29371 -------------------------------- commit bcf85fcedfdd17911982a3e3564fcfec7b01eebd upstream. romfs has a superblock field that limits the size of the filesystem; data beyond that limit is never accessed. romfs_dev_read() fetches a caller-supplied number of bytes from the backing device. It returns 0 on success or an error code on failure; therefore, its API can't represent short reads, it's all-or-nothing. However, when romfs_dev_read() detects that the requested operation would cross the filesystem size limit, it currently silently truncates the requested number of bytes. This e.g. means that when the content of a file with size 0x1000 starts one byte before the filesystem size limit, ->readpage() will only fill a single byte of the supplied page while leaving the rest uninitialized, leaking that uninitialized memory to userspace. Fix it by returning an error code instead of truncating the read when the requested read operation would go beyond the end of the filesystem. 
Fixes: da4458bda237 ("NOMMU: Make it possible for RomFS to use MTD devices directly") Signed-off-by: Jann Horn <jannh(a)google.com> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> Reviewed-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> Cc: David Howells <dhowells(a)redhat.com> Cc: <stable(a)vger.kernel.org> Link: http://lkml.kernel.org/r/20200818013202.2246365-1-jannh@google.com Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> Reviewed-by: Jason Yan <yanaijie(a)huawei.com> Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com> --- fs/romfs/storage.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/romfs/storage.c b/fs/romfs/storage.c index f86f51f99ace..1dcadd22b440 100644 --- a/fs/romfs/storage.c +++ b/fs/romfs/storage.c @@ -221,10 +221,8 @@ int romfs_dev_read(struct super_block *sb, unsigned long pos, size_t limit; limit = romfs_maxsize(sb); - if (pos >= limit) + if (pos >= limit || buflen > limit - pos) return -EIO; - if (buflen > limit - pos) - buflen = limit - pos; #ifdef CONFIG_ROMFS_ON_MTD if (sb->s_mtd) -- 2.25.1

1 0

[PATCH v2 OLK-5.10 0/3] Backport 5.10.1 LTS
by Chen Jun 17 Dec '20

17 Dec '20

v1->v2: modify the commit message: linux-5.10.1 -> stable-5.10.1 *** BLURB HERE *** Greg Kroah-Hartman (3): Revert "md: change mddev 'chunk_sectors' from int to unsigned" Revert "dm raid: fix discard limits for raid1 and raid10" Linux 5.10.1 Makefile | 2 +- drivers/md/dm-raid.c | 12 +++++------- drivers/md/md.h | 4 ++-- 3 files changed, 8 insertions(+), 10 deletions(-) -- 2.25.0

2 6

[OLK-5.10 0/3] Backport 5.10.1 LTS
by Chen Jun 16 Dec '20

16 Dec '20

*** BLURB HERE *** Greg Kroah-Hartman (3): Revert "md: change mddev 'chunk_sectors' from int to unsigned" Revert "dm raid: fix discard limits for raid1 and raid10" Linux 5.10.1 Makefile | 2 +- drivers/md/dm-raid.c | 12 +++++------- drivers/md/md.h | 4 ++-- 3 files changed, 8 insertions(+), 10 deletions(-) -- 2.25.0

2 6

为什么你一定要参加 openEuler Summit 的 TC、Maintainer、SIG 组工作会议
by public openEuler 16 Dec '20

16 Dec '20

嗨，各位 openEuler 社区的开发者：很高兴能够向你们汇报 openEuler Summit 的会议进度。对于各位来说，本周最重要的事情是 TC、Maintainer 和 SIG 组开放工作会议的议程已经上线了。 *为什么你一定要参与 TC、Maintainer、SIG 组开放工作会议？* 1. 你可以清楚地了解 openEuler 社区的运作模式 2. 你可以和 TC、Maintainer、SIG 的成员共同探讨 openEuler 社区的下一个版本的计划和技术路线 3. 可以跟社区里的技术大佬进行面对面交流。 4. 更好地规划自己在社区中的成长路线 *会议时间和房间号：* TC 开放工作会议：时间 12/24 13:00-14:30，房间号 悦府6和悦府7；Maintainer 开放工作会议：时间 12/24 15:00-17:00，房间号 悦府6和悦府7；SIG 组开放工作会议：时间 12/25 13:00-15:30，房间号 大宴会厅B *TC 开放工作会议议程：* 1. 讨论 openEuler 支持 App Stream 机制 2. 申请成立智能运维 SIG 3. 讨论从 Gitee issue 整体切换到 bugzilla 4. 讨论2021年 TC 合作的开展方式 5. …… *Maintainer 开放工作会议议程：* 1. 开场 & 颁发 Maintainer 纪念徽章 2. openEuler 社区运作分析 3. 我在 openEuler 社区担任 Maintainer 的那些事儿 4. 自由问答 5. 总结 *参与本次 SIG 组开放工作会议的 SIG 组：* 1. sig-DDE 2. sig-UKUI 3. sig-Ha 4. sig-aarch32 5. A-Tune 6. iSulad & Container 7. sig-ai-bigdata 8. sig-security-facility 9. sig-confidential-computing 10. Compiler 11. Doc 12. Infrastructure 13. security-committee 14. sig-Compatibility-Infra 15. sig-QA 16. sig-release-management 17. Kernel 18. Virt *openEuler Summit 专属伴手礼：* [image: image.png] *openEuler 卫衣* [image: image.png] *openEuler Maintainer 纪念徽章* *欢迎通过 openEuler 社区开发者专属的报名渠道报名：* https://etherpad.openeuler.org/p/openEuler-Summit-2020 openEuler Summit 官网： https://openeuler.org/zh/interaction/summit-list/

1 0

[PATCH 1/2] arm64: arch_timer: only do cntvct workaround on VDSO path on D05
by Yang Yingliang 16 Dec '20

16 Dec '20

hulk inclusion category: bugfix bugzilla: NA CVE: NA -------------------------------- Doing the cntvct workaround on the VDSO path on other boards may cause unexpected errors, so we do this only on D05. Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com> Reviewed-by: Hanjun Guo <guohanjun(a)huawei.com> Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com> --- drivers/clocksource/arm_arch_timer.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 443079810300..50030326d27b 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -551,9 +551,14 @@ void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa * change both the default value and the vdso itself. */ if (wa->read_cntvct_el0) { - clocksource_counter.archdata.vdso_direct = true; - vdso_default = true; - vdso_fix = true; + if (wa->read_cntvct_el0 == hisi_161010101_read_cntvct_el0) { + clocksource_counter.archdata.vdso_direct = true; + vdso_default = true; + vdso_fix = true; + } else { + clocksource_counter.archdata.vdso_direct = false; + vdso_default = false; + } } } -- 2.25.1

1 1

[PATCH] scsi: libiscsi: Fix cmds hung when sd_shutdown
by Yang Yingliang 15 Dec '20

15 Dec '20

From: Wu Bo <wubo40(a)huawei.com> hulk inclusion category: bugfix bugzilla: NA CVE: NA https://gitee.com/src-openeuler/kernel/issues/I28N9J --------------------------- For some reason, during reboot the system, iscsi.service failed to logout all sessions. kernel will hang forever on its sd_sync_cache() logic, after issuing the SYNCHRONIZE_CACHE cmd to all still existent paths. [ 1044.098991] reboot: Mddev shutdown finished. [ 1044.099311] reboot: Usermodehelper disable finished. [ 1050.611244] connection2:0: ping timeout of 5 secs expired, recv timeout 5, last rx 4295152378, last ping 4295153633, now 4295154944 [ 1348.599676] Call trace: [ 1348.599887] __switch_to+0xe8/0x150 [ 1348.600113] __schedule+0x33c/0xa08 [ 1348.600372] schedule+0x2c/0x88 [ 1348.600567] schedule_timeout+0x184/0x3a8 [ 1348.600820] io_schedule_timeout+0x28/0x48 [ 1348.601089] wait_for_common_io.constprop.2+0x168/0x258 [ 1348.601425] wait_for_completion_io_timeout+0x28/0x38 [ 1348.601762] blk_execute_rq+0x98/0xd8 [ 1348.602006] __scsi_execute+0xe0/0x1e8 [ 1348.602262] sd_sync_cache+0xd0/0x220 [sd_mod] [ 1348.602551] sd_shutdown+0x6c/0xf8 [sd_mod] [ 1348.602826] device_shutdown+0x13c/0x250 [ 1348.603078] kernel_restart_prepare+0x5c/0x68 [ 1348.603400] kernel_restart+0x20/0x98 [ 1348.603683] __se_sys_reboot+0x214/0x260 [ 1348.603987] __arm64_sys_reboot+0x24/0x30 [ 1348.604300] el0_svc_common+0x80/0x1b8 [ 1348.604590] el0_svc_handler+0x78/0xe0 [ 1348.604877] el0_svc+0x10/0x260 d754941225 (scsi: libiscsi: Allow sd_shutdown on bad transport) Once solved this problem. The iscsi_eh_cmd_timed_out() function add system_state judgment, and will return BLK_EH_DONE and mark the result as DID_NO_CONNECT when system_state is not SYSTEM_RUNNING, To tell upper layers that the command was handled during the transport layer error handler helper. The scsi Mid Layer timeout handler function(scsi_times_out) will be abort the cmd if the scsi LLD timeout handler return BLK_EH_DONE. 
if abort cmd failed, will enter scsi EH logic. Scsi EH will do reset target logic, if reset target failed, Will call iscsi_eh_session_reset() function to drop the session. The iscsi_eh_session_reset function will wait for a relogin, session termination from userspace, or a recovery/replacement timeout. But at this time, the app iscsid has exited, and the session was marked as ISCSI_STATE_FAILED, So the SCSI EH process will never be scheduled back again. PID: 9123 TASK: ffff80020c1b4d80 CPU: 3 COMMAND: "scsi_eh_2" #0 [ffff00008632bb70] __switch_to at ffff000080088738 #1 [ffff00008632bb90] __schedule at ffff000080a00480 #2 [ffff00008632bc20] schedule at ffff000080a00b58 #3 [ffff00008632bc30] iscsi_eh_session_reset at ffff000000d1ab9c [libiscsi] #4 [ffff00008632bcb0] iscsi_eh_recover_target at ffff000000d1d1fc [libiscsi] #5 [ffff00008632bd00] scsi_try_target_reset at ffff0000806f0bac #6 [ffff00008632bd30] scsi_eh_ready_devs at ffff0000806f2724 #7 [ffff00008632bde0] scsi_error_handler at ffff0000806f41d4 #8 [ffff00008632be70] kthread at ffff000080119ae0 Reported-by: Tianxiong Lu <lutianxiong(a)huawei.com> Signed-off-by: Wu Bo <wubo40(a)huawei.com> Signed-off-by: Yu Kuai <yukuai3(a)huawei.com> Reviewed-by: Jason Yan <yanaijie(a)huawei.com> Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com> --- drivers/scsi/libiscsi.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index ec356e035c5f..35d603db33bb 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -2441,7 +2441,17 @@ int iscsi_eh_session_reset(struct scsi_cmnd *sc) mutex_lock(&session->eh_mutex); spin_lock_bh(&session->frwd_lock); - if (session->state == ISCSI_STATE_TERMINATE) { + + /* + * During shutdown, if session is prematurely disconnected, recovery + * won't happen and there will be hung cmds. 
To solve this case, all + * cmds would be enter scsi EH, but the EH path will wait for + * wait_event_interruptible() to complete until the state of the + * session is ISCSI_STATE_TERMINATE, ISCSI_STATE_LOGGED_IN or + * ISCSI_STATE_RECOVERY_FAILED. + */ + if (session->state == ISCSI_STATE_TERMINATE || + unlikely(system_state != SYSTEM_RUNNING)) { failed: ISCSI_DBG_EH(session, "failing session reset: Could not log back into " -- 2.25.1

1 0

[PATCH] scsi: iscsi: Perform connection failure entirely in kernel space
by Yang Yingliang 15 Dec '20

15 Dec '20

From: Bharath Ravi <rbharath(a)google.com> mainline inclusion from mainline-v5.7-rc1 commit 0ab710458da113a71c461c4df27e7f1353d9f864 category: bugfix bugzilla: NA CVE: NA https://gitee.com/src-openeuler/kernel/issues/I28N9J -------------------------------- Connection failure processing depends on a daemon being present to (at least) stop the connection and start recovery. This is a problem on a multipath scenario, where if the daemon failed for whatever reason, the SCSI path is never marked as down, multipath won't perform the failover and IO to the device will be forever waiting for that connection to come back. This patch performs the connection failure entirely inside the kernel. This way, the failover can happen and pending IO can continue even if the daemon is dead. Once the daemon comes alive again, it can execute recovery procedures if applicable. Cc: Mike Christie <mchristi(a)redhat.com> Cc: Lee Duncan <LDuncan(a)suse.com> Cc: Bart Van Assche <bvanassche(a)acm.org> Link: https://lore.kernel.org/r/20200125061925.191601-1-krisman@collabora.com Co-developed-by: Dave Clausen <dclausen(a)google.com> Co-developed-by: Nick Black <nlb(a)google.com> Co-developed-by: Vaibhav Nagarnaik <vnagarnaik(a)google.com> Co-developed-by: Anatol Pomazau <anatol(a)google.com> Co-developed-by: Tahsin Erdogan <tahsin(a)google.com> Co-developed-by: Frank Mayhar <fmayhar(a)google.com> Co-developed-by: Junho Ryu <jayr(a)google.com> Co-developed-by: Khazhismel Kumykov <khazhy(a)google.com> Reviewed-by: Reviewed-by: Khazhismel Kumykov <khazhy(a)google.com> Co-developed-by: Gabriel Krisman Bertazi <krisman(a)collabora.com> Reviewed-by: Lee Duncan <lduncan(a)suse.com> Signed-off-by: Bharath Ravi <rbharath(a)google.com> Signed-off-by: Dave Clausen <dclausen(a)google.com> Signed-off-by: Nick Black <nlb(a)google.com> Signed-off-by: Vaibhav Nagarnaik <vnagarnaik(a)google.com> Signed-off-by: Anatol Pomazau <anatol(a)google.com> Signed-off-by: Tahsin Erdogan <tahsin(a)google.com> Signed-off-by: 
Frank Mayhar <fmayhar(a)google.com> Signed-off-by: Junho Ryu <jayr(a)google.com> Signed-off-by: Khazhismel Kumykov <khazhy(a)google.com> Signed-off-by: Gabriel Krisman Bertazi <krisman(a)collabora.com> Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com> Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com> Reviewed-by: Jason Yan <yanaijie(a)huawei.com> Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com> --- drivers/scsi/scsi_transport_iscsi.c | 68 +++++++++++++++++++++++++++++ include/scsi/scsi_transport_iscsi.h | 1 + 2 files changed, 69 insertions(+) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 698347301198..e1ac96b8940b 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -81,6 +81,12 @@ struct iscsi_internal { struct transport_container session_cont; }; +/* Worker to perform connection failure on unresponsive connections + * completely in kernel space. + */ +static void stop_conn_work_fn(struct work_struct *work); +static DECLARE_WORK(stop_conn_work, stop_conn_work_fn); + static atomic_t iscsi_session_nr; /* sysfs session id for next new session */ static struct workqueue_struct *iscsi_eh_timer_workq; @@ -1609,6 +1615,7 @@ static DEFINE_MUTEX(rx_queue_mutex); static LIST_HEAD(sesslist); static DEFINE_SPINLOCK(sesslock); static LIST_HEAD(connlist); +static LIST_HEAD(connlist_err); static DEFINE_SPINLOCK(connlock); static uint32_t iscsi_conn_get_sid(struct iscsi_cls_conn *conn) @@ -2247,6 +2254,7 @@ iscsi_create_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid) mutex_init(&conn->ep_mutex); INIT_LIST_HEAD(&conn->conn_list); + INIT_LIST_HEAD(&conn->conn_list_err); conn->transport = transport; conn->cid = cid; @@ -2293,6 +2301,7 @@ int iscsi_destroy_conn(struct iscsi_cls_conn *conn) spin_lock_irqsave(&connlock, flags); list_del(&conn->conn_list); + list_del(&conn->conn_list_err); spin_unlock_irqrestore(&connlock, flags); 
transport_unregister_device(&conn->dev); @@ -2407,6 +2416,51 @@ int iscsi_offload_mesg(struct Scsi_Host *shost, } EXPORT_SYMBOL_GPL(iscsi_offload_mesg); +static void stop_conn_work_fn(struct work_struct *work) +{ + struct iscsi_cls_conn *conn, *tmp; + unsigned long flags; + LIST_HEAD(recovery_list); + + spin_lock_irqsave(&connlock, flags); + if (list_empty(&connlist_err)) { + spin_unlock_irqrestore(&connlock, flags); + return; + } + list_splice_init(&connlist_err, &recovery_list); + spin_unlock_irqrestore(&connlock, flags); + + list_for_each_entry_safe(conn, tmp, &recovery_list, conn_list_err) { + uint32_t sid = iscsi_conn_get_sid(conn); + struct iscsi_cls_session *session; + + mutex_lock(&rx_queue_mutex); + + session = iscsi_session_lookup(sid); + if (session) { + if (system_state != SYSTEM_RUNNING) { + session->recovery_tmo = 0; + conn->transport->stop_conn(conn, + STOP_CONN_TERM); + } else { + conn->transport->stop_conn(conn, + STOP_CONN_RECOVER); + } + } + + list_del_init(&conn->conn_list_err); + + mutex_unlock(&rx_queue_mutex); + + /* we don't want to hold rx_queue_mutex for too long, + * for instance if many conns failed at the same time, + * since this stall other iscsi maintenance operations. + * Give other users a chance to proceed. 
+ */ + cond_resched(); + } +} + void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) { struct nlmsghdr *nlh; @@ -2414,6 +2468,12 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) struct iscsi_uevent *ev; struct iscsi_internal *priv; int len = nlmsg_total_size(sizeof(*ev)); + unsigned long flags; + + spin_lock_irqsave(&connlock, flags); + list_add(&conn->conn_list_err, &connlist_err); + spin_unlock_irqrestore(&connlock, flags); + queue_work(system_unbound_wq, &stop_conn_work); priv = iscsi_if_transport_lookup(conn->transport); if (!priv) @@ -2743,11 +2803,19 @@ static int iscsi_if_destroy_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev) { struct iscsi_cls_conn *conn; + unsigned long flags; conn = iscsi_conn_lookup(ev->u.d_conn.sid, ev->u.d_conn.cid); if (!conn) return -EINVAL; + spin_lock_irqsave(&connlock, flags); + if (!list_empty(&conn->conn_list_err)) { + spin_unlock_irqrestore(&connlock, flags); + return -EAGAIN; + } + spin_unlock_irqrestore(&connlock, flags); + ISCSI_DBG_TRANS_CONN(conn, "Destroying transport conn\n"); if (transport->destroy_conn) transport->destroy_conn(conn); diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index b266d2a3bcb1..7de0bf6a9b60 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -203,6 +203,7 @@ extern void iscsi_ping_comp_event(uint32_t host_no, struct iscsi_cls_conn { struct list_head conn_list; /* item in connlist */ + struct list_head conn_list_err; /* item in connlist_err */ void *dd_data; /* LLD private data */ struct iscsi_transport *transport; uint32_t cid; /* connection id */ -- 2.25.1

1 0