October 2023 - Kernel - mailweb.openeuler.org

[PATCH OLK-5.10 v3 00/44] xfs: recent patches to fix xfs issues
by Long Li 08 Oct '23

08 Oct '23

Baokun Li (1): xfs: propagate the return value of xfs_log_force() to avoid soft lockup Colin Ian King (2): xfs: remove redundant initializations of pointers drop_leaf and save_leaf xfs: remove redundant pointer lip Darrick J. Wong (9): xfs: use setattr_copy to set vfs inode attributes xfs: remove kmem_zone typedef xfs: rename _zone variables to _cache xfs: compact deferred intent item structures xfs: create slab caches for frequently-used deferred items xfs: rename xfs_bmap_add_free to xfs_free_extent_later xfs: reduce the size of struct xfs_extent_free_item xfs: remove unused parameter from refcount code xfs: pass xfs_extent_free_item directly through the log intent code Dave Chinner (19): xfs: don't assert fail on perag references on teardown xfs: set prealloc flag in xfs_alloc_file_space() xfs: validity check agbnos on the AGFL xfs: validate block number being freed before adding to xefi xfs: don't reverse order of items in bulk AIL insertion xfs: use deferred frees for btree block freeing xfs: pass alloc flags through to xfs_extent_busy_flush() xfs: allow extent free intents to be retried xfs: don't block in busy flushing when freeing extents xfs: journal geometry is not properly bounds checked xfs: AGF length has never been bounds checked xfs: fix bounds check in xfs_defer_agfl_block() xfs: block reservation too large for minleft allocation xfs: punching delalloc extents on write failure is racy xfs: use byte ranges for write cleanup ranges xfs,iomap: move delalloc punching to iomap iomap: buffered write failure should not truncate the page cache xfs: xfs_bmap_punch_delalloc_range() should take a byte range xfs: fix off-by-one-block in xfs_discard_folio() Gaosheng Cui (1): xfs: remove xfs_setattr_time() declaration Guo Xuenan (1): xfs: set minleft correctly for randomly sparse inode allocations Jiapeng Chong (1): xfs: Remove redundant assignment to busy Long Li (6): xfs: fix dir3 block read verify fail during log recover Revert "xfs: propagate the return value of xfs_log_force() to avoid soft lockup" xfs: xfs_trans_cancel() path must check for log shutdown xfs: don't verify agf length when log recovery xfs: shutdown to ensure submits buffers on LSN boundaries xfs: update the last_sync_lsn with ctx start lsn yangerkun (4): xfs: keep growfs sb log item active until ail flush success xfs: fix xfs shutdown since we reserve more blocks in agfl fixup xfs: longest free extent no need consider postalloc xfs: shutdown xfs once inode double free fs/xfs/kmem.h | 4 - fs/xfs/libxfs/xfs_alloc.c | 390 +++++++++++++++++++++-------- fs/xfs/libxfs/xfs_alloc.h | 51 +++- fs/xfs/libxfs/xfs_alloc_btree.c | 2 +- fs/xfs/libxfs/xfs_attr_leaf.c | 2 - fs/xfs/libxfs/xfs_bmap.c | 90 +++---- fs/xfs/libxfs/xfs_bmap.h | 37 +-- fs/xfs/libxfs/xfs_bmap_btree.c | 27 +- fs/xfs/libxfs/xfs_btree.c | 4 +- fs/xfs/libxfs/xfs_btree.h | 2 +- fs/xfs/libxfs/xfs_da_btree.c | 6 +- fs/xfs/libxfs/xfs_da_btree.h | 3 +- fs/xfs/libxfs/xfs_defer.c | 70 +++++- fs/xfs/libxfs/xfs_defer.h | 3 + fs/xfs/libxfs/xfs_ialloc.c | 32 ++- fs/xfs/libxfs/xfs_ialloc_btree.c | 8 +- fs/xfs/libxfs/xfs_inode_fork.c | 4 +- fs/xfs/libxfs/xfs_inode_fork.h | 2 +- fs/xfs/libxfs/xfs_refcount.c | 56 +++-- fs/xfs/libxfs/xfs_refcount.h | 7 +- fs/xfs/libxfs/xfs_refcount_btree.c | 11 +- fs/xfs/libxfs/xfs_rmap.c | 21 +- fs/xfs/libxfs/xfs_rmap.h | 7 +- fs/xfs/libxfs/xfs_rmap_btree.c | 2 +- fs/xfs/libxfs/xfs_sb.c | 56 ++++- fs/xfs/libxfs/xfs_types.c | 23 ++ fs/xfs/libxfs/xfs_types.h | 2 + fs/xfs/xfs_aops.c | 32 +-- fs/xfs/xfs_bmap_item.c | 16 +- fs/xfs/xfs_bmap_item.h | 6 +- fs/xfs/xfs_bmap_util.c | 19 +- fs/xfs/xfs_bmap_util.h | 2 +- fs/xfs/xfs_buf.c | 16 +- fs/xfs/xfs_buf_item.c | 10 +- fs/xfs/xfs_buf_item.h | 11 +- fs/xfs/xfs_buf_item_recover.c | 9 +- fs/xfs/xfs_dquot.c | 26 +- fs/xfs/xfs_extent_busy.c | 36 ++- fs/xfs/xfs_extent_busy.h | 6 +- fs/xfs/xfs_extfree_item.c | 137 +++++++--- fs/xfs/xfs_extfree_item.h | 6 +- fs/xfs/xfs_file.c | 8 - fs/xfs/xfs_icache.c | 8 +- fs/xfs/xfs_icreate_item.c | 6 +- fs/xfs/xfs_icreate_item.h | 2 +- fs/xfs/xfs_inode.c | 2 +- fs/xfs/xfs_inode.h | 2 +- fs/xfs/xfs_inode_item.c | 6 +- fs/xfs/xfs_inode_item.h | 2 +- fs/xfs/xfs_iomap.c | 292 ++++++++++++++++++--- fs/xfs/xfs_iops.c | 56 +---- fs/xfs/xfs_iops.h | 1 - fs/xfs/xfs_log.c | 72 +++--- fs/xfs/xfs_log_priv.h | 2 +- fs/xfs/xfs_log_recover.c | 6 +- fs/xfs/xfs_mount.c | 12 +- fs/xfs/xfs_mru_cache.c | 2 +- fs/xfs/xfs_pnfs.c | 3 +- fs/xfs/xfs_qm.h | 2 +- fs/xfs/xfs_refcount_item.c | 16 +- fs/xfs/xfs_refcount_item.h | 6 +- fs/xfs/xfs_reflink.c | 7 +- fs/xfs/xfs_rmap_item.c | 16 +- fs/xfs/xfs_rmap_item.h | 6 +- fs/xfs/xfs_super.c | 233 ++++++++--------- fs/xfs/xfs_trans.c | 24 +- fs/xfs/xfs_trans.h | 2 +- fs/xfs/xfs_trans_ail.c | 5 +- fs/xfs/xfs_trans_dquot.c | 4 +- mm/filemap.c | 1 + 70 files changed, 1358 insertions(+), 700 deletions(-) -- 2.31.1

2 45

[PATCH openEuler-1.0-LTS] mm: memory-failure: use rcu lock instead of tasklist_lock when collect_procs()
by Tong Tiangen 08 Oct '23

08 Oct '23

mainline inclusion from mainline-v6.6-rc1 commit d256d1cd8da1cbc4615de69df71c87ce623fec2f category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I85WL9 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?… -------------------------------- We found a softlock issue in our test, analyzed the logs, and found that the relevant CPU call trace as follows: CPU0: _do_fork -> copy_process() -> write_lock_irq(&tasklist_lock) //Disable irq,waiting for //tasklist_lock CPU1: wp_page_copy() ->pte_offset_map_lock() -> spin_lock(&page->ptl); //Hold page->ptl -> ptep_clear_flush() -> flush_tlb_others() ... -> smp_call_function_many() -> arch_send_call_function_ipi_mask() -> csd_lock_wait() //Waiting for other CPUs respond //IPI CPU2: collect_procs_anon() -> read_lock(&tasklist_lock) //Hold tasklist_lock ->for_each_process(tsk) -> page_mapped_in_vma() -> page_vma_mapped_walk() -> map_pte() ->spin_lock(&page->ptl) //Waiting for page->ptl We can see that CPU1 waiting for CPU0 respond IPI，CPU0 waiting for CPU2 unlock tasklist_lock, CPU2 waiting for CPU1 unlock page->ptl. As a result, softlockup is triggered. For collect_procs_anon(), what we're doing is task list iteration, during the iteration, with the help of call_rcu(), the task_struct object is freed only after one or more grace periods elapse. the logic as follows: release_task() -> __exit_signal() -> __unhash_process() -> list_del_rcu() -> put_task_struct_rcu_user() -> call_rcu(&task->rcu, delayed_put_task_struct) delayed_put_task_struct() -> put_task_struct() -> if (refcount_sub_and_test()) __put_task_struct() -> free_task() Therefore, under the protection of the rcu lock, we can safely use get_task_struct() to ensure a safe reference to task_struct during the iteration. By removing the use of tasklist_lock in task list iteration, we can break the softlock chain above. The same logic can also be applied to: - collect_procs_file() - collect_procs_fsdax() - collect_procs_ksm() Link: https://lkml.kernel.org/r/20230828022527.241693-1-tongtiangen@huawei.com Signed-off-by: Tong Tiangen <tongtiangen(a)huawei.com> Acked-by: Naoya Horiguchi <naoya.horiguchi(a)nec.com> Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com> Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org> Cc: Miaohe Lin <linmiaohe(a)huawei.com> Cc: Paul E. McKenney <paulmck(a)kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> Conflicts: mm/filemap.c mm/ksm.c mm/memory-failure.c Signed-off-by: Tong Tiangen <tongtiangen(a)huawei.com> --- mm/filemap.c | 3 --- mm/memory-failure.c | 12 ++++++------ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index f33504b784a2..5bf4645256cb 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -166,9 +166,6 @@ static void page_cache_kill(struct page *page) * bdi.wb->list_lock (zap_pte_range->set_page_dirty) * ->inode->i_lock (zap_pte_range->set_page_dirty) * ->private_lock (zap_pte_range->__set_page_dirty_buffers) - * - * ->i_mmap_rwsem - * ->tasklist_lock (memory_failure, collect_procs_ao) */ static int page_cache_tree_insert(struct address_space *mapping, diff --git a/mm/memory-failure.c b/mm/memory-failure.c index fff715a27253..8924d7d9bffa 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -427,8 +427,8 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail, * on behalf of the thread group. Return task_struct of the (first found) * dedicated thread if found, and return NULL otherwise. * - * We already hold read_lock(&tasklist_lock) in the caller, so we don't - * have to call rcu_read_lock/unlock() in this function. + * We already hold rcu lock in the caller, so we don't have to call + * rcu_read_lock/unlock() in this function. */ static struct task_struct *find_early_kill_thread(struct task_struct *tsk) { @@ -478,7 +478,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, return; pgoff = page_to_pgoff(page); - read_lock(&tasklist_lock); + rcu_read_lock(); for_each_process (tsk) { struct anon_vma_chain *vmac; struct task_struct *t = task_early_kill(tsk, force_early); @@ -494,7 +494,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, add_to_kill(t, page, vma, to_kill, tkc); } } - read_unlock(&tasklist_lock); + rcu_read_unlock(); page_unlock_anon_vma_read(av); } @@ -509,7 +509,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, struct address_space *mapping = page->mapping; i_mmap_lock_read(mapping); - read_lock(&tasklist_lock); + rcu_read_lock(); for_each_process(tsk) { pgoff_t pgoff = page_to_pgoff(page); struct task_struct *t = task_early_kill(tsk, force_early); @@ -529,7 +529,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, add_to_kill(t, page, vma, to_kill, tkc); } } - read_unlock(&tasklist_lock); + rcu_read_unlock(); i_mmap_unlock_read(mapping); } -- 2.25.1

2 1

[PATCH openEuler-1.0-LTS v2] x86/topology: Fix erroneous smp_num_siblings on Intel Hybrid platforms
by Yu Liao 08 Oct '23

08 Oct '23

From: Zhang Rui <rui.zhang(a)intel.com> stable inclusion from stable-v4.19.293 commit f5867e5b6a780710251318cb2047e24c49935e43 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I85XNK CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id… -------------------------------- [ Upstream commit edc0a2b5957652f4685ef3516f519f84807087db ] Traditionally, all CPUs in a system have identical numbers of SMT siblings. That changes with hybrid processors where some logical CPUs have a sibling and others have none. Today, the CPU boot code sets the global variable smp_num_siblings when every CPU thread is brought up. The last thread to boot will overwrite it with the number of siblings of *that* thread. That last thread to boot will "win". If the thread is a Pcore, smp_num_siblings == 2. If it is an Ecore, smp_num_siblings == 1. smp_num_siblings describes if the *system* supports SMT. It should specify the maximum number of SMT threads among all cores. Ensure that smp_num_siblings represents the system-wide maximum number of siblings by always increasing its value. Never allow it to decrease. On MeteorLake-P platform, this fixes a problem that the Ecore CPUs are not updated in any cpu sibling map because the system is treated as an UP system when probing Ecore CPUs. Below shows part of the CPU topology information before and after the fix, for both Pcore and Ecore CPU (cpu0 is Pcore, cpu 12 is Ecore). ... -/sys/devices/system/cpu/cpu0/topology/package_cpus:000fff -/sys/devices/system/cpu/cpu0/topology/package_cpus_list:0-11 +/sys/devices/system/cpu/cpu0/topology/package_cpus:3fffff +/sys/devices/system/cpu/cpu0/topology/package_cpus_list:0-21 ... -/sys/devices/system/cpu/cpu12/topology/package_cpus:001000 -/sys/devices/system/cpu/cpu12/topology/package_cpus_list:12 +/sys/devices/system/cpu/cpu12/topology/package_cpus:3fffff +/sys/devices/system/cpu/cpu12/topology/package_cpus_list:0-21 Notice that the "before" 'package_cpus_list' has only one CPU. This means that userspace tools like lscpu will see a little laptop like an 11-socket system: -Core(s) per socket: 1 -Socket(s): 11 +Core(s) per socket: 16 +Socket(s): 1 This is also expected to make the scheduler do rather wonky things too. [ dhansen: remove CPUID detail from changelog, add end user effects ] CC: stable(a)kernel.org Fixes: bbb65d2d365e ("x86: use cpuid vector 0xb when available for detecting cpu topology") Fixes: 95f3d39ccf7a ("x86/cpu/topology: Provide detect_extended_topology_early()") Suggested-by: Len Brown <len.brown(a)intel.com> Signed-off-by: Zhang Rui <rui.zhang(a)intel.com> Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com> Acked-by: Peter Zijlstra (Intel) <peterz(a)infradead.org> Link: https://lore.kernel.org/all/20230323015640.27906-1-rui.zhang%40intel.com Signed-off-by: Sasha Levin <sashal(a)kernel.org> Conflicts: arch/x86/kernel/cpu/topology.c Signed-off-by: Yu Liao <liaoyu15(a)huawei.com> --- arch/x86/kernel/cpu/topology.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 7c130bdfd746..5c15e1329fcb 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -77,7 +77,7 @@ int detect_extended_topology_early(struct cpuinfo_x86 *c) * initial apic id, which also represents 32-bit extended x2apic id. */ c->initial_apicid = edx; - smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx); + smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx)); #endif return 0; } @@ -106,7 +106,8 @@ int detect_extended_topology(struct cpuinfo_x86 *c) */ cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx); c->initial_apicid = edx; - core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx); + core_level_siblings = LEVEL_MAX_SIBLINGS(ebx); + smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx)); core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); die_level_siblings = LEVEL_MAX_SIBLINGS(ebx); die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); -- 2.25.1

2 1

[PATCH openEuler-1.0-LTS] x86/topology: Fix erroneous smp_num_siblings on Intel Hybrid platforms
by Yu Liao 07 Oct '23

07 Oct '23

From: Zhang Rui <rui.zhang(a)intel.com> stable inclusion from stable-v4.19.293 commit f5867e5b6a780710251318cb2047e24c49935e43 category: bugfix bugzilla: 188350 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id… -------------------------------- [ Upstream commit edc0a2b5957652f4685ef3516f519f84807087db ] Traditionally, all CPUs in a system have identical numbers of SMT siblings. That changes with hybrid processors where some logical CPUs have a sibling and others have none. Today, the CPU boot code sets the global variable smp_num_siblings when every CPU thread is brought up. The last thread to boot will overwrite it with the number of siblings of *that* thread. That last thread to boot will "win". If the thread is a Pcore, smp_num_siblings == 2. If it is an Ecore, smp_num_siblings == 1. smp_num_siblings describes if the *system* supports SMT. It should specify the maximum number of SMT threads among all cores. Ensure that smp_num_siblings represents the system-wide maximum number of siblings by always increasing its value. Never allow it to decrease. On MeteorLake-P platform, this fixes a problem that the Ecore CPUs are not updated in any cpu sibling map because the system is treated as an UP system when probing Ecore CPUs. Below shows part of the CPU topology information before and after the fix, for both Pcore and Ecore CPU (cpu0 is Pcore, cpu 12 is Ecore). ... -/sys/devices/system/cpu/cpu0/topology/package_cpus:000fff -/sys/devices/system/cpu/cpu0/topology/package_cpus_list:0-11 +/sys/devices/system/cpu/cpu0/topology/package_cpus:3fffff +/sys/devices/system/cpu/cpu0/topology/package_cpus_list:0-21 ... -/sys/devices/system/cpu/cpu12/topology/package_cpus:001000 -/sys/devices/system/cpu/cpu12/topology/package_cpus_list:12 +/sys/devices/system/cpu/cpu12/topology/package_cpus:3fffff +/sys/devices/system/cpu/cpu12/topology/package_cpus_list:0-21 Notice that the "before" 'package_cpus_list' has only one CPU. This means that userspace tools like lscpu will see a little laptop like an 11-socket system: -Core(s) per socket: 1 -Socket(s): 11 +Core(s) per socket: 16 +Socket(s): 1 This is also expected to make the scheduler do rather wonky things too. [ dhansen: remove CPUID detail from changelog, add end user effects ] CC: stable(a)kernel.org Fixes: bbb65d2d365e ("x86: use cpuid vector 0xb when available for detecting cpu topology") Fixes: 95f3d39ccf7a ("x86/cpu/topology: Provide detect_extended_topology_early()") Suggested-by: Len Brown <len.brown(a)intel.com> Signed-off-by: Zhang Rui <rui.zhang(a)intel.com> Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com> Acked-by: Peter Zijlstra (Intel) <peterz(a)infradead.org> Link: https://lore.kernel.org/all/20230323015640.27906-1-rui.zhang%40intel.com Signed-off-by: Sasha Levin <sashal(a)kernel.org> Conflicts: arch/x86/kernel/cpu/topology.c Signed-off-by: Yu Liao <liaoyu15(a)huawei.com> --- arch/x86/kernel/cpu/topology.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 7c130bdfd746..5c15e1329fcb 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -77,7 +77,7 @@ int detect_extended_topology_early(struct cpuinfo_x86 *c) * initial apic id, which also represents 32-bit extended x2apic id. */ c->initial_apicid = edx; - smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx); + smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx)); #endif return 0; } @@ -106,7 +106,8 @@ int detect_extended_topology(struct cpuinfo_x86 *c) */ cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx); c->initial_apicid = edx; - core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx); + core_level_siblings = LEVEL_MAX_SIBLINGS(ebx); + smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx)); core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); die_level_siblings = LEVEL_MAX_SIBLINGS(ebx); die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); -- 2.25.1

2 1

[PATCH OLK-5.10 00/28] fix CVE-2023-20569
by Jialin Zhang 07 Oct '23

07 Oct '23

New architectural features and CPUID bits related to the Speculative Return Stack Overflow (SRSO) vulnerability. Arnaldo Carvalho de Melo (2): tools headers cpufeatures: Sync with the kernel sources tools arch x86: Sync the msr-index.h copy with the kernel sources Borislav Petkov (AMD) (9): x86/bugs: Increase the x86 bugs vector size to two u32s x86/srso: Add a Speculative RAS Overflow mitigation x86/srso: Add IBPB_BRTYPE support x86/srso: Add SRSO_NO support x86/srso: Add IBPB x86/srso: Add IBPB on VMEXIT x86/srso: Tie SBPB bit setting to microcode patch detection x86/srso: Disable the mitigation on unaffected configurations x86/srso: Correct the mitigation status when SMT is disabled Jialin Zhang (1): x86/cpufeatures: Fix abi breakage caused by NCAPINTS in cpufeature header file. Josh Poimboeuf (2): x86/srso: Fix return thunks in generated code objtool: Add frame-pointer-specific function ignore Kim Phillips (1): x86/cpu, kvm: Add support for CPUID_80000021_EAX Nick Desaulniers (1): x86/srso: Fix build breakage with the LLVM linker Nikolay Borisov (1): kabi: Allow extra bugsints (bsc#1213927). Peter Zijlstra (10): x86/ibt: Add ANNOTATE_NOENDBR x86/cpu: Fix __x86_return_thunk symbol type x86/cpu: Fix up srso_safe_ret() and __x86_return_thunk() x86/alternative: Make custom return thunk unconditional x86/cpu: Clean up SRSO return thunk mess x86/cpu: Rename original retbleed methods x86/cpu: Rename srso_(.*)_alias to srso_alias_\1 x86/cpu: Cleanup the untrain mess objtool/x86: Fixup frame-pointer vs rethunk objtool/x86: Fix SRSO mess Sean Christopherson (1): x86/retpoline: Don't clobber RFLAGS during srso_safe_ret() Documentation/admin-guide/hw-vuln/index.rst | 1 + Documentation/admin-guide/hw-vuln/srso.rst | 133 ++++++++++++ .../admin-guide/kernel-parameters.txt | 11 + arch/x86/Kconfig | 7 + arch/x86/include/asm/cpufeature.h | 23 +- arch/x86/include/asm/cpufeatures.h | 15 +- arch/x86/include/asm/msr-index.h | 1 + arch/x86/include/asm/nospec-branch.h | 34 ++- arch/x86/include/asm/processor.h | 7 +- arch/x86/kernel/alternative.c | 2 +- arch/x86/kernel/cpu/amd.c | 19 ++ arch/x86/kernel/cpu/bugs.c | 197 ++++++++++++++++++ arch/x86/kernel/cpu/common.c | 17 +- arch/x86/kernel/cpu/mkcapflags.sh | 2 +- arch/x86/kernel/cpu/proc.c | 2 +- arch/x86/kernel/cpu/scattered.c | 3 + arch/x86/kernel/vmlinux.lds.S | 38 +++- arch/x86/kvm/cpuid.c | 3 + arch/x86/kvm/svm/svm.c | 4 +- arch/x86/kvm/svm/vmenter.S | 3 + arch/x86/lib/retpoline.S | 135 ++++++++++-- drivers/base/cpu.c | 8 + include/linux/cpu.h | 2 + include/linux/objtool.h | 28 +++ tools/arch/x86/include/asm/cpufeatures.h | 16 +- tools/arch/x86/include/asm/msr-index.h | 1 + tools/include/linux/objtool.h | 28 +++ tools/objtool/arch.h | 1 + tools/objtool/arch/x86/decode.c | 6 + tools/objtool/check.c | 41 +++- tools/objtool/elf.h | 1 + .../perf/trace/beauty/tracepoints/x86_msr.sh | 2 +- 32 files changed, 740 insertions(+), 51 deletions(-) create mode 100644 Documentation/admin-guide/hw-vuln/srso.rst -- 2.25.1

3 30

[PATCH openEuler-1.0-LTS] ipv4: fix null-deref in ipv4_link_failure
by Lu Wei 07 Oct '23

07 Oct '23

From: Kyle Zeng <zengyhkyle(a)gmail.com> mainline inclusion from mainline-v6.6-rc3 commit 0113d9c9d1ccc07f5a3710dac4aa24b6d711278c category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I85DZB CVE: CVE-2023-42754 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?… -------------------------------- Currently, we assume the skb is associated with a device before calling __ip_options_compile, which is not always the case if it is re-routed by ipvs. When skb->dev is NULL, dev_net(skb->dev) will become null-dereference. This patch adds a check for the edge case and switch to use the net_device from the rtable when skb->dev is NULL. Fixes: ed0de45a1008 ("ipv4: recompile ip options in ipv4_link_failure") Suggested-by: David Ahern <dsahern(a)kernel.org> Signed-off-by: Kyle Zeng <zengyhkyle(a)gmail.com> Cc: Stephen Suryaputra <ssuryaextr(a)gmail.com> Cc: Vadim Fedorenko <vfedorenko(a)novek.ru> Reviewed-by: David Ahern <dsahern(a)kernel.org> Signed-off-by: David S. Miller <davem(a)davemloft.net> Signed-off-by: Lu Wei <luwei32(a)huawei.com> --- net/ipv4/route.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 68f67bf99ed7..86096e2e43b0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1212,6 +1212,7 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) static void ipv4_send_dest_unreach(struct sk_buff *skb) { + struct net_device *dev; struct ip_options opt; int res; @@ -1229,7 +1230,8 @@ static void ipv4_send_dest_unreach(struct sk_buff *skb) opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr); rcu_read_lock(); - res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL); + dev = skb->dev ? skb->dev : skb_rtable(skb)->dst.dev; + res = __ip_options_compile(dev_net(dev), &opt, skb, NULL); rcu_read_unlock(); if (res) -- 2.34.1

2 1

[PATCH OLK-5.10 00/28] fix CVE-2023-20569
by Jialin Zhang 07 Oct '23

07 Oct '23

New architectural features and CPUID bits related to the Speculative Return Stack Overflow (SRSO) vulnerability. Arnaldo Carvalho de Melo (2): tools headers cpufeatures: Sync with the kernel sources tools arch x86: Sync the msr-index.h copy with the kernel sources Borislav Petkov (AMD) (9): x86/bugs: Increase the x86 bugs vector size to two u32s x86/srso: Add a Speculative RAS Overflow mitigation x86/srso: Add IBPB_BRTYPE support x86/srso: Add SRSO_NO support x86/srso: Add IBPB x86/srso: Add IBPB on VMEXIT x86/srso: Tie SBPB bit setting to microcode patch detection x86/srso: Disable the mitigation on unaffected configurations x86/srso: Correct the mitigation status when SMT is disabled Jialin Zhang (1): x86/cpufeatures: Fix abi breakage caused by NCAPINTS in cpufeature header file. Josh Poimboeuf (2): x86/srso: Fix return thunks in generated code objtool: Add frame-pointer-specific function ignore Kim Phillips (1): x86/cpu, kvm: Add support for CPUID_80000021_EAX Nick Desaulniers (1): x86/srso: Fix build breakage with the LLVM linker Nikolay Borisov (1): kabi: Allow extra bugsints (bsc#1213927). Peter Zijlstra (10): x86/ibt: Add ANNOTATE_NOENDBR x86/cpu: Fix __x86_return_thunk symbol type x86/cpu: Fix up srso_safe_ret() and __x86_return_thunk() x86/alternative: Make custom return thunk unconditional x86/cpu: Clean up SRSO return thunk mess x86/cpu: Rename original retbleed methods x86/cpu: Rename srso_(.*)_alias to srso_alias_\1 x86/cpu: Cleanup the untrain mess objtool/x86: Fixup frame-pointer vs rethunk objtool/x86: Fix SRSO mess Sean Christopherson (1): x86/retpoline: Don't clobber RFLAGS during srso_safe_ret() Documentation/admin-guide/hw-vuln/index.rst | 1 + Documentation/admin-guide/hw-vuln/srso.rst | 133 ++++++++++++ .../admin-guide/kernel-parameters.txt | 11 + arch/x86/Kconfig | 7 + arch/x86/include/asm/cpufeature.h | 23 +- arch/x86/include/asm/cpufeatures.h | 15 +- arch/x86/include/asm/msr-index.h | 1 + arch/x86/include/asm/nospec-branch.h | 34 ++- arch/x86/include/asm/processor.h | 7 +- arch/x86/kernel/alternative.c | 2 +- arch/x86/kernel/cpu/amd.c | 19 ++ arch/x86/kernel/cpu/bugs.c | 197 ++++++++++++++++++ arch/x86/kernel/cpu/common.c | 17 +- arch/x86/kernel/cpu/mkcapflags.sh | 2 +- arch/x86/kernel/cpu/proc.c | 2 +- arch/x86/kernel/cpu/scattered.c | 3 + arch/x86/kernel/vmlinux.lds.S | 38 +++- arch/x86/kvm/cpuid.c | 3 + arch/x86/kvm/svm/svm.c | 4 +- arch/x86/kvm/svm/vmenter.S | 3 + arch/x86/lib/retpoline.S | 135 ++++++++++-- drivers/base/cpu.c | 8 + include/linux/cpu.h | 2 + include/linux/objtool.h | 28 +++ tools/arch/x86/include/asm/cpufeatures.h | 16 +- tools/arch/x86/include/asm/msr-index.h | 1 + tools/include/linux/objtool.h | 28 +++ tools/objtool/arch.h | 1 + tools/objtool/arch/x86/decode.c | 6 + tools/objtool/check.c | 41 +++- tools/objtool/elf.h | 1 + .../perf/trace/beauty/tracepoints/x86_msr.sh | 2 +- 32 files changed, 740 insertions(+), 51 deletions(-) create mode 100644 Documentation/admin-guide/hw-vuln/srso.rst -- 2.25.1

1 28

[PATCH OLK-5.10] net/sched: Retire rsvp classifier
by Dong Chenchen 06 Oct '23

06 Oct '23

From: Jamal Hadi Salim <jhs(a)mojatatu.com> stable inclusion from stable-v5.10.196 commit 8db844077ec9912d75952c80d76da71fc2412852 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I84B2W CVE: CVE-2023-42755 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/patch/?id=… -------------------------------- commit 265b4da82dbf5df04bee5a5d46b7474b1aaf326a upstream. The rsvp classifier has served us well for about a quarter of a century but has has not been getting much maintenance attention due to lack of known users. Signed-off-by: Jamal Hadi Salim <jhs(a)mojatatu.com> Acked-by: Jiri Pirko <jiri(a)nvidia.com> Signed-off-by: Paolo Abeni <pabeni(a)redhat.com> Signed-off-by: Kyle Zeng <zengyhkyle(a)gmail.com> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> Signed-off-by: Dong Chenchen <dongchenchen2(a)huawei.com> --- net/sched/Kconfig | 28 -- net/sched/Makefile | 2 - net/sched/cls_rsvp.c | 24 -- net/sched/cls_rsvp.h | 777 ------------------------------------------ net/sched/cls_rsvp6.c | 24 -- 5 files changed, 855 deletions(-) delete mode 100644 net/sched/cls_rsvp.c delete mode 100644 net/sched/cls_rsvp.h delete mode 100644 net/sched/cls_rsvp6.c diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 697522371914..2046c16b29f0 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -548,34 +548,6 @@ config CLS_U32_MARK help Say Y here to be able to use netfilter marks as u32 key. -config NET_CLS_RSVP - tristate "IPv4 Resource Reservation Protocol (RSVP)" - select NET_CLS - help - The Resource Reservation Protocol (RSVP) permits end systems to - request a minimum and maximum data flow rate for a connection; this - is important for real time data such as streaming sound or video. - - Say Y here if you want to be able to classify outgoing packets based - on their RSVP requests. - - To compile this code as a module, choose M here: the - module will be called cls_rsvp. - -config NET_CLS_RSVP6 - tristate "IPv6 Resource Reservation Protocol (RSVP6)" - select NET_CLS - help - The Resource Reservation Protocol (RSVP) permits end systems to - request a minimum and maximum data flow rate for a connection; this - is important for real time data such as streaming sound or video. - - Say Y here if you want to be able to classify outgoing packets based - on their RSVP requests and you are using the IPv6 protocol. - - To compile this code as a module, choose M here: the - module will be called cls_rsvp6. - config NET_CLS_FLOW tristate "Flow classifier" select NET_CLS diff --git a/net/sched/Makefile b/net/sched/Makefile index 4311fdb21119..df2bcd785f7d 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -68,8 +68,6 @@ obj-$(CONFIG_NET_SCH_TAPRIO) += sch_taprio.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o obj-$(CONFIG_NET_CLS_FW) += cls_fw.o -obj-$(CONFIG_NET_CLS_RSVP) += cls_rsvp.o -obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c deleted file mode 100644 index de1c1d4da597..000000000000 --- a/net/sched/cls_rsvp.c +++ /dev/null @@ -1,24 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * net/sched/cls_rsvp.c Special RSVP packet classifier for IPv4. - * - * Authors: Alexey Kuznetsov, <kuznet(a)ms2.inr.ac.ru> - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/skbuff.h> -#include <net/ip.h> -#include <net/netlink.h> -#include <net/act_api.h> -#include <net/pkt_cls.h> - -#define RSVP_DST_LEN 1 -#define RSVP_ID "rsvp" -#define RSVP_OPS cls_rsvp_ops - -#include "cls_rsvp.h" -MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h deleted file mode 100644 index d36949d9382c..000000000000 --- a/net/sched/cls_rsvp.h +++ /dev/null @@ -1,777 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers. - * - * Authors: Alexey Kuznetsov, <kuznet(a)ms2.inr.ac.ru> - */ - -/* - Comparing to general packet classification problem, - RSVP needs only sevaral relatively simple rules: - - * (dst, protocol) are always specified, - so that we are able to hash them. - * src may be exact, or may be wildcard, so that - we can keep a hash table plus one wildcard entry. - * source port (or flow label) is important only if src is given. - - IMPLEMENTATION. - - We use a two level hash table: The top level is keyed by - destination address and protocol ID, every bucket contains a list - of "rsvp sessions", identified by destination address, protocol and - DPI(="Destination Port ID"): triple (key, mask, offset). - - Every bucket has a smaller hash table keyed by source address - (cf. RSVP flowspec) and one wildcard entry for wildcard reservations. - Every bucket is again a list of "RSVP flows", selected by - source address and SPI(="Source Port ID" here rather than - "security parameter index"): triple (key, mask, offset). - - - NOTE 1. All the packets with IPv6 extension headers (but AH and ESP) - and all fragmented packets go to the best-effort traffic class. - - - NOTE 2. Two "port id"'s seems to be redundant, rfc2207 requires - only one "Generalized Port Identifier". So that for classic - ah, esp (and udp,tcp) both *pi should coincide or one of them - should be wildcard. - - At first sight, this redundancy is just a waste of CPU - resources. But DPI and SPI add the possibility to assign different - priorities to GPIs. Look also at note 4 about tunnels below. - - - NOTE 3. One complication is the case of tunneled packets. - We implement it as following: if the first lookup - matches a special session with "tunnelhdr" value not zero, - flowid doesn't contain the true flow ID, but the tunnel ID (1...255). - In this case, we pull tunnelhdr bytes and restart lookup - with tunnel ID added to the list of keys. Simple and stupid 8)8) - It's enough for PIMREG and IPIP. - - - NOTE 4. Two GPIs make it possible to parse even GRE packets. - F.e. DPI can select ETH_P_IP (and necessary flags to make - tunnelhdr correct) in GRE protocol field and SPI matches - GRE key. Is it not nice? 8)8) - - - Well, as result, despite its simplicity, we get a pretty - powerful classification engine. */ - - -struct rsvp_head { - u32 tmap[256/32]; - u32 hgenerator; - u8 tgenerator; - struct rsvp_session __rcu *ht[256]; - struct rcu_head rcu; -}; - -struct rsvp_session { - struct rsvp_session __rcu *next; - __be32 dst[RSVP_DST_LEN]; - struct tc_rsvp_gpi dpi; - u8 protocol; - u8 tunnelid; - /* 16 (src,sport) hash slots, and one wildcard source slot */ - struct rsvp_filter __rcu *ht[16 + 1]; - struct rcu_head rcu; -}; - - -struct rsvp_filter { - struct rsvp_filter __rcu *next; - __be32 src[RSVP_DST_LEN]; - struct tc_rsvp_gpi spi; - u8 tunnelhdr; - - struct tcf_result res; - struct tcf_exts exts; - - u32 handle; - struct rsvp_session *sess; - struct rcu_work rwork; -}; - -static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid) -{ - unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1]; - - h ^= h>>16; - h ^= h>>8; - return (h ^ protocol ^ tunnelid) & 0xFF; -} - -static inline unsigned int hash_src(__be32 *src) -{ - unsigned int h = (__force __u32)src[RSVP_DST_LEN-1]; - - h ^= h>>16; - h ^= h>>8; - h ^= h>>4; - return h & 0xF; -} - -#define RSVP_APPLY_RESULT() \ -{ \ - int r = tcf_exts_exec(skb, &f->exts, res); \ - if (r < 0) \ - continue; \ - else if (r > 0) \ - return r; \ -} - -static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) -{ - struct rsvp_head *head = rcu_dereference_bh(tp->root); - struct rsvp_session *s; - struct rsvp_filter *f; - unsigned int h1, h2; - __be32 *dst, *src; - u8 protocol; - u8 tunnelid = 0; - u8 *xprt; -#if RSVP_DST_LEN == 4 - struct ipv6hdr *nhptr; - - if (!pskb_network_may_pull(skb, sizeof(*nhptr))) - return -1; - nhptr = ipv6_hdr(skb); -#else - struct iphdr *nhptr; - - if (!pskb_network_may_pull(skb, sizeof(*nhptr))) - return -1; - nhptr = ip_hdr(skb); -#endif -restart: - -#if RSVP_DST_LEN == 4 - src = &nhptr->saddr.s6_addr32[0]; - dst = &nhptr->daddr.s6_addr32[0]; - protocol = nhptr->nexthdr; - xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr); -#else - src = &nhptr->saddr; - dst = &nhptr->daddr; - protocol = nhptr->protocol; - xprt = ((u8 *)nhptr) + (nhptr->ihl<<2); - if (ip_is_fragment(nhptr)) - return -1; -#endif - - h1 = hash_dst(dst, protocol, tunnelid); - h2 = hash_src(src); - - for (s = rcu_dereference_bh(head->ht[h1]); s; - s = rcu_dereference_bh(s->next)) { - if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] && - protocol == s->protocol && - !(s->dpi.mask & - (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) && -#if RSVP_DST_LEN == 4 - dst[0] == s->dst[0] && - dst[1] == s->dst[1] && - dst[2] == s->dst[2] && -#endif - tunnelid == s->tunnelid) { - - for (f = rcu_dereference_bh(s->ht[h2]); f; - f = rcu_dereference_bh(f->next)) { - if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] && - !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key)) -#if RSVP_DST_LEN == 4 - && - src[0] == f->src[0] && - src[1] == f->src[1] && - src[2] == f->src[2] -#endif - ) { - *res = f->res; - RSVP_APPLY_RESULT(); - -matched: - if (f->tunnelhdr == 0) - return 0; - - tunnelid = f->res.classid; - nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr)); - goto restart; - } - } - - /* And wildcard bucket... */ - for (f = rcu_dereference_bh(s->ht[16]); f; - f = rcu_dereference_bh(f->next)) { - *res = f->res; - RSVP_APPLY_RESULT(); - goto matched; - } - return -1; - } - } - return -1; -} - -static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h) -{ - struct rsvp_head *head = rtnl_dereference(tp->root); - struct rsvp_session *s; - struct rsvp_filter __rcu **ins; - struct rsvp_filter *pins; - unsigned int h1 = h & 0xFF; - unsigned int h2 = (h >> 8) & 0xFF; - - for (s = rtnl_dereference(head->ht[h1]); s; - s = rtnl_dereference(s->next)) { - for (ins = &s->ht[h2], pins = rtnl_dereference(*ins); ; - ins = &pins->next, pins = rtnl_dereference(*ins)) { - if (pins->handle == h) { - RCU_INIT_POINTER(n->next, pins->next); - rcu_assign_pointer(*ins, n); - return; - } - } - } - - /* Something went wrong if we are trying to replace a non-existant - * node. Mind as well halt instead of silently failing. - */ - BUG_ON(1); -} - -static void *rsvp_get(struct tcf_proto *tp, u32 handle) -{ - struct rsvp_head *head = rtnl_dereference(tp->root); - struct rsvp_session *s; - struct rsvp_filter *f; - unsigned int h1 = handle & 0xFF; - unsigned int h2 = (handle >> 8) & 0xFF; - - if (h2 > 16) - return NULL; - - for (s = rtnl_dereference(head->ht[h1]); s; - s = rtnl_dereference(s->next)) { - for (f = rtnl_dereference(s->ht[h2]); f; - f = rtnl_dereference(f->next)) { - if (f->handle == handle) - return f; - } - } - return NULL; -} - -static int rsvp_init(struct tcf_proto *tp) -{ - struct rsvp_head *data; - - data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL); - if (data) { - rcu_assign_pointer(tp->root, data); - return 0; - } - return -ENOBUFS; -} - -static void __rsvp_delete_filter(struct rsvp_filter *f) -{ - tcf_exts_destroy(&f->exts); - tcf_exts_put_net(&f->exts); - kfree(f); -} - -static void rsvp_delete_filter_work(struct work_struct *work) -{ - struct rsvp_filter *f = container_of(to_rcu_work(work), - struct rsvp_filter, - rwork); - rtnl_lock(); - __rsvp_delete_filter(f); - rtnl_unlock(); -} - -static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) -{ - tcf_unbind_filter(tp, &f->res); - /* all classifiers are required to call tcf_exts_destroy() after rcu - * grace period, since converted-to-rcu actions are relying on that - * in cleanup() callback - */ - if (tcf_exts_get_net(&f->exts)) - tcf_queue_work(&f->rwork, rsvp_delete_filter_work); - else - __rsvp_delete_filter(f); -} - -static void rsvp_destroy(struct tcf_proto *tp, bool rtnl_held, - struct netlink_ext_ack *extack) -{ - struct rsvp_head *data = rtnl_dereference(tp->root); - int h1, h2; - - if (data == NULL) - return; - - for (h1 = 0; h1 < 256; h1++) { - struct rsvp_session *s; - - while ((s = rtnl_dereference(data->ht[h1])) != NULL) { - RCU_INIT_POINTER(data->ht[h1], s->next); - - for (h2 = 0; h2 <= 16; h2++) { - struct rsvp_filter *f; - - while ((f = rtnl_dereference(s->ht[h2])) != NULL) { - rcu_assign_pointer(s->ht[h2], f->next); - rsvp_delete_filter(tp, f); - } - } - kfree_rcu(s, rcu); - } - } - kfree_rcu(data, rcu); -} - -static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last, - bool rtnl_held, struct netlink_ext_ack *extack) -{ - struct rsvp_head *head = rtnl_dereference(tp->root); - struct rsvp_filter *nfp, *f = arg; - struct rsvp_filter __rcu **fp; - unsigned int h = f->handle; - struct rsvp_session __rcu **sp; - struct rsvp_session *nsp, *s = f->sess; - int i, h1; - - fp = &s->ht[(h >> 8) & 0xFF]; - for (nfp = rtnl_dereference(*fp); nfp; - fp = &nfp->next, nfp = rtnl_dereference(*fp)) { - if (nfp == f) { - RCU_INIT_POINTER(*fp, f->next); - rsvp_delete_filter(tp, f); - - /* Strip tree */ - - for (i = 0; i <= 16; i++) - if (s->ht[i]) - goto out; - - /* OK, session has no flows */ - sp = &head->ht[h & 0xFF]; - for (nsp = rtnl_dereference(*sp); nsp; - sp = &nsp->next, nsp = rtnl_dereference(*sp)) { - if (nsp == s) { - RCU_INIT_POINTER(*sp, s->next); - kfree_rcu(s, rcu); - goto out; - } - } - - break; - } - } - -out: - *last = true; - for (h1 = 0; h1 < 256; h1++) { - if (rcu_access_pointer(head->ht[h1])) { - *last = false; - break; - } - } - - return 0; -} - -static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt) -{ - struct rsvp_head *data = rtnl_dereference(tp->root); - int i = 0xFFFF; - - while (i-- > 0) { - u32 h; - - if ((data->hgenerator += 0x10000) == 0) - data->hgenerator = 0x10000; - h = data->hgenerator|salt; - if (!rsvp_get(tp, h)) - return h; - } - return 0; -} - -static int tunnel_bts(struct rsvp_head *data) -{ - int n = data->tgenerator >> 5; - u32 b = 1 << (data->tgenerator & 0x1F); - - if (data->tmap[n] & b) - return 0; - data->tmap[n] |= b; - return 1; -} - -static void tunnel_recycle(struct rsvp_head *data) -{ - struct rsvp_session __rcu **sht = data->ht; - u32 tmap[256/32]; - int h1, h2; - - memset(tmap, 0, sizeof(tmap)); - - for (h1 = 0; h1 < 256; h1++) { - struct rsvp_session *s; - for (s = rtnl_dereference(sht[h1]); s; - s = rtnl_dereference(s->next)) { - for (h2 = 0; h2 <= 16; h2++) { - struct rsvp_filter *f; - - for (f = rtnl_dereference(s->ht[h2]); f; - f = rtnl_dereference(f->next)) { - if (f->tunnelhdr == 0) - continue; - data->tgenerator = f->res.classid; - tunnel_bts(data); - } - } - } - } - - memcpy(data->tmap, tmap, sizeof(tmap)); -} - -static u32 gen_tunnel(struct rsvp_head *data) -{ - int i, k; - - for (k = 0; k < 2; k++) { - for (i = 255; i > 0; i--) { - if (++data->tgenerator == 0) - data->tgenerator = 1; - if (tunnel_bts(data)) - return data->tgenerator; - } - tunnel_recycle(data); - } - return 0; -} - -static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = { - [TCA_RSVP_CLASSID] = { .type = NLA_U32 }, - [TCA_RSVP_DST] = { .len = RSVP_DST_LEN * sizeof(u32) }, - [TCA_RSVP_SRC] = { .len = RSVP_DST_LEN * sizeof(u32) }, - [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) }, -}; - -static int rsvp_change(struct net *net, struct sk_buff *in_skb, - struct tcf_proto *tp, unsigned long base, - u32 handle, - struct nlattr **tca, - void **arg, bool ovr, bool rtnl_held, - struct netlink_ext_ack *extack) -{ - struct rsvp_head *data = rtnl_dereference(tp->root); - struct rsvp_filter *f, *nfp; - struct rsvp_filter __rcu **fp; - struct rsvp_session *nsp, *s; - struct rsvp_session __rcu **sp; - struct tc_rsvp_pinfo *pinfo = NULL; - struct nlattr *opt = tca[TCA_OPTIONS]; - struct nlattr *tb[TCA_RSVP_MAX + 1]; - struct tcf_exts e; - unsigned int h1, h2; - __be32 *dst; - int err; - - if (opt == NULL) - return handle ? -EINVAL : 0; - - err = nla_parse_nested_deprecated(tb, TCA_RSVP_MAX, opt, rsvp_policy, - NULL); - if (err < 0) - return err; - - err = tcf_exts_init(&e, net, TCA_RSVP_ACT, TCA_RSVP_POLICE); - if (err < 0) - return err; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, true, - extack); - if (err < 0) - goto errout2; - - f = *arg; - if (f) { - /* Node exists: adjust only classid */ - struct rsvp_filter *n; - - if (f->handle != handle && handle) - goto errout2; - - n = kmemdup(f, sizeof(*f), GFP_KERNEL); - if (!n) { - err = -ENOMEM; - goto errout2; - } - - err = tcf_exts_init(&n->exts, net, TCA_RSVP_ACT, - TCA_RSVP_POLICE); - if (err < 0) { - kfree(n); - goto errout2; - } - - if (tb[TCA_RSVP_CLASSID]) { - n->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]); - tcf_bind_filter(tp, &n->res, base); - } - - tcf_exts_change(&n->exts, &e); - rsvp_replace(tp, n, handle); - return 0; - } - - /* Now more serious part... */ - err = -EINVAL; - if (handle) - goto errout2; - if (tb[TCA_RSVP_DST] == NULL) - goto errout2; - - err = -ENOBUFS; - f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL); - if (f == NULL) - goto errout2; - - err = tcf_exts_init(&f->exts, net, TCA_RSVP_ACT, TCA_RSVP_POLICE); - if (err < 0) - goto errout; - h2 = 16; - if (tb[TCA_RSVP_SRC]) { - memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src)); - h2 = hash_src(f->src); - } - if (tb[TCA_RSVP_PINFO]) { - pinfo = nla_data(tb[TCA_RSVP_PINFO]); - f->spi = pinfo->spi; - f->tunnelhdr = pinfo->tunnelhdr; - } - if (tb[TCA_RSVP_CLASSID]) - f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]); - - dst = nla_data(tb[TCA_RSVP_DST]); - h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0); - - err = -ENOMEM; - if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0) - goto errout; - - if (f->tunnelhdr) { - err = -EINVAL; - if (f->res.classid > 255) - goto errout; - - err = -ENOMEM; - if (f->res.classid == 0 && - (f->res.classid = gen_tunnel(data)) == 0) - goto errout; - } - - for (sp = &data->ht[h1]; - (s = rtnl_dereference(*sp)) != NULL; - sp = &s->next) { - if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] && - pinfo && pinfo->protocol == s->protocol && - memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 && -#if RSVP_DST_LEN == 4 - dst[0] == s->dst[0] && - dst[1] == s->dst[1] && - dst[2] == s->dst[2] && -#endif - pinfo->tunnelid == s->tunnelid) { - -insert: - /* OK, we found appropriate session */ - - fp = &s->ht[h2]; - - f->sess = s; - if (f->tunnelhdr == 0) - tcf_bind_filter(tp, &f->res, base); - - tcf_exts_change(&f->exts, &e); - - fp = &s->ht[h2]; - for (nfp = rtnl_dereference(*fp); nfp; - fp = &nfp->next, nfp = rtnl_dereference(*fp)) { - __u32 mask = nfp->spi.mask & f->spi.mask; - - if (mask != f->spi.mask) - break; - } - RCU_INIT_POINTER(f->next, nfp); - rcu_assign_pointer(*fp, f); - - *arg = f; - return 0; - } - } - - /* No session found. Create new one. */ - - err = -ENOBUFS; - s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL); - if (s == NULL) - goto errout; - memcpy(s->dst, dst, sizeof(s->dst)); - - if (pinfo) { - s->dpi = pinfo->dpi; - s->protocol = pinfo->protocol; - s->tunnelid = pinfo->tunnelid; - } - sp = &data->ht[h1]; - for (nsp = rtnl_dereference(*sp); nsp; - sp = &nsp->next, nsp = rtnl_dereference(*sp)) { - if ((nsp->dpi.mask & s->dpi.mask) != s->dpi.mask) - break; - } - RCU_INIT_POINTER(s->next, nsp); - rcu_assign_pointer(*sp, s); - - goto insert; - -errout: - tcf_exts_destroy(&f->exts); - kfree(f); -errout2: - tcf_exts_destroy(&e); - return err; -} - -static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg, - bool rtnl_held) -{ - struct rsvp_head *head = rtnl_dereference(tp->root); - unsigned int h, h1; - - if (arg->stop) - return; - - for (h = 0; h < 256; h++) { - struct rsvp_session *s; - - for (s = rtnl_dereference(head->ht[h]); s; - s = rtnl_dereference(s->next)) { - for (h1 = 0; h1 <= 16; h1++) { - struct rsvp_filter *f; - - for (f = rtnl_dereference(s->ht[h1]); f; - f = rtnl_dereference(f->next)) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(tp, f, arg) < 0) { - arg->stop = 1; - return; - } - arg->count++; - } - } - } - } -} - -static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) -{ - struct rsvp_filter *f = fh; - struct rsvp_session *s; - struct nlattr *nest; - struct tc_rsvp_pinfo pinfo; - - if (f == NULL) - return skb->len; - s = f->sess; - - t->tcm_handle = f->handle; - - nest = nla_nest_start_noflag(skb, TCA_OPTIONS); - if (nest == NULL) - goto nla_put_failure; - - if (nla_put(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst)) - goto nla_put_failure; - pinfo.dpi = s->dpi; - pinfo.spi = f->spi; - pinfo.protocol = s->protocol; - pinfo.tunnelid = s->tunnelid; - pinfo.tunnelhdr = f->tunnelhdr; - pinfo.pad = 0; - if (nla_put(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo)) - goto nla_put_failure; - if (f->res.classid && - nla_put_u32(skb, TCA_RSVP_CLASSID, f->res.classid)) - goto nla_put_failure; - if (((f->handle >> 8) & 0xFF) != 16 && - nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src)) - goto nla_put_failure; - - if (tcf_exts_dump(skb, &f->exts) < 0) - goto nla_put_failure; - - nla_nest_end(skb, nest); - - if (tcf_exts_dump_stats(skb, &f->exts) < 0) - goto nla_put_failure; - return skb->len; - -nla_put_failure: - nla_nest_cancel(skb, nest); - return -1; -} - -static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl, void *q, - unsigned long base) -{ - struct rsvp_filter *f = fh; - - if (f && f->res.classid == classid) { - if (cl) - __tcf_bind_filter(q, &f->res, base); - else - __tcf_unbind_filter(q, &f->res); - } -} - -static struct tcf_proto_ops RSVP_OPS __read_mostly = { - .kind = RSVP_ID, - .classify = rsvp_classify, - .init = rsvp_init, - .destroy = rsvp_destroy, - .get = rsvp_get, - .change = rsvp_change, - .delete = rsvp_delete, - .walk = rsvp_walk, - .dump = rsvp_dump, - .bind_class = rsvp_bind_class, - .owner = THIS_MODULE, -}; - -static int __init init_rsvp(void) -{ - return register_tcf_proto_ops(&RSVP_OPS); -} - -static void __exit exit_rsvp(void) -{ - unregister_tcf_proto_ops(&RSVP_OPS); -} - -module_init(init_rsvp) -module_exit(exit_rsvp) diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c deleted file mode 100644 index 64078846000e..000000000000 --- a/net/sched/cls_rsvp6.c +++ /dev/null @@ -1,24 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * net/sched/cls_rsvp6.c Special RSVP packet classifier for IPv6. - * - * Authors: Alexey Kuznetsov, <kuznet(a)ms2.inr.ac.ru> - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/ipv6.h> -#include <linux/skbuff.h> -#include <net/act_api.h> -#include <net/pkt_cls.h> -#include <net/netlink.h> - -#define RSVP_DST_LEN 4 -#define RSVP_ID "rsvp6" -#define RSVP_OPS cls_rsvp6_ops - -#include "cls_rsvp.h" -MODULE_LICENSE("GPL"); -- 2.25.1

2 1

[PATCH openEuler-1.0-LTS] net/sched: Retire rsvp classifier
by Dong Chenchen 06 Oct '23

06 Oct '23

From: Jamal Hadi Salim <jhs(a)mojatatu.com> stable inclusion from stable-v4.19.294 commit 6ca0ea6a46e7a2d70fb1b1f6a886efe2b2365e16 category: bugfix bugzilla: 189257, https://gitee.com/src-openeuler/kernel/issues/I84B2W CVE: CVE-2023-42755 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/patch/?id=… -------------------------------- commit 265b4da82dbf5df04bee5a5d46b7474b1aaf326a upstream. The rsvp classifier has served us well for about a quarter of a century but has has not been getting much maintenance attention due to lack of known users. Signed-off-by: Jamal Hadi Salim <jhs(a)mojatatu.com> Acked-by: Jiri Pirko <jiri(a)nvidia.com> Signed-off-by: Paolo Abeni <pabeni(a)redhat.com> Signed-off-by: Kyle Zeng <zengyhkyle(a)gmail.com> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> Signed-off-by: Dong Chenchen <dongchenchen2(a)huawei.com> --- net/sched/Kconfig | 28 -- net/sched/Makefile | 2 - net/sched/cls_rsvp.c | 28 -- net/sched/cls_rsvp.h | 770 ------------------------------------------ net/sched/cls_rsvp6.c | 28 -- 5 files changed, 856 deletions(-) delete mode 100644 net/sched/cls_rsvp.c delete mode 100644 net/sched/cls_rsvp.h delete mode 100644 net/sched/cls_rsvp6.c diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 4547022ed7f4..7698a8974a47 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -503,34 +503,6 @@ config CLS_U32_MARK ---help--- Say Y here to be able to use netfilter marks as u32 key. -config NET_CLS_RSVP - tristate "IPv4 Resource Reservation Protocol (RSVP)" - select NET_CLS - ---help--- - The Resource Reservation Protocol (RSVP) permits end systems to - request a minimum and maximum data flow rate for a connection; this - is important for real time data such as streaming sound or video. - - Say Y here if you want to be able to classify outgoing packets based - on their RSVP requests. - - To compile this code as a module, choose M here: the - module will be called cls_rsvp. - -config NET_CLS_RSVP6 - tristate "IPv6 Resource Reservation Protocol (RSVP6)" - select NET_CLS - ---help--- - The Resource Reservation Protocol (RSVP) permits end systems to - request a minimum and maximum data flow rate for a connection; this - is important for real time data such as streaming sound or video. - - Say Y here if you want to be able to classify outgoing packets based - on their RSVP requests and you are using the IPv6 protocol. - - To compile this code as a module, choose M here: the - module will be called cls_rsvp6. - config NET_CLS_FLOW tristate "Flow classifier" select NET_CLS diff --git a/net/sched/Makefile b/net/sched/Makefile index 5eed580cdb42..3139c32e1947 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -61,8 +61,6 @@ obj-$(CONFIG_NET_SCH_ETF) += sch_etf.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o obj-$(CONFIG_NET_CLS_FW) += cls_fw.o -obj-$(CONFIG_NET_CLS_RSVP) += cls_rsvp.o -obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c deleted file mode 100644 index cbb5e0d600f3..000000000000 --- a/net/sched/cls_rsvp.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * net/sched/cls_rsvp.c Special RSVP packet classifier for IPv4. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Authors: Alexey Kuznetsov, <kuznet(a)ms2.inr.ac.ru> - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/skbuff.h> -#include <net/ip.h> -#include <net/netlink.h> -#include <net/act_api.h> -#include <net/pkt_cls.h> - -#define RSVP_DST_LEN 1 -#define RSVP_ID "rsvp" -#define RSVP_OPS cls_rsvp_ops - -#include "cls_rsvp.h" -MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h deleted file mode 100644 index a1e9f7cbcffc..000000000000 --- a/net/sched/cls_rsvp.h +++ /dev/null @@ -1,770 +0,0 @@ -/* - * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Authors: Alexey Kuznetsov, <kuznet(a)ms2.inr.ac.ru> - */ - -/* - Comparing to general packet classification problem, - RSVP needs only sevaral relatively simple rules: - - * (dst, protocol) are always specified, - so that we are able to hash them. - * src may be exact, or may be wildcard, so that - we can keep a hash table plus one wildcard entry. - * source port (or flow label) is important only if src is given. - - IMPLEMENTATION. - - We use a two level hash table: The top level is keyed by - destination address and protocol ID, every bucket contains a list - of "rsvp sessions", identified by destination address, protocol and - DPI(="Destination Port ID"): triple (key, mask, offset). - - Every bucket has a smaller hash table keyed by source address - (cf. RSVP flowspec) and one wildcard entry for wildcard reservations. - Every bucket is again a list of "RSVP flows", selected by - source address and SPI(="Source Port ID" here rather than - "security parameter index"): triple (key, mask, offset). - - - NOTE 1. All the packets with IPv6 extension headers (but AH and ESP) - and all fragmented packets go to the best-effort traffic class. - - - NOTE 2. Two "port id"'s seems to be redundant, rfc2207 requires - only one "Generalized Port Identifier". So that for classic - ah, esp (and udp,tcp) both *pi should coincide or one of them - should be wildcard. - - At first sight, this redundancy is just a waste of CPU - resources. But DPI and SPI add the possibility to assign different - priorities to GPIs. Look also at note 4 about tunnels below. - - - NOTE 3. One complication is the case of tunneled packets. - We implement it as following: if the first lookup - matches a special session with "tunnelhdr" value not zero, - flowid doesn't contain the true flow ID, but the tunnel ID (1...255). - In this case, we pull tunnelhdr bytes and restart lookup - with tunnel ID added to the list of keys. Simple and stupid 8)8) - It's enough for PIMREG and IPIP. - - - NOTE 4. Two GPIs make it possible to parse even GRE packets. - F.e. DPI can select ETH_P_IP (and necessary flags to make - tunnelhdr correct) in GRE protocol field and SPI matches - GRE key. Is it not nice? 8)8) - - - Well, as result, despite its simplicity, we get a pretty - powerful classification engine. */ - - -struct rsvp_head { - u32 tmap[256/32]; - u32 hgenerator; - u8 tgenerator; - struct rsvp_session __rcu *ht[256]; - struct rcu_head rcu; -}; - -struct rsvp_session { - struct rsvp_session __rcu *next; - __be32 dst[RSVP_DST_LEN]; - struct tc_rsvp_gpi dpi; - u8 protocol; - u8 tunnelid; - /* 16 (src,sport) hash slots, and one wildcard source slot */ - struct rsvp_filter __rcu *ht[16 + 1]; - struct rcu_head rcu; -}; - - -struct rsvp_filter { - struct rsvp_filter __rcu *next; - __be32 src[RSVP_DST_LEN]; - struct tc_rsvp_gpi spi; - u8 tunnelhdr; - - struct tcf_result res; - struct tcf_exts exts; - - u32 handle; - struct rsvp_session *sess; - struct rcu_work rwork; -}; - -static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid) -{ - unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1]; - - h ^= h>>16; - h ^= h>>8; - return (h ^ protocol ^ tunnelid) & 0xFF; -} - -static inline unsigned int hash_src(__be32 *src) -{ - unsigned int h = (__force __u32)src[RSVP_DST_LEN-1]; - - h ^= h>>16; - h ^= h>>8; - h ^= h>>4; - return h & 0xF; -} - -#define RSVP_APPLY_RESULT() \ -{ \ - int r = tcf_exts_exec(skb, &f->exts, res); \ - if (r < 0) \ - continue; \ - else if (r > 0) \ - return r; \ -} - -static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) -{ - struct rsvp_head *head = rcu_dereference_bh(tp->root); - struct rsvp_session *s; - struct rsvp_filter *f; - unsigned int h1, h2; - __be32 *dst, *src; - u8 protocol; - u8 tunnelid = 0; - u8 *xprt; -#if RSVP_DST_LEN == 4 - struct ipv6hdr *nhptr; - - if (!pskb_network_may_pull(skb, sizeof(*nhptr))) - return -1; - nhptr = ipv6_hdr(skb); -#else - struct iphdr *nhptr; - - if (!pskb_network_may_pull(skb, sizeof(*nhptr))) - return -1; - nhptr = ip_hdr(skb); -#endif -restart: - -#if RSVP_DST_LEN == 4 - src = &nhptr->saddr.s6_addr32[0]; - dst = &nhptr->daddr.s6_addr32[0]; - protocol = nhptr->nexthdr; - xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr); -#else - src = &nhptr->saddr; - dst = &nhptr->daddr; - protocol = nhptr->protocol; - xprt = ((u8 *)nhptr) + (nhptr->ihl<<2); - if (ip_is_fragment(nhptr)) - return -1; -#endif - - h1 = hash_dst(dst, protocol, tunnelid); - h2 = hash_src(src); - - for (s = rcu_dereference_bh(head->ht[h1]); s; - s = rcu_dereference_bh(s->next)) { - if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] && - protocol == s->protocol && - !(s->dpi.mask & - (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) && -#if RSVP_DST_LEN == 4 - dst[0] == s->dst[0] && - dst[1] == s->dst[1] && - dst[2] == s->dst[2] && -#endif - tunnelid == s->tunnelid) { - - for (f = rcu_dereference_bh(s->ht[h2]); f; - f = rcu_dereference_bh(f->next)) { - if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] && - !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key)) -#if RSVP_DST_LEN == 4 - && - src[0] == f->src[0] && - src[1] == f->src[1] && - src[2] == f->src[2] -#endif - ) { - *res = f->res; - RSVP_APPLY_RESULT(); - -matched: - if (f->tunnelhdr == 0) - return 0; - - tunnelid = f->res.classid; - nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr)); - goto restart; - } - } - - /* And wildcard bucket... */ - for (f = rcu_dereference_bh(s->ht[16]); f; - f = rcu_dereference_bh(f->next)) { - *res = f->res; - RSVP_APPLY_RESULT(); - goto matched; - } - return -1; - } - } - return -1; -} - -static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h) -{ - struct rsvp_head *head = rtnl_dereference(tp->root); - struct rsvp_session *s; - struct rsvp_filter __rcu **ins; - struct rsvp_filter *pins; - unsigned int h1 = h & 0xFF; - unsigned int h2 = (h >> 8) & 0xFF; - - for (s = rtnl_dereference(head->ht[h1]); s; - s = rtnl_dereference(s->next)) { - for (ins = &s->ht[h2], pins = rtnl_dereference(*ins); ; - ins = &pins->next, pins = rtnl_dereference(*ins)) { - if (pins->handle == h) { - RCU_INIT_POINTER(n->next, pins->next); - rcu_assign_pointer(*ins, n); - return; - } - } - } - - /* Something went wrong if we are trying to replace a non-existant - * node. Mind as well halt instead of silently failing. - */ - BUG_ON(1); -} - -static void *rsvp_get(struct tcf_proto *tp, u32 handle) -{ - struct rsvp_head *head = rtnl_dereference(tp->root); - struct rsvp_session *s; - struct rsvp_filter *f; - unsigned int h1 = handle & 0xFF; - unsigned int h2 = (handle >> 8) & 0xFF; - - if (h2 > 16) - return NULL; - - for (s = rtnl_dereference(head->ht[h1]); s; - s = rtnl_dereference(s->next)) { - for (f = rtnl_dereference(s->ht[h2]); f; - f = rtnl_dereference(f->next)) { - if (f->handle == handle) - return f; - } - } - return NULL; -} - -static int rsvp_init(struct tcf_proto *tp) -{ - struct rsvp_head *data; - - data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL); - if (data) { - rcu_assign_pointer(tp->root, data); - return 0; - } - return -ENOBUFS; -} - -static void __rsvp_delete_filter(struct rsvp_filter *f) -{ - tcf_exts_destroy(&f->exts); - tcf_exts_put_net(&f->exts); - kfree(f); -} - -static void rsvp_delete_filter_work(struct work_struct *work) -{ - struct rsvp_filter *f = container_of(to_rcu_work(work), - struct rsvp_filter, - rwork); - rtnl_lock(); - __rsvp_delete_filter(f); - rtnl_unlock(); -} - -static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) -{ - tcf_unbind_filter(tp, &f->res); - /* all classifiers are required to call tcf_exts_destroy() after rcu - * grace period, since converted-to-rcu actions are relying on that - * in cleanup() callback - */ - if (tcf_exts_get_net(&f->exts)) - tcf_queue_work(&f->rwork, rsvp_delete_filter_work); - else - __rsvp_delete_filter(f); -} - -static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) -{ - struct rsvp_head *data = rtnl_dereference(tp->root); - int h1, h2; - - if (data == NULL) - return; - - for (h1 = 0; h1 < 256; h1++) { - struct rsvp_session *s; - - while ((s = rtnl_dereference(data->ht[h1])) != NULL) { - RCU_INIT_POINTER(data->ht[h1], s->next); - - for (h2 = 0; h2 <= 16; h2++) { - struct rsvp_filter *f; - - while ((f = rtnl_dereference(s->ht[h2])) != NULL) { - rcu_assign_pointer(s->ht[h2], f->next); - rsvp_delete_filter(tp, f); - } - } - kfree_rcu(s, rcu); - } - } - kfree_rcu(data, rcu); -} - -static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) -{ - struct rsvp_head *head = rtnl_dereference(tp->root); - struct rsvp_filter *nfp, *f = arg; - struct rsvp_filter __rcu **fp; - unsigned int h = f->handle; - struct rsvp_session __rcu **sp; - struct rsvp_session *nsp, *s = f->sess; - int i, h1; - - fp = &s->ht[(h >> 8) & 0xFF]; - for (nfp = rtnl_dereference(*fp); nfp; - fp = &nfp->next, nfp = rtnl_dereference(*fp)) { - if (nfp == f) { - RCU_INIT_POINTER(*fp, f->next); - rsvp_delete_filter(tp, f); - - /* Strip tree */ - - for (i = 0; i <= 16; i++) - if (s->ht[i]) - goto out; - - /* OK, session has no flows */ - sp = &head->ht[h & 0xFF]; - for (nsp = rtnl_dereference(*sp); nsp; - sp = &nsp->next, nsp = rtnl_dereference(*sp)) { - if (nsp == s) { - RCU_INIT_POINTER(*sp, s->next); - kfree_rcu(s, rcu); - goto out; - } - } - - break; - } - } - -out: - *last = true; - for (h1 = 0; h1 < 256; h1++) { - if (rcu_access_pointer(head->ht[h1])) { - *last = false; - break; - } - } - - return 0; -} - -static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt) -{ - struct rsvp_head *data = rtnl_dereference(tp->root); - int i = 0xFFFF; - - while (i-- > 0) { - u32 h; - - if ((data->hgenerator += 0x10000) == 0) - data->hgenerator = 0x10000; - h = data->hgenerator|salt; - if (!rsvp_get(tp, h)) - return h; - } - return 0; -} - -static int tunnel_bts(struct rsvp_head *data) -{ - int n = data->tgenerator >> 5; - u32 b = 1 << (data->tgenerator & 0x1F); - - if (data->tmap[n] & b) - return 0; - data->tmap[n] |= b; - return 1; -} - -static void tunnel_recycle(struct rsvp_head *data) -{ - struct rsvp_session __rcu **sht = data->ht; - u32 tmap[256/32]; - int h1, h2; - - memset(tmap, 0, sizeof(tmap)); - - for (h1 = 0; h1 < 256; h1++) { - struct rsvp_session *s; - for (s = rtnl_dereference(sht[h1]); s; - s = rtnl_dereference(s->next)) { - for (h2 = 0; h2 <= 16; h2++) { - struct rsvp_filter *f; - - for (f = rtnl_dereference(s->ht[h2]); f; - f = rtnl_dereference(f->next)) { - if (f->tunnelhdr == 0) - continue; - data->tgenerator = f->res.classid; - tunnel_bts(data); - } - } - } - } - - memcpy(data->tmap, tmap, sizeof(tmap)); -} - -static u32 gen_tunnel(struct rsvp_head *data) -{ - int i, k; - - for (k = 0; k < 2; k++) { - for (i = 255; i > 0; i--) { - if (++data->tgenerator == 0) - data->tgenerator = 1; - if (tunnel_bts(data)) - return data->tgenerator; - } - tunnel_recycle(data); - } - return 0; -} - -static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = { - [TCA_RSVP_CLASSID] = { .type = NLA_U32 }, - [TCA_RSVP_DST] = { .len = RSVP_DST_LEN * sizeof(u32) }, - [TCA_RSVP_SRC] = { .len = RSVP_DST_LEN * sizeof(u32) }, - [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) }, -}; - -static int rsvp_change(struct net *net, struct sk_buff *in_skb, - struct tcf_proto *tp, unsigned long base, - u32 handle, - struct nlattr **tca, - void **arg, bool ovr, struct netlink_ext_ack *extack) -{ - struct rsvp_head *data = rtnl_dereference(tp->root); - struct rsvp_filter *f, *nfp; - struct rsvp_filter __rcu **fp; - struct rsvp_session *nsp, *s; - struct rsvp_session __rcu **sp; - struct tc_rsvp_pinfo *pinfo = NULL; - struct nlattr *opt = tca[TCA_OPTIONS]; - struct nlattr *tb[TCA_RSVP_MAX + 1]; - struct tcf_exts e; - unsigned int h1, h2; - __be32 *dst; - int err; - - if (opt == NULL) - return handle ? -EINVAL : 0; - - err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy, NULL); - if (err < 0) - return err; - - err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE); - if (err < 0) - return err; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, extack); - if (err < 0) - goto errout2; - - f = *arg; - if (f) { - /* Node exists: adjust only classid */ - struct rsvp_filter *n; - - if (f->handle != handle && handle) - goto errout2; - - n = kmemdup(f, sizeof(*f), GFP_KERNEL); - if (!n) { - err = -ENOMEM; - goto errout2; - } - - err = tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE); - if (err < 0) { - kfree(n); - goto errout2; - } - - if (tb[TCA_RSVP_CLASSID]) { - n->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]); - tcf_bind_filter(tp, &n->res, base); - } - - tcf_exts_change(&n->exts, &e); - rsvp_replace(tp, n, handle); - return 0; - } - - /* Now more serious part... */ - err = -EINVAL; - if (handle) - goto errout2; - if (tb[TCA_RSVP_DST] == NULL) - goto errout2; - - err = -ENOBUFS; - f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL); - if (f == NULL) - goto errout2; - - err = tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE); - if (err < 0) - goto errout; - h2 = 16; - if (tb[TCA_RSVP_SRC]) { - memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src)); - h2 = hash_src(f->src); - } - if (tb[TCA_RSVP_PINFO]) { - pinfo = nla_data(tb[TCA_RSVP_PINFO]); - f->spi = pinfo->spi; - f->tunnelhdr = pinfo->tunnelhdr; - } - if (tb[TCA_RSVP_CLASSID]) - f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]); - - dst = nla_data(tb[TCA_RSVP_DST]); - h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0); - - err = -ENOMEM; - if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0) - goto errout; - - if (f->tunnelhdr) { - err = -EINVAL; - if (f->res.classid > 255) - goto errout; - - err = -ENOMEM; - if (f->res.classid == 0 && - (f->res.classid = gen_tunnel(data)) == 0) - goto errout; - } - - for (sp = &data->ht[h1]; - (s = rtnl_dereference(*sp)) != NULL; - sp = &s->next) { - if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] && - pinfo && pinfo->protocol == s->protocol && - memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 && -#if RSVP_DST_LEN == 4 - dst[0] == s->dst[0] && - dst[1] == s->dst[1] && - dst[2] == s->dst[2] && -#endif - pinfo->tunnelid == s->tunnelid) { - -insert: - /* OK, we found appropriate session */ - - fp = &s->ht[h2]; - - f->sess = s; - if (f->tunnelhdr == 0) - tcf_bind_filter(tp, &f->res, base); - - tcf_exts_change(&f->exts, &e); - - fp = &s->ht[h2]; - for (nfp = rtnl_dereference(*fp); nfp; - fp = &nfp->next, nfp = rtnl_dereference(*fp)) { - __u32 mask = nfp->spi.mask & f->spi.mask; - - if (mask != f->spi.mask) - break; - } - RCU_INIT_POINTER(f->next, nfp); - rcu_assign_pointer(*fp, f); - - *arg = f; - return 0; - } - } - - /* No session found. Create new one. */ - - err = -ENOBUFS; - s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL); - if (s == NULL) - goto errout; - memcpy(s->dst, dst, sizeof(s->dst)); - - if (pinfo) { - s->dpi = pinfo->dpi; - s->protocol = pinfo->protocol; - s->tunnelid = pinfo->tunnelid; - } - sp = &data->ht[h1]; - for (nsp = rtnl_dereference(*sp); nsp; - sp = &nsp->next, nsp = rtnl_dereference(*sp)) { - if ((nsp->dpi.mask & s->dpi.mask) != s->dpi.mask) - break; - } - RCU_INIT_POINTER(s->next, nsp); - rcu_assign_pointer(*sp, s); - - goto insert; - -errout: - tcf_exts_destroy(&f->exts); - kfree(f); -errout2: - tcf_exts_destroy(&e); - return err; -} - -static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) -{ - struct rsvp_head *head = rtnl_dereference(tp->root); - unsigned int h, h1; - - if (arg->stop) - return; - - for (h = 0; h < 256; h++) { - struct rsvp_session *s; - - for (s = rtnl_dereference(head->ht[h]); s; - s = rtnl_dereference(s->next)) { - for (h1 = 0; h1 <= 16; h1++) { - struct rsvp_filter *f; - - for (f = rtnl_dereference(s->ht[h1]); f; - f = rtnl_dereference(f->next)) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(tp, f, arg) < 0) { - arg->stop = 1; - return; - } - arg->count++; - } - } - } - } -} - -static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) -{ - struct rsvp_filter *f = fh; - struct rsvp_session *s; - struct nlattr *nest; - struct tc_rsvp_pinfo pinfo; - - if (f == NULL) - return skb->len; - s = f->sess; - - t->tcm_handle = f->handle; - - nest = nla_nest_start(skb, TCA_OPTIONS); - if (nest == NULL) - goto nla_put_failure; - - if (nla_put(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst)) - goto nla_put_failure; - pinfo.dpi = s->dpi; - pinfo.spi = f->spi; - pinfo.protocol = s->protocol; - pinfo.tunnelid = s->tunnelid; - pinfo.tunnelhdr = f->tunnelhdr; - pinfo.pad = 0; - if (nla_put(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo)) - goto nla_put_failure; - if (f->res.classid && - nla_put_u32(skb, TCA_RSVP_CLASSID, f->res.classid)) - goto nla_put_failure; - if (((f->handle >> 8) & 0xFF) != 16 && - nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src)) - goto nla_put_failure; - - if (tcf_exts_dump(skb, &f->exts) < 0) - goto nla_put_failure; - - nla_nest_end(skb, nest); - - if (tcf_exts_dump_stats(skb, &f->exts) < 0) - goto nla_put_failure; - return skb->len; - -nla_put_failure: - nla_nest_cancel(skb, nest); - return -1; -} - -static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl) -{ - struct rsvp_filter *f = fh; - - if (f && f->res.classid == classid) - f->res.class = cl; -} - -static struct tcf_proto_ops RSVP_OPS __read_mostly = { - .kind = RSVP_ID, - .classify = rsvp_classify, - .init = rsvp_init, - .destroy = rsvp_destroy, - .get = rsvp_get, - .change = rsvp_change, - .delete = rsvp_delete, - .walk = rsvp_walk, - .dump = rsvp_dump, - .bind_class = rsvp_bind_class, - .owner = THIS_MODULE, -}; - -static int __init init_rsvp(void) -{ - return register_tcf_proto_ops(&RSVP_OPS); -} - -static void __exit exit_rsvp(void) -{ - unregister_tcf_proto_ops(&RSVP_OPS); -} - -module_init(init_rsvp) -module_exit(exit_rsvp) diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c deleted file mode 100644 index dd08aea2aee5..000000000000 --- a/net/sched/cls_rsvp6.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * net/sched/cls_rsvp6.c Special RSVP packet classifier for IPv6. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Authors: Alexey Kuznetsov, <kuznet(a)ms2.inr.ac.ru> - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/ipv6.h> -#include <linux/skbuff.h> -#include <net/act_api.h> -#include <net/pkt_cls.h> -#include <net/netlink.h> - -#define RSVP_DST_LEN 4 -#define RSVP_ID "rsvp6" -#define RSVP_OPS cls_rsvp6_ops - -#include "cls_rsvp.h" -MODULE_LICENSE("GPL"); -- 2.25.1

2 1

[PATCH 07/15] Complete the developing of hikptool ub_dfx function
by veega2022 06 Oct '23

06 Oct '23

The hikptool ub_dfx command is submitted for the first time. This command can be used to dump register information of the LRB, PFA, and PM modules. Signed-off-by: Jianqiang Li <lijianqiang16(a)huawei.com> --- libhikptdev/include/hikptdev_plug.h | 1 + net/hikp_net_lib.h | 9 + net/ub/ub_dfx/hikp_ub_dfx.c | 319 ++++++++++++++++++++++++++++ net/ub/ub_dfx/hikp_ub_dfx.h | 100 +++++++++ tool_lib/tool_lib.h | 2 +- 5 files changed, 430 insertions(+), 1 deletion(-) create mode 100644 net/ub/ub_dfx/hikp_ub_dfx.c create mode 100644 net/ub/ub_dfx/hikp_ub_dfx.h diff --git a/libhikptdev/include/hikptdev_plug.h b/libhikptdev/include/hikptdev_plug.h index 42bea6b..56cea78 100644 --- a/libhikptdev/include/hikptdev_plug.h +++ b/libhikptdev/include/hikptdev_plug.h @@ -43,6 +43,7 @@ enum cmd_module_type { MAC_MOD = 8, DPDK_MOD = 9, CXL_MOD = 10, + UB_MOD = 11, }; void hikp_unlock(void); diff --git a/net/hikp_net_lib.h b/net/hikp_net_lib.h index cc99d0c..af0a51d 100644 --- a/net/hikp_net_lib.h +++ b/net/hikp_net_lib.h @@ -98,6 +98,15 @@ enum roce_cmd_type { GET_ROCEE_TSP_CMD, }; +enum ub_cmd_type { + GET_UNIC_PPP_CMD = 0x1, + GET_UB_DFX_INFO_CMD, + GET_UB_LINK_INFO_CMD, + GET_UB_BP_INFO_CMD, + GET_UB_CRD_INFO_CMD, + GET_UB_BASIC_INFO_CMD, +}; + #define HIKP_MAX_PF_NUM 8 #define HIKP_NIC_MAX_FUNC_NUM 256 diff --git a/net/ub/ub_dfx/hikp_ub_dfx.c b/net/ub/ub_dfx/hikp_ub_dfx.c new file mode 100644 index 0000000..c50f555 --- /dev/null +++ b/net/ub/ub_dfx/hikp_ub_dfx.c @@ -0,0 +1,319 @@ +/* + * Copyright (c) 2023 Hisilicon Technologies Co., Ltd. + * Hikptool is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * + * See the Mulan PSL v2 for more details. + */ + +#include "tool_cmd.h" +#include "hikp_net_lib.h" +#include "hikp_ub_dfx.h" + +struct ub_dfx_param g_ub_dfx_param = { 0 }; + +static const struct dfx_module_cmd g_ub_dfx_module_parse[] = { + {"LRB", LRB_DFX_REG_DUMP}, + {"PFA", PFA_DFX_REG_DUMP}, + {"PM", PM_DFX_REG_DUMP} +}; + +static const struct dfx_type_parse g_dfx_type_parse[] = { + {INCORRECT_REG_TYPE, WIDTH_32_BIT, "INCORRECT TYPE"}, + {TYPE_32_STATS, WIDTH_32_BIT, "32 bit statistics"}, + {TYPE_32_RUNNING_STATUS, WIDTH_32_BIT, "32 bit running status"}, + {TYPE_64_STATS, WIDTH_64_BIT, "64 bit statistics"}, +}; + +static void dfx_help_info(const struct major_cmd_ctrl *self) +{ + printf("\n Usage: %s %s\n", self->cmd_ptr->name, "-i <interface>\n"); + printf("\n %s\n", self->cmd_ptr->help_info); + printf(" Options:\n\n"); + printf(" %s, %-25s %s\n", "-h", "--help", "display this help and exit"); + printf(" %s, %-25s %s\n", "-i", "--interface=<interface>", + "device target or bdf id, e.g. ubn0 or 0000:35:00.0"); + printf(" %s\n", " [-m/--module LRB/PFA/PM] : this is necessary param\n"); +} + +static int hikp_ub_dfx_help(struct major_cmd_ctrl *self, const char *argv) +{ + dfx_help_info(self); + return 0; +} + +static int hikp_ub_dfx_target(struct major_cmd_ctrl *self, const char *argv) +{ + self->err_no = tool_check_and_get_valid_bdf_id(argv, &(g_ub_dfx_param.target)); + if (self->err_no != 0) { + snprintf(self->err_str, sizeof(self->err_str), "Unknown device %s.", argv); + return self->err_no; + } + + return 0; +} + +static int hikp_ub_dfx_module_select(struct major_cmd_ctrl *self, const char *argv) +{ + size_t arr_size = HIKP_ARRAY_SIZE(g_ub_dfx_module_parse); + bool is_found; + size_t i; + + for (i = 0; i < arr_size; i++) { + is_found = strncmp(argv, (const char *)g_ub_dfx_module_parse[i].module_name, + sizeof(g_ub_dfx_module_parse[i].module_name)) == 0; + if (is_found) { + g_ub_dfx_param.sub_cmd_code = g_ub_dfx_module_parse[i].sub_cmd_code; + g_ub_dfx_param.module_idx = i; + g_ub_dfx_param.flag |= MODULE_SET_FLAG; + return 0; + } + } + dfx_help_info(self); + snprintf(self->err_str, sizeof(self->err_str), "-m/--module param error!!!"); + self->err_no = -EINVAL; + + return -EINVAL; +} + +static int hikp_ub_dfx_get_blk_data(struct hikp_cmd_ret **cmd_ret, + uint32_t blk_id, uint32_t sub_cmd_code) +{ + struct hikp_cmd_header req_header = { 0 }; + struct ub_dfx_req_para req_data = { 0 }; + + req_data.bdf = g_ub_dfx_param.target.bdf; + req_data.block_id = blk_id; + hikp_cmd_init(&req_header, UB_MOD, GET_UB_DFX_INFO_CMD, sub_cmd_code); + *cmd_ret = hikp_cmd_alloc(&req_header, &req_data, sizeof(req_data)); + + return hikp_rsp_normal_check(*cmd_ret); +} + +static int hikp_ub_get_first_blk_dfx(struct ub_dfx_rsp_head *rsp_head, uint32_t **reg_data, + uint32_t *max_dfx_size, uint32_t *version) +{ + struct ub_dfx_rsp *dfx_rsp = NULL; + struct hikp_cmd_ret *cmd_ret; + int ret; + + ret = hikp_ub_dfx_get_blk_data(&cmd_ret, 0, g_ub_dfx_param.sub_cmd_code); + if (ret < 0) + goto err_out; + + dfx_rsp = (struct ub_dfx_rsp *)(cmd_ret->rsp_data); + *version = cmd_ret->version; + *rsp_head = dfx_rsp->rsp_head; + if (rsp_head->total_blk_num == 0) { + /* if total block number is zero, set total type number to zero anyway */ + rsp_head->total_type_num = 0; + goto err_out; + } + *max_dfx_size = (uint32_t)(rsp_head->total_blk_num * MAX_DFX_DATA_NUM * sizeof(uint32_t)); + *reg_data = (uint32_t *)calloc(1, *max_dfx_size); + if (*reg_data == NULL) { + HIKP_ERROR_PRINT("malloc log memory 0x%x failed.\n", *max_dfx_size); + ret = -ENOMEM; + goto err_out; + } + + if (rsp_head->cur_blk_size > *max_dfx_size) { + free(*reg_data); + *reg_data = NULL; + HIKP_ERROR_PRINT("blk0 reg_data copy size error, data size: 0x%x, max size: 0x%x\n", + rsp_head->cur_blk_size, *max_dfx_size); + ret = -EINVAL; + goto err_out; + } + memcpy(*reg_data, dfx_rsp->reg_data, rsp_head->cur_blk_size); + + *max_dfx_size -= (uint32_t)rsp_head->cur_blk_size; +err_out: + free(cmd_ret); + cmd_ret = NULL; + + return ret; +} + +static int hikp_ub_get_blk_dfx(struct ub_dfx_rsp_head *rsp_head, uint32_t blk_id, + uint32_t *reg_data, uint32_t *max_dfx_size) +{ + struct ub_dfx_rsp *dfx_rsp = NULL; + struct hikp_cmd_ret *cmd_ret; + int ret; + + ret = hikp_ub_dfx_get_blk_data(&cmd_ret, blk_id, g_ub_dfx_param.sub_cmd_code); + if (ret < 0) + goto err_out; + + dfx_rsp = (struct ub_dfx_rsp *)(cmd_ret->rsp_data); + *rsp_head = dfx_rsp->rsp_head; + if (rsp_head->cur_blk_size > *max_dfx_size) { + HIKP_ERROR_PRINT("blk%u reg_data copy size error, " + "data size: 0x%x, max size: 0x%x\n", + blk_id, rsp_head->cur_blk_size, *max_dfx_size); + ret = -EINVAL; + goto err_out; + } + memcpy(reg_data, dfx_rsp->reg_data, rsp_head->cur_blk_size); + *max_dfx_size -= (uint32_t)rsp_head->cur_blk_size; + +err_out: + free(cmd_ret); + cmd_ret = NULL; + + return ret; +} + +static bool is_type_found(uint16_t type_id, uint32_t *index) +{ + size_t arr_size = HIKP_ARRAY_SIZE(g_dfx_type_parse); + size_t i; + + for (i = 0; i < arr_size; i++) { + if (g_dfx_type_parse[i].type_id == type_id) { + *index = i; + return true; + } + } + + return false; +} + +static void hikp_ub_dfx_print_type_head(uint8_t type_id, uint8_t *last_type_id) +{ + uint32_t index = 0; + + if (type_id != *last_type_id) { + printf("-----------------------------------------------------\n"); + if (is_type_found(type_id, &index)) + printf("type name: %s\n\n", g_dfx_type_parse[index].type_name); + else + HIKP_WARN_PRINT("type name: unknown type, type id is %u\n\n", type_id); + + *last_type_id = type_id; + } +} + +static void hikp_ub_dfx_print_b32(uint32_t num, uint32_t *reg_data) +{ + uint32_t word_num = num * WORD_NUM_PER_REG; + uint16_t offset; + uint32_t value; + uint32_t index; + uint32_t i; + + for (i = 0, index = 1; i < word_num; i = i + WORD_NUM_PER_REG, index++) { + offset = (uint16_t)HI_GET_BITFIELD(reg_data[i], 0, DFX_REG_ADDR_MASK); + value = reg_data[i + 1]; + printf("%03u: 0x%04x\t0x%08x\n", index, offset, value); + } +} + +static void hikp_ub_dfx_print_b64(uint32_t num, uint32_t *reg_data) +{ + uint32_t word_num = num * WORD_NUM_PER_REG; + uint16_t offset; + uint64_t value; + uint32_t index; + uint32_t i; + + for (i = 0, index = 1; i < word_num; i = i + WORD_NUM_PER_REG, index++) { + offset = (uint16_t)HI_GET_BITFIELD(reg_data[i], 0, DFX_REG_ADDR_MASK); + value = (uint64_t)reg_data[i + 1] | + (HI_GET_BITFIELD((uint64_t)reg_data[i], DFX_REG_VALUE_OFF, + DFX_REG_VALUE_MASK) << BIT_NUM_OF_WORD); + printf("%03u: 0x%04x\t0x%016lx\n", index, offset, value); + } +} + +static void hikp_ub_dfx_print(const struct ub_dfx_rsp_head *rsp_head, uint32_t *reg_data) +{ + struct ub_dfx_type_head *type_head; + uint8_t last_type_id = 0; + uint32_t *ptr = reg_data; + uint8_t i; + + printf("****************** module %s reg dump start ********************\n", + g_ub_dfx_module_parse[g_ub_dfx_param.module_idx].module_name); + for (i = 0; i < rsp_head->total_type_num; i++) { + type_head = (struct ub_dfx_type_head *)ptr; + if (type_head->type_id == INCORRECT_REG_TYPE) { + HIKP_ERROR_PRINT("No.%u type is incorrect reg type\n", i + 1u); + break; + } + hikp_ub_dfx_print_type_head(type_head->type_id, &last_type_id); + ptr++; + if (type_head->bit_width == WIDTH_32_BIT) { + hikp_ub_dfx_print_b32((uint32_t)type_head->reg_num, ptr); + } else if (type_head->bit_width == WIDTH_64_BIT) { + hikp_ub_dfx_print_b64((uint32_t)type_head->reg_num, ptr); + } else { + HIKP_ERROR_PRINT("type%u's bit width error.\n", type_head->type_id); + break; + } + ptr += (uint32_t)type_head->reg_num * WORD_NUM_PER_REG; + } + printf("################### ====== dump end ====== ######################\n"); +} + +static void hikp_ub_dfx_execute(struct major_cmd_ctrl *self) +{ + struct ub_dfx_rsp_head rsp_head = { 0 }; + struct ub_dfx_rsp_head tmp_head = { 0 }; + uint32_t *reg_data = NULL; + uint32_t max_dfx_size = 0; + uint32_t real_reg_size; + uint32_t version; + uint32_t i; + + if (!(g_ub_dfx_param.flag & MODULE_SET_FLAG)) { + self->err_no = -EINVAL; + snprintf(self->err_str, sizeof(self->err_str), "Please specify a module."); + dfx_help_info(self); + return; + } + + self->err_no = hikp_ub_get_first_blk_dfx(&rsp_head, &reg_data, &max_dfx_size, &version); + if (self->err_no != 0) { + snprintf(self->err_str, sizeof(self->err_str), "get the first block dfx fail."); + return; + } + real_reg_size = (uint32_t)rsp_head.cur_blk_size; + for (i = 1; i < rsp_head.total_blk_num; i++) { + self->err_no = hikp_ub_get_blk_dfx(&tmp_head, i, + reg_data + (real_reg_size / sizeof(uint32_t)), + &max_dfx_size); + if (self->err_no != 0) { + snprintf(self->err_str, sizeof(self->err_str), + "getting block%u reg fail.", i); + free(reg_data); + return; + } + real_reg_size += (uint32_t)tmp_head.cur_blk_size; + memset(&tmp_head, 0, sizeof(struct ub_dfx_rsp_head)); + } + + printf("DFX cmd version: 0x%x\n\n", version); + hikp_ub_dfx_print((const struct ub_dfx_rsp_head *)&rsp_head, reg_data); + free(reg_data); +} + +static void cmd_ub_dfx_init(void) +{ + struct major_cmd_ctrl *major_cmd = get_major_cmd(); + + major_cmd->option_count = 0; + major_cmd->execute = hikp_ub_dfx_execute; + + cmd_option_register("-h", "--help", false, hikp_ub_dfx_help); + cmd_option_register("-i", "--interface", true, hikp_ub_dfx_target); + cmd_option_register("-m", "--module", true, hikp_ub_dfx_module_select); +} + +HIKP_CMD_DECLARE("ub_dfx", "dump ub dfx info of hardware", cmd_ub_dfx_init); diff --git a/net/ub/ub_dfx/hikp_ub_dfx.h b/net/ub/ub_dfx/hikp_ub_dfx.h new file mode 100644 index 0000000..4ba37a1 --- /dev/null +++ b/net/ub/ub_dfx/hikp_ub_dfx.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2023 Hisilicon Technologies Co., Ltd. + * Hikptool is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * + * See the Mulan PSL v2 for more details. + */ + +#ifndef HIKP_UB_DFX_H +#define HIKP_UB_DFX_H + +#include "hikp_net_lib.h" + +#define MAX_DFX_DATA_NUM 59 +#define MODULE_SET_FLAG 0x1 + +enum ub_dfx_cmd_type { + LRB_DFX_REG_DUMP = 0, + PFA_DFX_REG_DUMP = 1, + PM_DFX_REG_DUMP = 2, + INVALID_MODULE = 0xFFFFFFFF, +}; + +enum ub_dfx_reg_type { + INCORRECT_REG_TYPE = 0, + TYPE_32_STATS = 1, + TYPE_32_RUNNING_STATUS = 2, + TYPE_64_STATS = 3, + TYPE_INVALID = 255, +}; + +#define MAX_TYPE_NAME_LEN 40 + +enum ub_dfx_reg_width { + WIDTH_32_BIT = 32, + WIDTH_64_BIT = 64, +}; + +struct dfx_type_parse { + uint8_t type_id; + uint8_t bit_width; + uint8_t type_name[MAX_TYPE_NAME_LEN]; +}; + +struct ub_dfx_param { + struct tool_target target; + uint32_t sub_cmd_code; + uint8_t module_idx; + uint8_t flag; +}; + +#define MAX_MODULE_NAME_LEN 20 +struct dfx_module_cmd { + uint8_t module_name[MAX_MODULE_NAME_LEN]; + uint32_t sub_cmd_code; +}; + +struct ub_dfx_req_para { + struct bdf_t bdf; + uint8_t block_id; +}; + +struct ub_dfx_type_head { + uint8_t type_id; + uint8_t bit_width; + uint8_t reg_num; + uint8_t flag; +}; + +struct ub_dfx_rsp_head { + uint8_t total_blk_num; + uint8_t total_type_num; + uint8_t cur_blk_size; + uint8_t rsvd; +}; + +/********************************************************* + * All registers are returned as key-value pairs, and divided + * into three groups of data. + * 1. 32bit regs: R0 bit0~bit15: offset, R1 bit0~bit31: value + * 2. 64bit regs: R0 bit0~bit15: offset, R0 bit16~bit31 high16 value, R1 bit0~bit31: low32 value + *********************************************************/ +#define DFX_REG_VALUE_OFF 16 +#define DFX_REG_VALUE_MASK 0xFFFF +#define DFX_REG_ADDR_MASK 0xFFFF + +#define WORD_NUM_PER_REG 2 +#define BIT_NUM_OF_WORD 32 + +struct ub_dfx_rsp { + struct ub_dfx_rsp_head rsp_head; + uint32_t reg_data[MAX_DFX_DATA_NUM]; +}; + +#endif /* HIKP_UB_DFX_H */ diff --git a/tool_lib/tool_lib.h b/tool_lib/tool_lib.h index b211175..bf37465 100644 --- a/tool_lib/tool_lib.h +++ b/tool_lib/tool_lib.h @@ -18,7 +18,7 @@ #define TOOL_NAME "hikptool" -#define TOOL_VER "1.0.14" +#define TOOL_VER "1.0.15" #define HI_GET_BITFIELD(value, start, mask) (((value) >> (start)) & (mask)) #define HI_SET_FIELD(origin, shift, val) ((origin) |= (val) << (shift)) -- 2.36.1.windows.1

2 21