Kernel
Threads by month
- ----- 2025 -----
- February
- January
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
October 2023
- 47 participants
- 220 discussions
Baokun Li (1):
xfs: propagate the return value of xfs_log_force() to avoid soft
lockup
Colin Ian King (2):
xfs: remove redundant initializations of pointers drop_leaf and
save_leaf
xfs: remove redundant pointer lip
Darrick J. Wong (9):
xfs: use setattr_copy to set vfs inode attributes
xfs: remove kmem_zone typedef
xfs: rename _zone variables to _cache
xfs: compact deferred intent item structures
xfs: create slab caches for frequently-used deferred items
xfs: rename xfs_bmap_add_free to xfs_free_extent_later
xfs: reduce the size of struct xfs_extent_free_item
xfs: remove unused parameter from refcount code
xfs: pass xfs_extent_free_item directly through the log intent code
Dave Chinner (19):
xfs: don't assert fail on perag references on teardown
xfs: set prealloc flag in xfs_alloc_file_space()
xfs: validity check agbnos on the AGFL
xfs: validate block number being freed before adding to xefi
xfs: don't reverse order of items in bulk AIL insertion
xfs: use deferred frees for btree block freeing
xfs: pass alloc flags through to xfs_extent_busy_flush()
xfs: allow extent free intents to be retried
xfs: don't block in busy flushing when freeing extents
xfs: journal geometry is not properly bounds checked
xfs: AGF length has never been bounds checked
xfs: fix bounds check in xfs_defer_agfl_block()
xfs: block reservation too large for minleft allocation
xfs: punching delalloc extents on write failure is racy
xfs: use byte ranges for write cleanup ranges
xfs,iomap: move delalloc punching to iomap
iomap: buffered write failure should not truncate the page cache
xfs: xfs_bmap_punch_delalloc_range() should take a byte range
xfs: fix off-by-one-block in xfs_discard_folio()
Gaosheng Cui (1):
xfs: remove xfs_setattr_time() declaration
Guo Xuenan (1):
xfs: set minleft correctly for randomly sparse inode allocations
Jiapeng Chong (1):
xfs: Remove redundant assignment to busy
Long Li (6):
xfs: fix dir3 block read verify fail during log recover
Revert "xfs: propagate the return value of xfs_log_force() to avoid
soft lockup"
xfs: xfs_trans_cancel() path must check for log shutdown
xfs: don't verify agf length when log recovery
xfs: shutdown to ensure submits buffers on LSN boundaries
xfs: update the last_sync_lsn with ctx start lsn
yangerkun (4):
xfs: keep growfs sb log item active until ail flush success
xfs: fix xfs shutdown since we reserve more blocks in agfl fixup
xfs: longest free extent no need consider postalloc
xfs: shutdown xfs once inode double free
fs/xfs/kmem.h | 4 -
fs/xfs/libxfs/xfs_alloc.c | 390 +++++++++++++++++++++--------
fs/xfs/libxfs/xfs_alloc.h | 51 +++-
fs/xfs/libxfs/xfs_alloc_btree.c | 2 +-
fs/xfs/libxfs/xfs_attr_leaf.c | 2 -
fs/xfs/libxfs/xfs_bmap.c | 90 +++----
fs/xfs/libxfs/xfs_bmap.h | 37 +--
fs/xfs/libxfs/xfs_bmap_btree.c | 27 +-
fs/xfs/libxfs/xfs_btree.c | 4 +-
fs/xfs/libxfs/xfs_btree.h | 2 +-
fs/xfs/libxfs/xfs_da_btree.c | 6 +-
fs/xfs/libxfs/xfs_da_btree.h | 3 +-
fs/xfs/libxfs/xfs_defer.c | 70 +++++-
fs/xfs/libxfs/xfs_defer.h | 3 +
fs/xfs/libxfs/xfs_ialloc.c | 32 ++-
fs/xfs/libxfs/xfs_ialloc_btree.c | 8 +-
fs/xfs/libxfs/xfs_inode_fork.c | 4 +-
fs/xfs/libxfs/xfs_inode_fork.h | 2 +-
fs/xfs/libxfs/xfs_refcount.c | 56 +++--
fs/xfs/libxfs/xfs_refcount.h | 7 +-
fs/xfs/libxfs/xfs_refcount_btree.c | 11 +-
fs/xfs/libxfs/xfs_rmap.c | 21 +-
fs/xfs/libxfs/xfs_rmap.h | 7 +-
fs/xfs/libxfs/xfs_rmap_btree.c | 2 +-
fs/xfs/libxfs/xfs_sb.c | 56 ++++-
fs/xfs/libxfs/xfs_types.c | 23 ++
fs/xfs/libxfs/xfs_types.h | 2 +
fs/xfs/xfs_aops.c | 32 +--
fs/xfs/xfs_bmap_item.c | 16 +-
fs/xfs/xfs_bmap_item.h | 6 +-
fs/xfs/xfs_bmap_util.c | 19 +-
fs/xfs/xfs_bmap_util.h | 2 +-
fs/xfs/xfs_buf.c | 16 +-
fs/xfs/xfs_buf_item.c | 10 +-
fs/xfs/xfs_buf_item.h | 11 +-
fs/xfs/xfs_buf_item_recover.c | 9 +-
fs/xfs/xfs_dquot.c | 26 +-
fs/xfs/xfs_extent_busy.c | 36 ++-
fs/xfs/xfs_extent_busy.h | 6 +-
fs/xfs/xfs_extfree_item.c | 137 +++++++---
fs/xfs/xfs_extfree_item.h | 6 +-
fs/xfs/xfs_file.c | 8 -
fs/xfs/xfs_icache.c | 8 +-
fs/xfs/xfs_icreate_item.c | 6 +-
fs/xfs/xfs_icreate_item.h | 2 +-
fs/xfs/xfs_inode.c | 2 +-
fs/xfs/xfs_inode.h | 2 +-
fs/xfs/xfs_inode_item.c | 6 +-
fs/xfs/xfs_inode_item.h | 2 +-
fs/xfs/xfs_iomap.c | 292 ++++++++++++++++++---
fs/xfs/xfs_iops.c | 56 +----
fs/xfs/xfs_iops.h | 1 -
fs/xfs/xfs_log.c | 72 +++---
fs/xfs/xfs_log_priv.h | 2 +-
fs/xfs/xfs_log_recover.c | 6 +-
fs/xfs/xfs_mount.c | 12 +-
fs/xfs/xfs_mru_cache.c | 2 +-
fs/xfs/xfs_pnfs.c | 3 +-
fs/xfs/xfs_qm.h | 2 +-
fs/xfs/xfs_refcount_item.c | 16 +-
fs/xfs/xfs_refcount_item.h | 6 +-
fs/xfs/xfs_reflink.c | 7 +-
fs/xfs/xfs_rmap_item.c | 16 +-
fs/xfs/xfs_rmap_item.h | 6 +-
fs/xfs/xfs_super.c | 233 ++++++++---------
fs/xfs/xfs_trans.c | 24 +-
fs/xfs/xfs_trans.h | 2 +-
fs/xfs/xfs_trans_ail.c | 5 +-
fs/xfs/xfs_trans_dquot.c | 4 +-
mm/filemap.c | 1 +
70 files changed, 1358 insertions(+), 700 deletions(-)
--
2.31.1
2
45

[PATCH openEuler-1.0-LTS] mm: memory-failure: use rcu lock instead of tasklist_lock when collect_procs()
by Tong Tiangen 08 Oct '23
by Tong Tiangen 08 Oct '23
08 Oct '23
mainline inclusion
from mainline-v6.6-rc1
commit d256d1cd8da1cbc4615de69df71c87ce623fec2f
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I85WL9
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
--------------------------------
We found a softlock issue in our test, analyzed the logs, and found that
the relevant CPU call trace as follows:
CPU0:
_do_fork
-> copy_process()
-> write_lock_irq(&tasklist_lock) //Disable irq,waiting for
//tasklist_lock
CPU1:
wp_page_copy()
->pte_offset_map_lock()
-> spin_lock(&page->ptl); //Hold page->ptl
-> ptep_clear_flush()
-> flush_tlb_others() ...
-> smp_call_function_many()
-> arch_send_call_function_ipi_mask()
-> csd_lock_wait() //Waiting for other CPUs respond
//IPI
CPU2:
collect_procs_anon()
-> read_lock(&tasklist_lock) //Hold tasklist_lock
->for_each_process(tsk)
-> page_mapped_in_vma()
-> page_vma_mapped_walk()
-> map_pte()
->spin_lock(&page->ptl) //Waiting for page->ptl
We can see that CPU1 waiting for CPU0 respond IPI,CPU0 waiting for CPU2
unlock tasklist_lock, CPU2 waiting for CPU1 unlock page->ptl. As a result,
softlockup is triggered.
For collect_procs_anon(), what we're doing is task list iteration, during
the iteration, with the help of call_rcu(), the task_struct object is freed
only after one or more grace periods elapse. the logic as follows:
release_task()
-> __exit_signal()
-> __unhash_process()
-> list_del_rcu()
-> put_task_struct_rcu_user()
-> call_rcu(&task->rcu, delayed_put_task_struct)
delayed_put_task_struct()
-> put_task_struct()
-> if (refcount_sub_and_test())
__put_task_struct()
-> free_task()
Therefore, under the protection of the rcu lock, we can safely use
get_task_struct() to ensure a safe reference to task_struct during the
iteration.
By removing the use of tasklist_lock in task list iteration, we can break
the softlock chain above.
The same logic can also be applied to:
- collect_procs_file()
- collect_procs_fsdax()
- collect_procs_ksm()
Link: https://lkml.kernel.org/r/20230828022527.241693-1-tongtiangen@huawei.com
Signed-off-by: Tong Tiangen <tongtiangen(a)huawei.com>
Acked-by: Naoya Horiguchi <naoya.horiguchi(a)nec.com>
Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Paul E. McKenney <paulmck(a)kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Conflicts:
mm/filemap.c
mm/ksm.c
mm/memory-failure.c
Signed-off-by: Tong Tiangen <tongtiangen(a)huawei.com>
---
mm/filemap.c | 3 ---
mm/memory-failure.c | 12 ++++++------
2 files changed, 6 insertions(+), 9 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index f33504b784a2..5bf4645256cb 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -166,9 +166,6 @@ static void page_cache_kill(struct page *page)
* bdi.wb->list_lock (zap_pte_range->set_page_dirty)
* ->inode->i_lock (zap_pte_range->set_page_dirty)
* ->private_lock (zap_pte_range->__set_page_dirty_buffers)
- *
- * ->i_mmap_rwsem
- * ->tasklist_lock (memory_failure, collect_procs_ao)
*/
static int page_cache_tree_insert(struct address_space *mapping,
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index fff715a27253..8924d7d9bffa 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -427,8 +427,8 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
* on behalf of the thread group. Return task_struct of the (first found)
* dedicated thread if found, and return NULL otherwise.
*
- * We already hold read_lock(&tasklist_lock) in the caller, so we don't
- * have to call rcu_read_lock/unlock() in this function.
+ * We already hold rcu lock in the caller, so we don't have to call
+ * rcu_read_lock/unlock() in this function.
*/
static struct task_struct *find_early_kill_thread(struct task_struct *tsk)
{
@@ -478,7 +478,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
return;
pgoff = page_to_pgoff(page);
- read_lock(&tasklist_lock);
+ rcu_read_lock();
for_each_process (tsk) {
struct anon_vma_chain *vmac;
struct task_struct *t = task_early_kill(tsk, force_early);
@@ -494,7 +494,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
add_to_kill(t, page, vma, to_kill, tkc);
}
}
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
page_unlock_anon_vma_read(av);
}
@@ -509,7 +509,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
struct address_space *mapping = page->mapping;
i_mmap_lock_read(mapping);
- read_lock(&tasklist_lock);
+ rcu_read_lock();
for_each_process(tsk) {
pgoff_t pgoff = page_to_pgoff(page);
struct task_struct *t = task_early_kill(tsk, force_early);
@@ -529,7 +529,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
add_to_kill(t, page, vma, to_kill, tkc);
}
}
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
i_mmap_unlock_read(mapping);
}
--
2.25.1
2
1

[PATCH openEuler-1.0-LTS v2] x86/topology: Fix erroneous smp_num_siblings on Intel Hybrid platforms
by Yu Liao 08 Oct '23
by Yu Liao 08 Oct '23
08 Oct '23
From: Zhang Rui <rui.zhang(a)intel.com>
stable inclusion
from stable-v4.19.293
commit f5867e5b6a780710251318cb2047e24c49935e43
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I85XNK
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
--------------------------------
[ Upstream commit edc0a2b5957652f4685ef3516f519f84807087db ]
Traditionally, all CPUs in a system have identical numbers of SMT
siblings. That changes with hybrid processors where some logical CPUs
have a sibling and others have none.
Today, the CPU boot code sets the global variable smp_num_siblings when
every CPU thread is brought up. The last thread to boot will overwrite
it with the number of siblings of *that* thread. That last thread to
boot will "win". If the thread is a Pcore, smp_num_siblings == 2. If it
is an Ecore, smp_num_siblings == 1.
smp_num_siblings describes if the *system* supports SMT. It should
specify the maximum number of SMT threads among all cores.
Ensure that smp_num_siblings represents the system-wide maximum number
of siblings by always increasing its value. Never allow it to decrease.
On MeteorLake-P platform, this fixes a problem that the Ecore CPUs are
not updated in any cpu sibling map because the system is treated as an
UP system when probing Ecore CPUs.
Below shows part of the CPU topology information before and after the
fix, for both Pcore and Ecore CPU (cpu0 is Pcore, cpu 12 is Ecore).
...
-/sys/devices/system/cpu/cpu0/topology/package_cpus:000fff
-/sys/devices/system/cpu/cpu0/topology/package_cpus_list:0-11
+/sys/devices/system/cpu/cpu0/topology/package_cpus:3fffff
+/sys/devices/system/cpu/cpu0/topology/package_cpus_list:0-21
...
-/sys/devices/system/cpu/cpu12/topology/package_cpus:001000
-/sys/devices/system/cpu/cpu12/topology/package_cpus_list:12
+/sys/devices/system/cpu/cpu12/topology/package_cpus:3fffff
+/sys/devices/system/cpu/cpu12/topology/package_cpus_list:0-21
Notice that the "before" 'package_cpus_list' has only one CPU. This
means that userspace tools like lscpu will see a little laptop like
an 11-socket system:
-Core(s) per socket: 1
-Socket(s): 11
+Core(s) per socket: 16
+Socket(s): 1
This is also expected to make the scheduler do rather wonky things
too.
[ dhansen: remove CPUID detail from changelog, add end user effects ]
CC: stable(a)kernel.org
Fixes: bbb65d2d365e ("x86: use cpuid vector 0xb when available for detecting cpu topology")
Fixes: 95f3d39ccf7a ("x86/cpu/topology: Provide detect_extended_topology_early()")
Suggested-by: Len Brown <len.brown(a)intel.com>
Signed-off-by: Zhang Rui <rui.zhang(a)intel.com>
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Link: https://lore.kernel.org/all/20230323015640.27906-1-rui.zhang%40intel.com
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
Conflicts:
arch/x86/kernel/cpu/topology.c
Signed-off-by: Yu Liao <liaoyu15(a)huawei.com>
---
arch/x86/kernel/cpu/topology.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 7c130bdfd746..5c15e1329fcb 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -77,7 +77,7 @@ int detect_extended_topology_early(struct cpuinfo_x86 *c)
* initial apic id, which also represents 32-bit extended x2apic id.
*/
c->initial_apicid = edx;
- smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
+ smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx));
#endif
return 0;
}
@@ -106,7 +106,8 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
*/
cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
c->initial_apicid = edx;
- core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
+ core_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
+ smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx));
core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
die_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
--
2.25.1
2
1

[PATCH openEuler-1.0-LTS] x86/topology: Fix erroneous smp_num_siblings on Intel Hybrid platforms
by Yu Liao 07 Oct '23
by Yu Liao 07 Oct '23
07 Oct '23
From: Zhang Rui <rui.zhang(a)intel.com>
stable inclusion
from stable-v4.19.293
commit f5867e5b6a780710251318cb2047e24c49935e43
category: bugfix
bugzilla: 188350
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
--------------------------------
[ Upstream commit edc0a2b5957652f4685ef3516f519f84807087db ]
Traditionally, all CPUs in a system have identical numbers of SMT
siblings. That changes with hybrid processors where some logical CPUs
have a sibling and others have none.
Today, the CPU boot code sets the global variable smp_num_siblings when
every CPU thread is brought up. The last thread to boot will overwrite
it with the number of siblings of *that* thread. That last thread to
boot will "win". If the thread is a Pcore, smp_num_siblings == 2. If it
is an Ecore, smp_num_siblings == 1.
smp_num_siblings describes if the *system* supports SMT. It should
specify the maximum number of SMT threads among all cores.
Ensure that smp_num_siblings represents the system-wide maximum number
of siblings by always increasing its value. Never allow it to decrease.
On MeteorLake-P platform, this fixes a problem that the Ecore CPUs are
not updated in any cpu sibling map because the system is treated as an
UP system when probing Ecore CPUs.
Below shows part of the CPU topology information before and after the
fix, for both Pcore and Ecore CPU (cpu0 is Pcore, cpu 12 is Ecore).
...
-/sys/devices/system/cpu/cpu0/topology/package_cpus:000fff
-/sys/devices/system/cpu/cpu0/topology/package_cpus_list:0-11
+/sys/devices/system/cpu/cpu0/topology/package_cpus:3fffff
+/sys/devices/system/cpu/cpu0/topology/package_cpus_list:0-21
...
-/sys/devices/system/cpu/cpu12/topology/package_cpus:001000
-/sys/devices/system/cpu/cpu12/topology/package_cpus_list:12
+/sys/devices/system/cpu/cpu12/topology/package_cpus:3fffff
+/sys/devices/system/cpu/cpu12/topology/package_cpus_list:0-21
Notice that the "before" 'package_cpus_list' has only one CPU. This
means that userspace tools like lscpu will see a little laptop like
an 11-socket system:
-Core(s) per socket: 1
-Socket(s): 11
+Core(s) per socket: 16
+Socket(s): 1
This is also expected to make the scheduler do rather wonky things
too.
[ dhansen: remove CPUID detail from changelog, add end user effects ]
CC: stable(a)kernel.org
Fixes: bbb65d2d365e ("x86: use cpuid vector 0xb when available for detecting cpu topology")
Fixes: 95f3d39ccf7a ("x86/cpu/topology: Provide detect_extended_topology_early()")
Suggested-by: Len Brown <len.brown(a)intel.com>
Signed-off-by: Zhang Rui <rui.zhang(a)intel.com>
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Link: https://lore.kernel.org/all/20230323015640.27906-1-rui.zhang%40intel.com
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
Conflicts:
arch/x86/kernel/cpu/topology.c
Signed-off-by: Yu Liao <liaoyu15(a)huawei.com>
---
arch/x86/kernel/cpu/topology.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 7c130bdfd746..5c15e1329fcb 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -77,7 +77,7 @@ int detect_extended_topology_early(struct cpuinfo_x86 *c)
* initial apic id, which also represents 32-bit extended x2apic id.
*/
c->initial_apicid = edx;
- smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
+ smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx));
#endif
return 0;
}
@@ -106,7 +106,8 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
*/
cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
c->initial_apicid = edx;
- core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
+ core_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
+ smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx));
core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
die_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
--
2.25.1
2
1
New architectural features and CPUID bits related to the
Speculative Return Stack Overflow (SRSO) vulnerability.
Arnaldo Carvalho de Melo (2):
tools headers cpufeatures: Sync with the kernel sources
tools arch x86: Sync the msr-index.h copy with the kernel sources
Borislav Petkov (AMD) (9):
x86/bugs: Increase the x86 bugs vector size to two u32s
x86/srso: Add a Speculative RAS Overflow mitigation
x86/srso: Add IBPB_BRTYPE support
x86/srso: Add SRSO_NO support
x86/srso: Add IBPB
x86/srso: Add IBPB on VMEXIT
x86/srso: Tie SBPB bit setting to microcode patch detection
x86/srso: Disable the mitigation on unaffected configurations
x86/srso: Correct the mitigation status when SMT is disabled
Jialin Zhang (1):
x86/cpufeatures: Fix abi breakage caused by NCAPINTS in cpufeature
header file.
Josh Poimboeuf (2):
x86/srso: Fix return thunks in generated code
objtool: Add frame-pointer-specific function ignore
Kim Phillips (1):
x86/cpu, kvm: Add support for CPUID_80000021_EAX
Nick Desaulniers (1):
x86/srso: Fix build breakage with the LLVM linker
Nikolay Borisov (1):
kabi: Allow extra bugsints (bsc#1213927).
Peter Zijlstra (10):
x86/ibt: Add ANNOTATE_NOENDBR
x86/cpu: Fix __x86_return_thunk symbol type
x86/cpu: Fix up srso_safe_ret() and __x86_return_thunk()
x86/alternative: Make custom return thunk unconditional
x86/cpu: Clean up SRSO return thunk mess
x86/cpu: Rename original retbleed methods
x86/cpu: Rename srso_(.*)_alias to srso_alias_\1
x86/cpu: Cleanup the untrain mess
objtool/x86: Fixup frame-pointer vs rethunk
objtool/x86: Fix SRSO mess
Sean Christopherson (1):
x86/retpoline: Don't clobber RFLAGS during srso_safe_ret()
Documentation/admin-guide/hw-vuln/index.rst | 1 +
Documentation/admin-guide/hw-vuln/srso.rst | 133 ++++++++++++
.../admin-guide/kernel-parameters.txt | 11 +
arch/x86/Kconfig | 7 +
arch/x86/include/asm/cpufeature.h | 23 +-
arch/x86/include/asm/cpufeatures.h | 15 +-
arch/x86/include/asm/msr-index.h | 1 +
arch/x86/include/asm/nospec-branch.h | 34 ++-
arch/x86/include/asm/processor.h | 7 +-
arch/x86/kernel/alternative.c | 2 +-
arch/x86/kernel/cpu/amd.c | 19 ++
arch/x86/kernel/cpu/bugs.c | 197 ++++++++++++++++++
arch/x86/kernel/cpu/common.c | 17 +-
arch/x86/kernel/cpu/mkcapflags.sh | 2 +-
arch/x86/kernel/cpu/proc.c | 2 +-
arch/x86/kernel/cpu/scattered.c | 3 +
arch/x86/kernel/vmlinux.lds.S | 38 +++-
arch/x86/kvm/cpuid.c | 3 +
arch/x86/kvm/svm/svm.c | 4 +-
arch/x86/kvm/svm/vmenter.S | 3 +
arch/x86/lib/retpoline.S | 135 ++++++++++--
drivers/base/cpu.c | 8 +
include/linux/cpu.h | 2 +
include/linux/objtool.h | 28 +++
tools/arch/x86/include/asm/cpufeatures.h | 16 +-
tools/arch/x86/include/asm/msr-index.h | 1 +
tools/include/linux/objtool.h | 28 +++
tools/objtool/arch.h | 1 +
tools/objtool/arch/x86/decode.c | 6 +
tools/objtool/check.c | 41 +++-
tools/objtool/elf.h | 1 +
.../perf/trace/beauty/tracepoints/x86_msr.sh | 2 +-
32 files changed, 740 insertions(+), 51 deletions(-)
create mode 100644 Documentation/admin-guide/hw-vuln/srso.rst
--
2.25.1
3
30
From: Kyle Zeng <zengyhkyle(a)gmail.com>
mainline inclusion
from mainline-v6.6-rc3
commit 0113d9c9d1ccc07f5a3710dac4aa24b6d711278c
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I85DZB
CVE: CVE-2023-42754
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
--------------------------------
Currently, we assume the skb is associated with a device before calling
__ip_options_compile, which is not always the case if it is re-routed by
ipvs.
When skb->dev is NULL, dev_net(skb->dev) will become null-dereference.
This patch adds a check for the edge case and switch to use the net_device
from the rtable when skb->dev is NULL.
Fixes: ed0de45a1008 ("ipv4: recompile ip options in ipv4_link_failure")
Suggested-by: David Ahern <dsahern(a)kernel.org>
Signed-off-by: Kyle Zeng <zengyhkyle(a)gmail.com>
Cc: Stephen Suryaputra <ssuryaextr(a)gmail.com>
Cc: Vadim Fedorenko <vfedorenko(a)novek.ru>
Reviewed-by: David Ahern <dsahern(a)kernel.org>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Lu Wei <luwei32(a)huawei.com>
---
net/ipv4/route.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 68f67bf99ed7..86096e2e43b0 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1212,6 +1212,7 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
static void ipv4_send_dest_unreach(struct sk_buff *skb)
{
+ struct net_device *dev;
struct ip_options opt;
int res;
@@ -1229,7 +1230,8 @@ static void ipv4_send_dest_unreach(struct sk_buff *skb)
opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr);
rcu_read_lock();
- res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL);
+ dev = skb->dev ? skb->dev : skb_rtable(skb)->dst.dev;
+ res = __ip_options_compile(dev_net(dev), &opt, skb, NULL);
rcu_read_unlock();
if (res)
--
2.34.1
2
1
New architectural features and CPUID bits related to the
Speculative Return Stack Overflow (SRSO) vulnerability.
Arnaldo Carvalho de Melo (2):
tools headers cpufeatures: Sync with the kernel sources
tools arch x86: Sync the msr-index.h copy with the kernel sources
Borislav Petkov (AMD) (9):
x86/bugs: Increase the x86 bugs vector size to two u32s
x86/srso: Add a Speculative RAS Overflow mitigation
x86/srso: Add IBPB_BRTYPE support
x86/srso: Add SRSO_NO support
x86/srso: Add IBPB
x86/srso: Add IBPB on VMEXIT
x86/srso: Tie SBPB bit setting to microcode patch detection
x86/srso: Disable the mitigation on unaffected configurations
x86/srso: Correct the mitigation status when SMT is disabled
Jialin Zhang (1):
x86/cpufeatures: Fix abi breakage caused by NCAPINTS in cpufeature
header file.
Josh Poimboeuf (2):
x86/srso: Fix return thunks in generated code
objtool: Add frame-pointer-specific function ignore
Kim Phillips (1):
x86/cpu, kvm: Add support for CPUID_80000021_EAX
Nick Desaulniers (1):
x86/srso: Fix build breakage with the LLVM linker
Nikolay Borisov (1):
kabi: Allow extra bugsints (bsc#1213927).
Peter Zijlstra (10):
x86/ibt: Add ANNOTATE_NOENDBR
x86/cpu: Fix __x86_return_thunk symbol type
x86/cpu: Fix up srso_safe_ret() and __x86_return_thunk()
x86/alternative: Make custom return thunk unconditional
x86/cpu: Clean up SRSO return thunk mess
x86/cpu: Rename original retbleed methods
x86/cpu: Rename srso_(.*)_alias to srso_alias_\1
x86/cpu: Cleanup the untrain mess
objtool/x86: Fixup frame-pointer vs rethunk
objtool/x86: Fix SRSO mess
Sean Christopherson (1):
x86/retpoline: Don't clobber RFLAGS during srso_safe_ret()
Documentation/admin-guide/hw-vuln/index.rst | 1 +
Documentation/admin-guide/hw-vuln/srso.rst | 133 ++++++++++++
.../admin-guide/kernel-parameters.txt | 11 +
arch/x86/Kconfig | 7 +
arch/x86/include/asm/cpufeature.h | 23 +-
arch/x86/include/asm/cpufeatures.h | 15 +-
arch/x86/include/asm/msr-index.h | 1 +
arch/x86/include/asm/nospec-branch.h | 34 ++-
arch/x86/include/asm/processor.h | 7 +-
arch/x86/kernel/alternative.c | 2 +-
arch/x86/kernel/cpu/amd.c | 19 ++
arch/x86/kernel/cpu/bugs.c | 197 ++++++++++++++++++
arch/x86/kernel/cpu/common.c | 17 +-
arch/x86/kernel/cpu/mkcapflags.sh | 2 +-
arch/x86/kernel/cpu/proc.c | 2 +-
arch/x86/kernel/cpu/scattered.c | 3 +
arch/x86/kernel/vmlinux.lds.S | 38 +++-
arch/x86/kvm/cpuid.c | 3 +
arch/x86/kvm/svm/svm.c | 4 +-
arch/x86/kvm/svm/vmenter.S | 3 +
arch/x86/lib/retpoline.S | 135 ++++++++++--
drivers/base/cpu.c | 8 +
include/linux/cpu.h | 2 +
include/linux/objtool.h | 28 +++
tools/arch/x86/include/asm/cpufeatures.h | 16 +-
tools/arch/x86/include/asm/msr-index.h | 1 +
tools/include/linux/objtool.h | 28 +++
tools/objtool/arch.h | 1 +
tools/objtool/arch/x86/decode.c | 6 +
tools/objtool/check.c | 41 +++-
tools/objtool/elf.h | 1 +
.../perf/trace/beauty/tracepoints/x86_msr.sh | 2 +-
32 files changed, 740 insertions(+), 51 deletions(-)
create mode 100644 Documentation/admin-guide/hw-vuln/srso.rst
--
2.25.1
1
28
From: Jamal Hadi Salim <jhs(a)mojatatu.com>
stable inclusion
from stable-v5.10.196
commit 8db844077ec9912d75952c80d76da71fc2412852
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I84B2W
CVE: CVE-2023-42755
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/patch/?id=…
--------------------------------
commit 265b4da82dbf5df04bee5a5d46b7474b1aaf326a upstream.
The rsvp classifier has served us well for about a quarter of a century but has
has not been getting much maintenance attention due to lack of known users.
Signed-off-by: Jamal Hadi Salim <jhs(a)mojatatu.com>
Acked-by: Jiri Pirko <jiri(a)nvidia.com>
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Signed-off-by: Kyle Zeng <zengyhkyle(a)gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Dong Chenchen <dongchenchen2(a)huawei.com>
---
net/sched/Kconfig | 28 --
net/sched/Makefile | 2 -
net/sched/cls_rsvp.c | 24 --
net/sched/cls_rsvp.h | 777 ------------------------------------------
net/sched/cls_rsvp6.c | 24 --
5 files changed, 855 deletions(-)
delete mode 100644 net/sched/cls_rsvp.c
delete mode 100644 net/sched/cls_rsvp.h
delete mode 100644 net/sched/cls_rsvp6.c
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 697522371914..2046c16b29f0 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -548,34 +548,6 @@ config CLS_U32_MARK
help
Say Y here to be able to use netfilter marks as u32 key.
-config NET_CLS_RSVP
- tristate "IPv4 Resource Reservation Protocol (RSVP)"
- select NET_CLS
- help
- The Resource Reservation Protocol (RSVP) permits end systems to
- request a minimum and maximum data flow rate for a connection; this
- is important for real time data such as streaming sound or video.
-
- Say Y here if you want to be able to classify outgoing packets based
- on their RSVP requests.
-
- To compile this code as a module, choose M here: the
- module will be called cls_rsvp.
-
-config NET_CLS_RSVP6
- tristate "IPv6 Resource Reservation Protocol (RSVP6)"
- select NET_CLS
- help
- The Resource Reservation Protocol (RSVP) permits end systems to
- request a minimum and maximum data flow rate for a connection; this
- is important for real time data such as streaming sound or video.
-
- Say Y here if you want to be able to classify outgoing packets based
- on their RSVP requests and you are using the IPv6 protocol.
-
- To compile this code as a module, choose M here: the
- module will be called cls_rsvp6.
-
config NET_CLS_FLOW
tristate "Flow classifier"
select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 4311fdb21119..df2bcd785f7d 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -68,8 +68,6 @@ obj-$(CONFIG_NET_SCH_TAPRIO) += sch_taprio.o
obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
obj-$(CONFIG_NET_CLS_FW) += cls_fw.o
-obj-$(CONFIG_NET_CLS_RSVP) += cls_rsvp.o
-obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o
obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o
obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o
obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o
diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c
deleted file mode 100644
index de1c1d4da597..000000000000
--- a/net/sched/cls_rsvp.c
+++ /dev/null
@@ -1,24 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * net/sched/cls_rsvp.c Special RSVP packet classifier for IPv4.
- *
- * Authors: Alexey Kuznetsov, <kuznet(a)ms2.inr.ac.ru>
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/skbuff.h>
-#include <net/ip.h>
-#include <net/netlink.h>
-#include <net/act_api.h>
-#include <net/pkt_cls.h>
-
-#define RSVP_DST_LEN 1
-#define RSVP_ID "rsvp"
-#define RSVP_OPS cls_rsvp_ops
-
-#include "cls_rsvp.h"
-MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
deleted file mode 100644
index d36949d9382c..000000000000
--- a/net/sched/cls_rsvp.h
+++ /dev/null
@@ -1,777 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
- *
- * Authors: Alexey Kuznetsov, <kuznet(a)ms2.inr.ac.ru>
- */
-
-/*
- Comparing to general packet classification problem,
- RSVP needs only sevaral relatively simple rules:
-
- * (dst, protocol) are always specified,
- so that we are able to hash them.
- * src may be exact, or may be wildcard, so that
- we can keep a hash table plus one wildcard entry.
- * source port (or flow label) is important only if src is given.
-
- IMPLEMENTATION.
-
- We use a two level hash table: The top level is keyed by
- destination address and protocol ID, every bucket contains a list
- of "rsvp sessions", identified by destination address, protocol and
- DPI(="Destination Port ID"): triple (key, mask, offset).
-
- Every bucket has a smaller hash table keyed by source address
- (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
- Every bucket is again a list of "RSVP flows", selected by
- source address and SPI(="Source Port ID" here rather than
- "security parameter index"): triple (key, mask, offset).
-
-
- NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
- and all fragmented packets go to the best-effort traffic class.
-
-
- NOTE 2. Two "port id"'s seems to be redundant, rfc2207 requires
- only one "Generalized Port Identifier". So that for classic
- ah, esp (and udp,tcp) both *pi should coincide or one of them
- should be wildcard.
-
- At first sight, this redundancy is just a waste of CPU
- resources. But DPI and SPI add the possibility to assign different
- priorities to GPIs. Look also at note 4 about tunnels below.
-
-
- NOTE 3. One complication is the case of tunneled packets.
- We implement it as following: if the first lookup
- matches a special session with "tunnelhdr" value not zero,
- flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
- In this case, we pull tunnelhdr bytes and restart lookup
- with tunnel ID added to the list of keys. Simple and stupid 8)8)
- It's enough for PIMREG and IPIP.
-
-
- NOTE 4. Two GPIs make it possible to parse even GRE packets.
- F.e. DPI can select ETH_P_IP (and necessary flags to make
- tunnelhdr correct) in GRE protocol field and SPI matches
- GRE key. Is it not nice? 8)8)
-
-
- Well, as result, despite its simplicity, we get a pretty
- powerful classification engine. */
-
-
-struct rsvp_head {
- u32 tmap[256/32];
- u32 hgenerator;
- u8 tgenerator;
- struct rsvp_session __rcu *ht[256];
- struct rcu_head rcu;
-};
-
-struct rsvp_session {
- struct rsvp_session __rcu *next;
- __be32 dst[RSVP_DST_LEN];
- struct tc_rsvp_gpi dpi;
- u8 protocol;
- u8 tunnelid;
- /* 16 (src,sport) hash slots, and one wildcard source slot */
- struct rsvp_filter __rcu *ht[16 + 1];
- struct rcu_head rcu;
-};
-
-
-struct rsvp_filter {
- struct rsvp_filter __rcu *next;
- __be32 src[RSVP_DST_LEN];
- struct tc_rsvp_gpi spi;
- u8 tunnelhdr;
-
- struct tcf_result res;
- struct tcf_exts exts;
-
- u32 handle;
- struct rsvp_session *sess;
- struct rcu_work rwork;
-};
-
-static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
-{
- unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
-
- h ^= h>>16;
- h ^= h>>8;
- return (h ^ protocol ^ tunnelid) & 0xFF;
-}
-
-static inline unsigned int hash_src(__be32 *src)
-{
- unsigned int h = (__force __u32)src[RSVP_DST_LEN-1];
-
- h ^= h>>16;
- h ^= h>>8;
- h ^= h>>4;
- return h & 0xF;
-}
-
-#define RSVP_APPLY_RESULT() \
-{ \
- int r = tcf_exts_exec(skb, &f->exts, res); \
- if (r < 0) \
- continue; \
- else if (r > 0) \
- return r; \
-}
-
-static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
-{
- struct rsvp_head *head = rcu_dereference_bh(tp->root);
- struct rsvp_session *s;
- struct rsvp_filter *f;
- unsigned int h1, h2;
- __be32 *dst, *src;
- u8 protocol;
- u8 tunnelid = 0;
- u8 *xprt;
-#if RSVP_DST_LEN == 4
- struct ipv6hdr *nhptr;
-
- if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
- return -1;
- nhptr = ipv6_hdr(skb);
-#else
- struct iphdr *nhptr;
-
- if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
- return -1;
- nhptr = ip_hdr(skb);
-#endif
-restart:
-
-#if RSVP_DST_LEN == 4
- src = &nhptr->saddr.s6_addr32[0];
- dst = &nhptr->daddr.s6_addr32[0];
- protocol = nhptr->nexthdr;
- xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
-#else
- src = &nhptr->saddr;
- dst = &nhptr->daddr;
- protocol = nhptr->protocol;
- xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
- if (ip_is_fragment(nhptr))
- return -1;
-#endif
-
- h1 = hash_dst(dst, protocol, tunnelid);
- h2 = hash_src(src);
-
- for (s = rcu_dereference_bh(head->ht[h1]); s;
- s = rcu_dereference_bh(s->next)) {
- if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
- protocol == s->protocol &&
- !(s->dpi.mask &
- (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
-#if RSVP_DST_LEN == 4
- dst[0] == s->dst[0] &&
- dst[1] == s->dst[1] &&
- dst[2] == s->dst[2] &&
-#endif
- tunnelid == s->tunnelid) {
-
- for (f = rcu_dereference_bh(s->ht[h2]); f;
- f = rcu_dereference_bh(f->next)) {
- if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
- !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
-#if RSVP_DST_LEN == 4
- &&
- src[0] == f->src[0] &&
- src[1] == f->src[1] &&
- src[2] == f->src[2]
-#endif
- ) {
- *res = f->res;
- RSVP_APPLY_RESULT();
-
-matched:
- if (f->tunnelhdr == 0)
- return 0;
-
- tunnelid = f->res.classid;
- nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
- goto restart;
- }
- }
-
- /* And wildcard bucket... */
- for (f = rcu_dereference_bh(s->ht[16]); f;
- f = rcu_dereference_bh(f->next)) {
- *res = f->res;
- RSVP_APPLY_RESULT();
- goto matched;
- }
- return -1;
- }
- }
- return -1;
-}
-
-static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h)
-{
- struct rsvp_head *head = rtnl_dereference(tp->root);
- struct rsvp_session *s;
- struct rsvp_filter __rcu **ins;
- struct rsvp_filter *pins;
- unsigned int h1 = h & 0xFF;
- unsigned int h2 = (h >> 8) & 0xFF;
-
- for (s = rtnl_dereference(head->ht[h1]); s;
- s = rtnl_dereference(s->next)) {
- for (ins = &s->ht[h2], pins = rtnl_dereference(*ins); ;
- ins = &pins->next, pins = rtnl_dereference(*ins)) {
- if (pins->handle == h) {
- RCU_INIT_POINTER(n->next, pins->next);
- rcu_assign_pointer(*ins, n);
- return;
- }
- }
- }
-
- /* Something went wrong if we are trying to replace a non-existant
- * node. Mind as well halt instead of silently failing.
- */
- BUG_ON(1);
-}
-
-static void *rsvp_get(struct tcf_proto *tp, u32 handle)
-{
- struct rsvp_head *head = rtnl_dereference(tp->root);
- struct rsvp_session *s;
- struct rsvp_filter *f;
- unsigned int h1 = handle & 0xFF;
- unsigned int h2 = (handle >> 8) & 0xFF;
-
- if (h2 > 16)
- return NULL;
-
- for (s = rtnl_dereference(head->ht[h1]); s;
- s = rtnl_dereference(s->next)) {
- for (f = rtnl_dereference(s->ht[h2]); f;
- f = rtnl_dereference(f->next)) {
- if (f->handle == handle)
- return f;
- }
- }
- return NULL;
-}
-
-static int rsvp_init(struct tcf_proto *tp)
-{
- struct rsvp_head *data;
-
- data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
- if (data) {
- rcu_assign_pointer(tp->root, data);
- return 0;
- }
- return -ENOBUFS;
-}
-
-static void __rsvp_delete_filter(struct rsvp_filter *f)
-{
- tcf_exts_destroy(&f->exts);
- tcf_exts_put_net(&f->exts);
- kfree(f);
-}
-
-static void rsvp_delete_filter_work(struct work_struct *work)
-{
- struct rsvp_filter *f = container_of(to_rcu_work(work),
- struct rsvp_filter,
- rwork);
- rtnl_lock();
- __rsvp_delete_filter(f);
- rtnl_unlock();
-}
-
-static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
-{
- tcf_unbind_filter(tp, &f->res);
- /* all classifiers are required to call tcf_exts_destroy() after rcu
- * grace period, since converted-to-rcu actions are relying on that
- * in cleanup() callback
- */
- if (tcf_exts_get_net(&f->exts))
- tcf_queue_work(&f->rwork, rsvp_delete_filter_work);
- else
- __rsvp_delete_filter(f);
-}
-
-static void rsvp_destroy(struct tcf_proto *tp, bool rtnl_held,
- struct netlink_ext_ack *extack)
-{
- struct rsvp_head *data = rtnl_dereference(tp->root);
- int h1, h2;
-
- if (data == NULL)
- return;
-
- for (h1 = 0; h1 < 256; h1++) {
- struct rsvp_session *s;
-
- while ((s = rtnl_dereference(data->ht[h1])) != NULL) {
- RCU_INIT_POINTER(data->ht[h1], s->next);
-
- for (h2 = 0; h2 <= 16; h2++) {
- struct rsvp_filter *f;
-
- while ((f = rtnl_dereference(s->ht[h2])) != NULL) {
- rcu_assign_pointer(s->ht[h2], f->next);
- rsvp_delete_filter(tp, f);
- }
- }
- kfree_rcu(s, rcu);
- }
- }
- kfree_rcu(data, rcu);
-}
-
-static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last,
- bool rtnl_held, struct netlink_ext_ack *extack)
-{
- struct rsvp_head *head = rtnl_dereference(tp->root);
- struct rsvp_filter *nfp, *f = arg;
- struct rsvp_filter __rcu **fp;
- unsigned int h = f->handle;
- struct rsvp_session __rcu **sp;
- struct rsvp_session *nsp, *s = f->sess;
- int i, h1;
-
- fp = &s->ht[(h >> 8) & 0xFF];
- for (nfp = rtnl_dereference(*fp); nfp;
- fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
- if (nfp == f) {
- RCU_INIT_POINTER(*fp, f->next);
- rsvp_delete_filter(tp, f);
-
- /* Strip tree */
-
- for (i = 0; i <= 16; i++)
- if (s->ht[i])
- goto out;
-
- /* OK, session has no flows */
- sp = &head->ht[h & 0xFF];
- for (nsp = rtnl_dereference(*sp); nsp;
- sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
- if (nsp == s) {
- RCU_INIT_POINTER(*sp, s->next);
- kfree_rcu(s, rcu);
- goto out;
- }
- }
-
- break;
- }
- }
-
-out:
- *last = true;
- for (h1 = 0; h1 < 256; h1++) {
- if (rcu_access_pointer(head->ht[h1])) {
- *last = false;
- break;
- }
- }
-
- return 0;
-}
-
-static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
-{
- struct rsvp_head *data = rtnl_dereference(tp->root);
- int i = 0xFFFF;
-
- while (i-- > 0) {
- u32 h;
-
- if ((data->hgenerator += 0x10000) == 0)
- data->hgenerator = 0x10000;
- h = data->hgenerator|salt;
- if (!rsvp_get(tp, h))
- return h;
- }
- return 0;
-}
-
-static int tunnel_bts(struct rsvp_head *data)
-{
- int n = data->tgenerator >> 5;
- u32 b = 1 << (data->tgenerator & 0x1F);
-
- if (data->tmap[n] & b)
- return 0;
- data->tmap[n] |= b;
- return 1;
-}
-
-static void tunnel_recycle(struct rsvp_head *data)
-{
- struct rsvp_session __rcu **sht = data->ht;
- u32 tmap[256/32];
- int h1, h2;
-
- memset(tmap, 0, sizeof(tmap));
-
- for (h1 = 0; h1 < 256; h1++) {
- struct rsvp_session *s;
- for (s = rtnl_dereference(sht[h1]); s;
- s = rtnl_dereference(s->next)) {
- for (h2 = 0; h2 <= 16; h2++) {
- struct rsvp_filter *f;
-
- for (f = rtnl_dereference(s->ht[h2]); f;
- f = rtnl_dereference(f->next)) {
- if (f->tunnelhdr == 0)
- continue;
- data->tgenerator = f->res.classid;
- tunnel_bts(data);
- }
- }
- }
- }
-
- memcpy(data->tmap, tmap, sizeof(tmap));
-}
-
-static u32 gen_tunnel(struct rsvp_head *data)
-{
- int i, k;
-
- for (k = 0; k < 2; k++) {
- for (i = 255; i > 0; i--) {
- if (++data->tgenerator == 0)
- data->tgenerator = 1;
- if (tunnel_bts(data))
- return data->tgenerator;
- }
- tunnel_recycle(data);
- }
- return 0;
-}
-
-static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
- [TCA_RSVP_CLASSID] = { .type = NLA_U32 },
- [TCA_RSVP_DST] = { .len = RSVP_DST_LEN * sizeof(u32) },
- [TCA_RSVP_SRC] = { .len = RSVP_DST_LEN * sizeof(u32) },
- [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) },
-};
-
-static int rsvp_change(struct net *net, struct sk_buff *in_skb,
- struct tcf_proto *tp, unsigned long base,
- u32 handle,
- struct nlattr **tca,
- void **arg, bool ovr, bool rtnl_held,
- struct netlink_ext_ack *extack)
-{
- struct rsvp_head *data = rtnl_dereference(tp->root);
- struct rsvp_filter *f, *nfp;
- struct rsvp_filter __rcu **fp;
- struct rsvp_session *nsp, *s;
- struct rsvp_session __rcu **sp;
- struct tc_rsvp_pinfo *pinfo = NULL;
- struct nlattr *opt = tca[TCA_OPTIONS];
- struct nlattr *tb[TCA_RSVP_MAX + 1];
- struct tcf_exts e;
- unsigned int h1, h2;
- __be32 *dst;
- int err;
-
- if (opt == NULL)
- return handle ? -EINVAL : 0;
-
- err = nla_parse_nested_deprecated(tb, TCA_RSVP_MAX, opt, rsvp_policy,
- NULL);
- if (err < 0)
- return err;
-
- err = tcf_exts_init(&e, net, TCA_RSVP_ACT, TCA_RSVP_POLICE);
- if (err < 0)
- return err;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, true,
- extack);
- if (err < 0)
- goto errout2;
-
- f = *arg;
- if (f) {
- /* Node exists: adjust only classid */
- struct rsvp_filter *n;
-
- if (f->handle != handle && handle)
- goto errout2;
-
- n = kmemdup(f, sizeof(*f), GFP_KERNEL);
- if (!n) {
- err = -ENOMEM;
- goto errout2;
- }
-
- err = tcf_exts_init(&n->exts, net, TCA_RSVP_ACT,
- TCA_RSVP_POLICE);
- if (err < 0) {
- kfree(n);
- goto errout2;
- }
-
- if (tb[TCA_RSVP_CLASSID]) {
- n->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
- tcf_bind_filter(tp, &n->res, base);
- }
-
- tcf_exts_change(&n->exts, &e);
- rsvp_replace(tp, n, handle);
- return 0;
- }
-
- /* Now more serious part... */
- err = -EINVAL;
- if (handle)
- goto errout2;
- if (tb[TCA_RSVP_DST] == NULL)
- goto errout2;
-
- err = -ENOBUFS;
- f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
- if (f == NULL)
- goto errout2;
-
- err = tcf_exts_init(&f->exts, net, TCA_RSVP_ACT, TCA_RSVP_POLICE);
- if (err < 0)
- goto errout;
- h2 = 16;
- if (tb[TCA_RSVP_SRC]) {
- memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
- h2 = hash_src(f->src);
- }
- if (tb[TCA_RSVP_PINFO]) {
- pinfo = nla_data(tb[TCA_RSVP_PINFO]);
- f->spi = pinfo->spi;
- f->tunnelhdr = pinfo->tunnelhdr;
- }
- if (tb[TCA_RSVP_CLASSID])
- f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
-
- dst = nla_data(tb[TCA_RSVP_DST]);
- h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
-
- err = -ENOMEM;
- if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
- goto errout;
-
- if (f->tunnelhdr) {
- err = -EINVAL;
- if (f->res.classid > 255)
- goto errout;
-
- err = -ENOMEM;
- if (f->res.classid == 0 &&
- (f->res.classid = gen_tunnel(data)) == 0)
- goto errout;
- }
-
- for (sp = &data->ht[h1];
- (s = rtnl_dereference(*sp)) != NULL;
- sp = &s->next) {
- if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
- pinfo && pinfo->protocol == s->protocol &&
- memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
-#if RSVP_DST_LEN == 4
- dst[0] == s->dst[0] &&
- dst[1] == s->dst[1] &&
- dst[2] == s->dst[2] &&
-#endif
- pinfo->tunnelid == s->tunnelid) {
-
-insert:
- /* OK, we found appropriate session */
-
- fp = &s->ht[h2];
-
- f->sess = s;
- if (f->tunnelhdr == 0)
- tcf_bind_filter(tp, &f->res, base);
-
- tcf_exts_change(&f->exts, &e);
-
- fp = &s->ht[h2];
- for (nfp = rtnl_dereference(*fp); nfp;
- fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
- __u32 mask = nfp->spi.mask & f->spi.mask;
-
- if (mask != f->spi.mask)
- break;
- }
- RCU_INIT_POINTER(f->next, nfp);
- rcu_assign_pointer(*fp, f);
-
- *arg = f;
- return 0;
- }
- }
-
- /* No session found. Create new one. */
-
- err = -ENOBUFS;
- s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
- if (s == NULL)
- goto errout;
- memcpy(s->dst, dst, sizeof(s->dst));
-
- if (pinfo) {
- s->dpi = pinfo->dpi;
- s->protocol = pinfo->protocol;
- s->tunnelid = pinfo->tunnelid;
- }
- sp = &data->ht[h1];
- for (nsp = rtnl_dereference(*sp); nsp;
- sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
- if ((nsp->dpi.mask & s->dpi.mask) != s->dpi.mask)
- break;
- }
- RCU_INIT_POINTER(s->next, nsp);
- rcu_assign_pointer(*sp, s);
-
- goto insert;
-
-errout:
- tcf_exts_destroy(&f->exts);
- kfree(f);
-errout2:
- tcf_exts_destroy(&e);
- return err;
-}
-
-static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg,
- bool rtnl_held)
-{
- struct rsvp_head *head = rtnl_dereference(tp->root);
- unsigned int h, h1;
-
- if (arg->stop)
- return;
-
- for (h = 0; h < 256; h++) {
- struct rsvp_session *s;
-
- for (s = rtnl_dereference(head->ht[h]); s;
- s = rtnl_dereference(s->next)) {
- for (h1 = 0; h1 <= 16; h1++) {
- struct rsvp_filter *f;
-
- for (f = rtnl_dereference(s->ht[h1]); f;
- f = rtnl_dereference(f->next)) {
- if (arg->count < arg->skip) {
- arg->count++;
- continue;
- }
- if (arg->fn(tp, f, arg) < 0) {
- arg->stop = 1;
- return;
- }
- arg->count++;
- }
- }
- }
- }
-}
-
-static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
-{
- struct rsvp_filter *f = fh;
- struct rsvp_session *s;
- struct nlattr *nest;
- struct tc_rsvp_pinfo pinfo;
-
- if (f == NULL)
- return skb->len;
- s = f->sess;
-
- t->tcm_handle = f->handle;
-
- nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
- if (nest == NULL)
- goto nla_put_failure;
-
- if (nla_put(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst))
- goto nla_put_failure;
- pinfo.dpi = s->dpi;
- pinfo.spi = f->spi;
- pinfo.protocol = s->protocol;
- pinfo.tunnelid = s->tunnelid;
- pinfo.tunnelhdr = f->tunnelhdr;
- pinfo.pad = 0;
- if (nla_put(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo))
- goto nla_put_failure;
- if (f->res.classid &&
- nla_put_u32(skb, TCA_RSVP_CLASSID, f->res.classid))
- goto nla_put_failure;
- if (((f->handle >> 8) & 0xFF) != 16 &&
- nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src))
- goto nla_put_failure;
-
- if (tcf_exts_dump(skb, &f->exts) < 0)
- goto nla_put_failure;
-
- nla_nest_end(skb, nest);
-
- if (tcf_exts_dump_stats(skb, &f->exts) < 0)
- goto nla_put_failure;
- return skb->len;
-
-nla_put_failure:
- nla_nest_cancel(skb, nest);
- return -1;
-}
-
-static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
- unsigned long base)
-{
- struct rsvp_filter *f = fh;
-
- if (f && f->res.classid == classid) {
- if (cl)
- __tcf_bind_filter(q, &f->res, base);
- else
- __tcf_unbind_filter(q, &f->res);
- }
-}
-
-static struct tcf_proto_ops RSVP_OPS __read_mostly = {
- .kind = RSVP_ID,
- .classify = rsvp_classify,
- .init = rsvp_init,
- .destroy = rsvp_destroy,
- .get = rsvp_get,
- .change = rsvp_change,
- .delete = rsvp_delete,
- .walk = rsvp_walk,
- .dump = rsvp_dump,
- .bind_class = rsvp_bind_class,
- .owner = THIS_MODULE,
-};
-
-static int __init init_rsvp(void)
-{
- return register_tcf_proto_ops(&RSVP_OPS);
-}
-
-static void __exit exit_rsvp(void)
-{
- unregister_tcf_proto_ops(&RSVP_OPS);
-}
-
-module_init(init_rsvp)
-module_exit(exit_rsvp)
diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c
deleted file mode 100644
index 64078846000e..000000000000
--- a/net/sched/cls_rsvp6.c
+++ /dev/null
@@ -1,24 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * net/sched/cls_rsvp6.c Special RSVP packet classifier for IPv6.
- *
- * Authors: Alexey Kuznetsov, <kuznet(a)ms2.inr.ac.ru>
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/ipv6.h>
-#include <linux/skbuff.h>
-#include <net/act_api.h>
-#include <net/pkt_cls.h>
-#include <net/netlink.h>
-
-#define RSVP_DST_LEN 4
-#define RSVP_ID "rsvp6"
-#define RSVP_OPS cls_rsvp6_ops
-
-#include "cls_rsvp.h"
-MODULE_LICENSE("GPL");
--
2.25.1
2
1
From: Jamal Hadi Salim <jhs(a)mojatatu.com>
stable inclusion
from stable-v4.19.294
commit 6ca0ea6a46e7a2d70fb1b1f6a886efe2b2365e16
category: bugfix
bugzilla: 189257, https://gitee.com/src-openeuler/kernel/issues/I84B2W
CVE: CVE-2023-42755
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/patch/?id=…
--------------------------------
commit 265b4da82dbf5df04bee5a5d46b7474b1aaf326a upstream.
The rsvp classifier has served us well for about a quarter of a century but has
has not been getting much maintenance attention due to lack of known users.
Signed-off-by: Jamal Hadi Salim <jhs(a)mojatatu.com>
Acked-by: Jiri Pirko <jiri(a)nvidia.com>
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Signed-off-by: Kyle Zeng <zengyhkyle(a)gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Dong Chenchen <dongchenchen2(a)huawei.com>
---
net/sched/Kconfig | 28 --
net/sched/Makefile | 2 -
net/sched/cls_rsvp.c | 28 --
net/sched/cls_rsvp.h | 770 ------------------------------------------
net/sched/cls_rsvp6.c | 28 --
5 files changed, 856 deletions(-)
delete mode 100644 net/sched/cls_rsvp.c
delete mode 100644 net/sched/cls_rsvp.h
delete mode 100644 net/sched/cls_rsvp6.c
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 4547022ed7f4..7698a8974a47 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -503,34 +503,6 @@ config CLS_U32_MARK
---help---
Say Y here to be able to use netfilter marks as u32 key.
-config NET_CLS_RSVP
- tristate "IPv4 Resource Reservation Protocol (RSVP)"
- select NET_CLS
- ---help---
- The Resource Reservation Protocol (RSVP) permits end systems to
- request a minimum and maximum data flow rate for a connection; this
- is important for real time data such as streaming sound or video.
-
- Say Y here if you want to be able to classify outgoing packets based
- on their RSVP requests.
-
- To compile this code as a module, choose M here: the
- module will be called cls_rsvp.
-
-config NET_CLS_RSVP6
- tristate "IPv6 Resource Reservation Protocol (RSVP6)"
- select NET_CLS
- ---help---
- The Resource Reservation Protocol (RSVP) permits end systems to
- request a minimum and maximum data flow rate for a connection; this
- is important for real time data such as streaming sound or video.
-
- Say Y here if you want to be able to classify outgoing packets based
- on their RSVP requests and you are using the IPv6 protocol.
-
- To compile this code as a module, choose M here: the
- module will be called cls_rsvp6.
-
config NET_CLS_FLOW
tristate "Flow classifier"
select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 5eed580cdb42..3139c32e1947 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -61,8 +61,6 @@ obj-$(CONFIG_NET_SCH_ETF) += sch_etf.o
obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
obj-$(CONFIG_NET_CLS_FW) += cls_fw.o
-obj-$(CONFIG_NET_CLS_RSVP) += cls_rsvp.o
-obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o
obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o
obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o
obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o
diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c
deleted file mode 100644
index cbb5e0d600f3..000000000000
--- a/net/sched/cls_rsvp.c
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * net/sched/cls_rsvp.c Special RSVP packet classifier for IPv4.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Authors: Alexey Kuznetsov, <kuznet(a)ms2.inr.ac.ru>
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/skbuff.h>
-#include <net/ip.h>
-#include <net/netlink.h>
-#include <net/act_api.h>
-#include <net/pkt_cls.h>
-
-#define RSVP_DST_LEN 1
-#define RSVP_ID "rsvp"
-#define RSVP_OPS cls_rsvp_ops
-
-#include "cls_rsvp.h"
-MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
deleted file mode 100644
index a1e9f7cbcffc..000000000000
--- a/net/sched/cls_rsvp.h
+++ /dev/null
@@ -1,770 +0,0 @@
-/*
- * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Authors: Alexey Kuznetsov, <kuznet(a)ms2.inr.ac.ru>
- */
-
-/*
- Comparing to general packet classification problem,
- RSVP needs only sevaral relatively simple rules:
-
- * (dst, protocol) are always specified,
- so that we are able to hash them.
- * src may be exact, or may be wildcard, so that
- we can keep a hash table plus one wildcard entry.
- * source port (or flow label) is important only if src is given.
-
- IMPLEMENTATION.
-
- We use a two level hash table: The top level is keyed by
- destination address and protocol ID, every bucket contains a list
- of "rsvp sessions", identified by destination address, protocol and
- DPI(="Destination Port ID"): triple (key, mask, offset).
-
- Every bucket has a smaller hash table keyed by source address
- (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
- Every bucket is again a list of "RSVP flows", selected by
- source address and SPI(="Source Port ID" here rather than
- "security parameter index"): triple (key, mask, offset).
-
-
- NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
- and all fragmented packets go to the best-effort traffic class.
-
-
- NOTE 2. Two "port id"'s seems to be redundant, rfc2207 requires
- only one "Generalized Port Identifier". So that for classic
- ah, esp (and udp,tcp) both *pi should coincide or one of them
- should be wildcard.
-
- At first sight, this redundancy is just a waste of CPU
- resources. But DPI and SPI add the possibility to assign different
- priorities to GPIs. Look also at note 4 about tunnels below.
-
-
- NOTE 3. One complication is the case of tunneled packets.
- We implement it as following: if the first lookup
- matches a special session with "tunnelhdr" value not zero,
- flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
- In this case, we pull tunnelhdr bytes and restart lookup
- with tunnel ID added to the list of keys. Simple and stupid 8)8)
- It's enough for PIMREG and IPIP.
-
-
- NOTE 4. Two GPIs make it possible to parse even GRE packets.
- F.e. DPI can select ETH_P_IP (and necessary flags to make
- tunnelhdr correct) in GRE protocol field and SPI matches
- GRE key. Is it not nice? 8)8)
-
-
- Well, as result, despite its simplicity, we get a pretty
- powerful classification engine. */
-
-
-struct rsvp_head {
- u32 tmap[256/32];
- u32 hgenerator;
- u8 tgenerator;
- struct rsvp_session __rcu *ht[256];
- struct rcu_head rcu;
-};
-
-struct rsvp_session {
- struct rsvp_session __rcu *next;
- __be32 dst[RSVP_DST_LEN];
- struct tc_rsvp_gpi dpi;
- u8 protocol;
- u8 tunnelid;
- /* 16 (src,sport) hash slots, and one wildcard source slot */
- struct rsvp_filter __rcu *ht[16 + 1];
- struct rcu_head rcu;
-};
-
-
-struct rsvp_filter {
- struct rsvp_filter __rcu *next;
- __be32 src[RSVP_DST_LEN];
- struct tc_rsvp_gpi spi;
- u8 tunnelhdr;
-
- struct tcf_result res;
- struct tcf_exts exts;
-
- u32 handle;
- struct rsvp_session *sess;
- struct rcu_work rwork;
-};
-
-static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
-{
- unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
-
- h ^= h>>16;
- h ^= h>>8;
- return (h ^ protocol ^ tunnelid) & 0xFF;
-}
-
-static inline unsigned int hash_src(__be32 *src)
-{
- unsigned int h = (__force __u32)src[RSVP_DST_LEN-1];
-
- h ^= h>>16;
- h ^= h>>8;
- h ^= h>>4;
- return h & 0xF;
-}
-
-#define RSVP_APPLY_RESULT() \
-{ \
- int r = tcf_exts_exec(skb, &f->exts, res); \
- if (r < 0) \
- continue; \
- else if (r > 0) \
- return r; \
-}
-
-static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
-{
- struct rsvp_head *head = rcu_dereference_bh(tp->root);
- struct rsvp_session *s;
- struct rsvp_filter *f;
- unsigned int h1, h2;
- __be32 *dst, *src;
- u8 protocol;
- u8 tunnelid = 0;
- u8 *xprt;
-#if RSVP_DST_LEN == 4
- struct ipv6hdr *nhptr;
-
- if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
- return -1;
- nhptr = ipv6_hdr(skb);
-#else
- struct iphdr *nhptr;
-
- if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
- return -1;
- nhptr = ip_hdr(skb);
-#endif
-restart:
-
-#if RSVP_DST_LEN == 4
- src = &nhptr->saddr.s6_addr32[0];
- dst = &nhptr->daddr.s6_addr32[0];
- protocol = nhptr->nexthdr;
- xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
-#else
- src = &nhptr->saddr;
- dst = &nhptr->daddr;
- protocol = nhptr->protocol;
- xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
- if (ip_is_fragment(nhptr))
- return -1;
-#endif
-
- h1 = hash_dst(dst, protocol, tunnelid);
- h2 = hash_src(src);
-
- for (s = rcu_dereference_bh(head->ht[h1]); s;
- s = rcu_dereference_bh(s->next)) {
- if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
- protocol == s->protocol &&
- !(s->dpi.mask &
- (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
-#if RSVP_DST_LEN == 4
- dst[0] == s->dst[0] &&
- dst[1] == s->dst[1] &&
- dst[2] == s->dst[2] &&
-#endif
- tunnelid == s->tunnelid) {
-
- for (f = rcu_dereference_bh(s->ht[h2]); f;
- f = rcu_dereference_bh(f->next)) {
- if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
- !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
-#if RSVP_DST_LEN == 4
- &&
- src[0] == f->src[0] &&
- src[1] == f->src[1] &&
- src[2] == f->src[2]
-#endif
- ) {
- *res = f->res;
- RSVP_APPLY_RESULT();
-
-matched:
- if (f->tunnelhdr == 0)
- return 0;
-
- tunnelid = f->res.classid;
- nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
- goto restart;
- }
- }
-
- /* And wildcard bucket... */
- for (f = rcu_dereference_bh(s->ht[16]); f;
- f = rcu_dereference_bh(f->next)) {
- *res = f->res;
- RSVP_APPLY_RESULT();
- goto matched;
- }
- return -1;
- }
- }
- return -1;
-}
-
-static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h)
-{
- struct rsvp_head *head = rtnl_dereference(tp->root);
- struct rsvp_session *s;
- struct rsvp_filter __rcu **ins;
- struct rsvp_filter *pins;
- unsigned int h1 = h & 0xFF;
- unsigned int h2 = (h >> 8) & 0xFF;
-
- for (s = rtnl_dereference(head->ht[h1]); s;
- s = rtnl_dereference(s->next)) {
- for (ins = &s->ht[h2], pins = rtnl_dereference(*ins); ;
- ins = &pins->next, pins = rtnl_dereference(*ins)) {
- if (pins->handle == h) {
- RCU_INIT_POINTER(n->next, pins->next);
- rcu_assign_pointer(*ins, n);
- return;
- }
- }
- }
-
- /* Something went wrong if we are trying to replace a non-existant
- * node. Mind as well halt instead of silently failing.
- */
- BUG_ON(1);
-}
-
-static void *rsvp_get(struct tcf_proto *tp, u32 handle)
-{
- struct rsvp_head *head = rtnl_dereference(tp->root);
- struct rsvp_session *s;
- struct rsvp_filter *f;
- unsigned int h1 = handle & 0xFF;
- unsigned int h2 = (handle >> 8) & 0xFF;
-
- if (h2 > 16)
- return NULL;
-
- for (s = rtnl_dereference(head->ht[h1]); s;
- s = rtnl_dereference(s->next)) {
- for (f = rtnl_dereference(s->ht[h2]); f;
- f = rtnl_dereference(f->next)) {
- if (f->handle == handle)
- return f;
- }
- }
- return NULL;
-}
-
-static int rsvp_init(struct tcf_proto *tp)
-{
- struct rsvp_head *data;
-
- data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
- if (data) {
- rcu_assign_pointer(tp->root, data);
- return 0;
- }
- return -ENOBUFS;
-}
-
-static void __rsvp_delete_filter(struct rsvp_filter *f)
-{
- tcf_exts_destroy(&f->exts);
- tcf_exts_put_net(&f->exts);
- kfree(f);
-}
-
-static void rsvp_delete_filter_work(struct work_struct *work)
-{
- struct rsvp_filter *f = container_of(to_rcu_work(work),
- struct rsvp_filter,
- rwork);
- rtnl_lock();
- __rsvp_delete_filter(f);
- rtnl_unlock();
-}
-
-static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
-{
- tcf_unbind_filter(tp, &f->res);
- /* all classifiers are required to call tcf_exts_destroy() after rcu
- * grace period, since converted-to-rcu actions are relying on that
- * in cleanup() callback
- */
- if (tcf_exts_get_net(&f->exts))
- tcf_queue_work(&f->rwork, rsvp_delete_filter_work);
- else
- __rsvp_delete_filter(f);
-}
-
-static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
-{
- struct rsvp_head *data = rtnl_dereference(tp->root);
- int h1, h2;
-
- if (data == NULL)
- return;
-
- for (h1 = 0; h1 < 256; h1++) {
- struct rsvp_session *s;
-
- while ((s = rtnl_dereference(data->ht[h1])) != NULL) {
- RCU_INIT_POINTER(data->ht[h1], s->next);
-
- for (h2 = 0; h2 <= 16; h2++) {
- struct rsvp_filter *f;
-
- while ((f = rtnl_dereference(s->ht[h2])) != NULL) {
- rcu_assign_pointer(s->ht[h2], f->next);
- rsvp_delete_filter(tp, f);
- }
- }
- kfree_rcu(s, rcu);
- }
- }
- kfree_rcu(data, rcu);
-}
-
-static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
-{
- struct rsvp_head *head = rtnl_dereference(tp->root);
- struct rsvp_filter *nfp, *f = arg;
- struct rsvp_filter __rcu **fp;
- unsigned int h = f->handle;
- struct rsvp_session __rcu **sp;
- struct rsvp_session *nsp, *s = f->sess;
- int i, h1;
-
- fp = &s->ht[(h >> 8) & 0xFF];
- for (nfp = rtnl_dereference(*fp); nfp;
- fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
- if (nfp == f) {
- RCU_INIT_POINTER(*fp, f->next);
- rsvp_delete_filter(tp, f);
-
- /* Strip tree */
-
- for (i = 0; i <= 16; i++)
- if (s->ht[i])
- goto out;
-
- /* OK, session has no flows */
- sp = &head->ht[h & 0xFF];
- for (nsp = rtnl_dereference(*sp); nsp;
- sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
- if (nsp == s) {
- RCU_INIT_POINTER(*sp, s->next);
- kfree_rcu(s, rcu);
- goto out;
- }
- }
-
- break;
- }
- }
-
-out:
- *last = true;
- for (h1 = 0; h1 < 256; h1++) {
- if (rcu_access_pointer(head->ht[h1])) {
- *last = false;
- break;
- }
- }
-
- return 0;
-}
-
-static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
-{
- struct rsvp_head *data = rtnl_dereference(tp->root);
- int i = 0xFFFF;
-
- while (i-- > 0) {
- u32 h;
-
- if ((data->hgenerator += 0x10000) == 0)
- data->hgenerator = 0x10000;
- h = data->hgenerator|salt;
- if (!rsvp_get(tp, h))
- return h;
- }
- return 0;
-}
-
-static int tunnel_bts(struct rsvp_head *data)
-{
- int n = data->tgenerator >> 5;
- u32 b = 1 << (data->tgenerator & 0x1F);
-
- if (data->tmap[n] & b)
- return 0;
- data->tmap[n] |= b;
- return 1;
-}
-
-static void tunnel_recycle(struct rsvp_head *data)
-{
- struct rsvp_session __rcu **sht = data->ht;
- u32 tmap[256/32];
- int h1, h2;
-
- memset(tmap, 0, sizeof(tmap));
-
- for (h1 = 0; h1 < 256; h1++) {
- struct rsvp_session *s;
- for (s = rtnl_dereference(sht[h1]); s;
- s = rtnl_dereference(s->next)) {
- for (h2 = 0; h2 <= 16; h2++) {
- struct rsvp_filter *f;
-
- for (f = rtnl_dereference(s->ht[h2]); f;
- f = rtnl_dereference(f->next)) {
- if (f->tunnelhdr == 0)
- continue;
- data->tgenerator = f->res.classid;
- tunnel_bts(data);
- }
- }
- }
- }
-
- memcpy(data->tmap, tmap, sizeof(tmap));
-}
-
-static u32 gen_tunnel(struct rsvp_head *data)
-{
- int i, k;
-
- for (k = 0; k < 2; k++) {
- for (i = 255; i > 0; i--) {
- if (++data->tgenerator == 0)
- data->tgenerator = 1;
- if (tunnel_bts(data))
- return data->tgenerator;
- }
- tunnel_recycle(data);
- }
- return 0;
-}
-
-static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
- [TCA_RSVP_CLASSID] = { .type = NLA_U32 },
- [TCA_RSVP_DST] = { .len = RSVP_DST_LEN * sizeof(u32) },
- [TCA_RSVP_SRC] = { .len = RSVP_DST_LEN * sizeof(u32) },
- [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) },
-};
-
-static int rsvp_change(struct net *net, struct sk_buff *in_skb,
- struct tcf_proto *tp, unsigned long base,
- u32 handle,
- struct nlattr **tca,
- void **arg, bool ovr, struct netlink_ext_ack *extack)
-{
- struct rsvp_head *data = rtnl_dereference(tp->root);
- struct rsvp_filter *f, *nfp;
- struct rsvp_filter __rcu **fp;
- struct rsvp_session *nsp, *s;
- struct rsvp_session __rcu **sp;
- struct tc_rsvp_pinfo *pinfo = NULL;
- struct nlattr *opt = tca[TCA_OPTIONS];
- struct nlattr *tb[TCA_RSVP_MAX + 1];
- struct tcf_exts e;
- unsigned int h1, h2;
- __be32 *dst;
- int err;
-
- if (opt == NULL)
- return handle ? -EINVAL : 0;
-
- err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy, NULL);
- if (err < 0)
- return err;
-
- err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
- if (err < 0)
- return err;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, extack);
- if (err < 0)
- goto errout2;
-
- f = *arg;
- if (f) {
- /* Node exists: adjust only classid */
- struct rsvp_filter *n;
-
- if (f->handle != handle && handle)
- goto errout2;
-
- n = kmemdup(f, sizeof(*f), GFP_KERNEL);
- if (!n) {
- err = -ENOMEM;
- goto errout2;
- }
-
- err = tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
- if (err < 0) {
- kfree(n);
- goto errout2;
- }
-
- if (tb[TCA_RSVP_CLASSID]) {
- n->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
- tcf_bind_filter(tp, &n->res, base);
- }
-
- tcf_exts_change(&n->exts, &e);
- rsvp_replace(tp, n, handle);
- return 0;
- }
-
- /* Now more serious part... */
- err = -EINVAL;
- if (handle)
- goto errout2;
- if (tb[TCA_RSVP_DST] == NULL)
- goto errout2;
-
- err = -ENOBUFS;
- f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
- if (f == NULL)
- goto errout2;
-
- err = tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
- if (err < 0)
- goto errout;
- h2 = 16;
- if (tb[TCA_RSVP_SRC]) {
- memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
- h2 = hash_src(f->src);
- }
- if (tb[TCA_RSVP_PINFO]) {
- pinfo = nla_data(tb[TCA_RSVP_PINFO]);
- f->spi = pinfo->spi;
- f->tunnelhdr = pinfo->tunnelhdr;
- }
- if (tb[TCA_RSVP_CLASSID])
- f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
-
- dst = nla_data(tb[TCA_RSVP_DST]);
- h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
-
- err = -ENOMEM;
- if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
- goto errout;
-
- if (f->tunnelhdr) {
- err = -EINVAL;
- if (f->res.classid > 255)
- goto errout;
-
- err = -ENOMEM;
- if (f->res.classid == 0 &&
- (f->res.classid = gen_tunnel(data)) == 0)
- goto errout;
- }
-
- for (sp = &data->ht[h1];
- (s = rtnl_dereference(*sp)) != NULL;
- sp = &s->next) {
- if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
- pinfo && pinfo->protocol == s->protocol &&
- memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
-#if RSVP_DST_LEN == 4
- dst[0] == s->dst[0] &&
- dst[1] == s->dst[1] &&
- dst[2] == s->dst[2] &&
-#endif
- pinfo->tunnelid == s->tunnelid) {
-
-insert:
- /* OK, we found appropriate session */
-
- fp = &s->ht[h2];
-
- f->sess = s;
- if (f->tunnelhdr == 0)
- tcf_bind_filter(tp, &f->res, base);
-
- tcf_exts_change(&f->exts, &e);
-
- fp = &s->ht[h2];
- for (nfp = rtnl_dereference(*fp); nfp;
- fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
- __u32 mask = nfp->spi.mask & f->spi.mask;
-
- if (mask != f->spi.mask)
- break;
- }
- RCU_INIT_POINTER(f->next, nfp);
- rcu_assign_pointer(*fp, f);
-
- *arg = f;
- return 0;
- }
- }
-
- /* No session found. Create new one. */
-
- err = -ENOBUFS;
- s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
- if (s == NULL)
- goto errout;
- memcpy(s->dst, dst, sizeof(s->dst));
-
- if (pinfo) {
- s->dpi = pinfo->dpi;
- s->protocol = pinfo->protocol;
- s->tunnelid = pinfo->tunnelid;
- }
- sp = &data->ht[h1];
- for (nsp = rtnl_dereference(*sp); nsp;
- sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
- if ((nsp->dpi.mask & s->dpi.mask) != s->dpi.mask)
- break;
- }
- RCU_INIT_POINTER(s->next, nsp);
- rcu_assign_pointer(*sp, s);
-
- goto insert;
-
-errout:
- tcf_exts_destroy(&f->exts);
- kfree(f);
-errout2:
- tcf_exts_destroy(&e);
- return err;
-}
-
-static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
-{
- struct rsvp_head *head = rtnl_dereference(tp->root);
- unsigned int h, h1;
-
- if (arg->stop)
- return;
-
- for (h = 0; h < 256; h++) {
- struct rsvp_session *s;
-
- for (s = rtnl_dereference(head->ht[h]); s;
- s = rtnl_dereference(s->next)) {
- for (h1 = 0; h1 <= 16; h1++) {
- struct rsvp_filter *f;
-
- for (f = rtnl_dereference(s->ht[h1]); f;
- f = rtnl_dereference(f->next)) {
- if (arg->count < arg->skip) {
- arg->count++;
- continue;
- }
- if (arg->fn(tp, f, arg) < 0) {
- arg->stop = 1;
- return;
- }
- arg->count++;
- }
- }
- }
- }
-}
-
-static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
-{
- struct rsvp_filter *f = fh;
- struct rsvp_session *s;
- struct nlattr *nest;
- struct tc_rsvp_pinfo pinfo;
-
- if (f == NULL)
- return skb->len;
- s = f->sess;
-
- t->tcm_handle = f->handle;
-
- nest = nla_nest_start(skb, TCA_OPTIONS);
- if (nest == NULL)
- goto nla_put_failure;
-
- if (nla_put(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst))
- goto nla_put_failure;
- pinfo.dpi = s->dpi;
- pinfo.spi = f->spi;
- pinfo.protocol = s->protocol;
- pinfo.tunnelid = s->tunnelid;
- pinfo.tunnelhdr = f->tunnelhdr;
- pinfo.pad = 0;
- if (nla_put(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo))
- goto nla_put_failure;
- if (f->res.classid &&
- nla_put_u32(skb, TCA_RSVP_CLASSID, f->res.classid))
- goto nla_put_failure;
- if (((f->handle >> 8) & 0xFF) != 16 &&
- nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src))
- goto nla_put_failure;
-
- if (tcf_exts_dump(skb, &f->exts) < 0)
- goto nla_put_failure;
-
- nla_nest_end(skb, nest);
-
- if (tcf_exts_dump_stats(skb, &f->exts) < 0)
- goto nla_put_failure;
- return skb->len;
-
-nla_put_failure:
- nla_nest_cancel(skb, nest);
- return -1;
-}
-
-static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl)
-{
- struct rsvp_filter *f = fh;
-
- if (f && f->res.classid == classid)
- f->res.class = cl;
-}
-
-static struct tcf_proto_ops RSVP_OPS __read_mostly = {
- .kind = RSVP_ID,
- .classify = rsvp_classify,
- .init = rsvp_init,
- .destroy = rsvp_destroy,
- .get = rsvp_get,
- .change = rsvp_change,
- .delete = rsvp_delete,
- .walk = rsvp_walk,
- .dump = rsvp_dump,
- .bind_class = rsvp_bind_class,
- .owner = THIS_MODULE,
-};
-
-static int __init init_rsvp(void)
-{
- return register_tcf_proto_ops(&RSVP_OPS);
-}
-
-static void __exit exit_rsvp(void)
-{
- unregister_tcf_proto_ops(&RSVP_OPS);
-}
-
-module_init(init_rsvp)
-module_exit(exit_rsvp)
diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c
deleted file mode 100644
index dd08aea2aee5..000000000000
--- a/net/sched/cls_rsvp6.c
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * net/sched/cls_rsvp6.c Special RSVP packet classifier for IPv6.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Authors: Alexey Kuznetsov, <kuznet(a)ms2.inr.ac.ru>
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/ipv6.h>
-#include <linux/skbuff.h>
-#include <net/act_api.h>
-#include <net/pkt_cls.h>
-#include <net/netlink.h>
-
-#define RSVP_DST_LEN 4
-#define RSVP_ID "rsvp6"
-#define RSVP_OPS cls_rsvp6_ops
-
-#include "cls_rsvp.h"
-MODULE_LICENSE("GPL");
--
2.25.1
2
1

06 Oct '23
The hikptool ub_dfx command is submitted for the first time.
This command can be used to dump register information of the LRB, PFA, and PM modules.
Signed-off-by: Jianqiang Li <lijianqiang16(a)huawei.com>
---
libhikptdev/include/hikptdev_plug.h | 1 +
net/hikp_net_lib.h | 9 +
net/ub/ub_dfx/hikp_ub_dfx.c | 319 ++++++++++++++++++++++++++++
net/ub/ub_dfx/hikp_ub_dfx.h | 100 +++++++++
tool_lib/tool_lib.h | 2 +-
5 files changed, 430 insertions(+), 1 deletion(-)
create mode 100644 net/ub/ub_dfx/hikp_ub_dfx.c
create mode 100644 net/ub/ub_dfx/hikp_ub_dfx.h
diff --git a/libhikptdev/include/hikptdev_plug.h b/libhikptdev/include/hikptdev_plug.h
index 42bea6b..56cea78 100644
--- a/libhikptdev/include/hikptdev_plug.h
+++ b/libhikptdev/include/hikptdev_plug.h
@@ -43,6 +43,7 @@ enum cmd_module_type {
MAC_MOD = 8,
DPDK_MOD = 9,
CXL_MOD = 10,
+ UB_MOD = 11,
};
void hikp_unlock(void);
diff --git a/net/hikp_net_lib.h b/net/hikp_net_lib.h
index cc99d0c..af0a51d 100644
--- a/net/hikp_net_lib.h
+++ b/net/hikp_net_lib.h
@@ -98,6 +98,15 @@ enum roce_cmd_type {
GET_ROCEE_TSP_CMD,
};
+enum ub_cmd_type {
+ GET_UNIC_PPP_CMD = 0x1,
+ GET_UB_DFX_INFO_CMD,
+ GET_UB_LINK_INFO_CMD,
+ GET_UB_BP_INFO_CMD,
+ GET_UB_CRD_INFO_CMD,
+ GET_UB_BASIC_INFO_CMD,
+};
+
#define HIKP_MAX_PF_NUM 8
#define HIKP_NIC_MAX_FUNC_NUM 256
diff --git a/net/ub/ub_dfx/hikp_ub_dfx.c b/net/ub/ub_dfx/hikp_ub_dfx.c
new file mode 100644
index 0000000..c50f555
--- /dev/null
+++ b/net/ub/ub_dfx/hikp_ub_dfx.c
@@ -0,0 +1,319 @@
+/*
+ * Copyright (c) 2023 Hisilicon Technologies Co., Ltd.
+ * Hikptool is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ * http://license.coscl.org.cn/MulanPSL2
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ *
+ * See the Mulan PSL v2 for more details.
+ */
+
+#include "tool_cmd.h"
+#include "hikp_net_lib.h"
+#include "hikp_ub_dfx.h"
+
+struct ub_dfx_param g_ub_dfx_param = { 0 };
+
+static const struct dfx_module_cmd g_ub_dfx_module_parse[] = {
+ {"LRB", LRB_DFX_REG_DUMP},
+ {"PFA", PFA_DFX_REG_DUMP},
+ {"PM", PM_DFX_REG_DUMP}
+};
+
+static const struct dfx_type_parse g_dfx_type_parse[] = {
+ {INCORRECT_REG_TYPE, WIDTH_32_BIT, "INCORRECT TYPE"},
+ {TYPE_32_STATS, WIDTH_32_BIT, "32 bit statistics"},
+ {TYPE_32_RUNNING_STATUS, WIDTH_32_BIT, "32 bit running status"},
+ {TYPE_64_STATS, WIDTH_64_BIT, "64 bit statistics"},
+};
+
+static void dfx_help_info(const struct major_cmd_ctrl *self)
+{
+ printf("\n Usage: %s %s\n", self->cmd_ptr->name, "-i <interface>\n");
+ printf("\n %s\n", self->cmd_ptr->help_info);
+ printf(" Options:\n\n");
+ printf(" %s, %-25s %s\n", "-h", "--help", "display this help and exit");
+ printf(" %s, %-25s %s\n", "-i", "--interface=<interface>",
+ "device target or bdf id, e.g. ubn0 or 0000:35:00.0");
+ printf(" %s\n", " [-m/--module LRB/PFA/PM] : this is necessary param\n");
+}
+
+static int hikp_ub_dfx_help(struct major_cmd_ctrl *self, const char *argv)
+{
+ dfx_help_info(self);
+ return 0;
+}
+
+static int hikp_ub_dfx_target(struct major_cmd_ctrl *self, const char *argv)
+{
+ self->err_no = tool_check_and_get_valid_bdf_id(argv, &(g_ub_dfx_param.target));
+ if (self->err_no != 0) {
+ snprintf(self->err_str, sizeof(self->err_str), "Unknown device %s.", argv);
+ return self->err_no;
+ }
+
+ return 0;
+}
+
+static int hikp_ub_dfx_module_select(struct major_cmd_ctrl *self, const char *argv)
+{
+ size_t arr_size = HIKP_ARRAY_SIZE(g_ub_dfx_module_parse);
+ bool is_found;
+ size_t i;
+
+ for (i = 0; i < arr_size; i++) {
+ is_found = strncmp(argv, (const char *)g_ub_dfx_module_parse[i].module_name,
+ sizeof(g_ub_dfx_module_parse[i].module_name)) == 0;
+ if (is_found) {
+ g_ub_dfx_param.sub_cmd_code = g_ub_dfx_module_parse[i].sub_cmd_code;
+ g_ub_dfx_param.module_idx = i;
+ g_ub_dfx_param.flag |= MODULE_SET_FLAG;
+ return 0;
+ }
+ }
+ dfx_help_info(self);
+ snprintf(self->err_str, sizeof(self->err_str), "-m/--module param error!!!");
+ self->err_no = -EINVAL;
+
+ return -EINVAL;
+}
+
+static int hikp_ub_dfx_get_blk_data(struct hikp_cmd_ret **cmd_ret,
+ uint32_t blk_id, uint32_t sub_cmd_code)
+{
+ struct hikp_cmd_header req_header = { 0 };
+ struct ub_dfx_req_para req_data = { 0 };
+
+ req_data.bdf = g_ub_dfx_param.target.bdf;
+ req_data.block_id = blk_id;
+ hikp_cmd_init(&req_header, UB_MOD, GET_UB_DFX_INFO_CMD, sub_cmd_code);
+ *cmd_ret = hikp_cmd_alloc(&req_header, &req_data, sizeof(req_data));
+
+ return hikp_rsp_normal_check(*cmd_ret);
+}
+
+static int hikp_ub_get_first_blk_dfx(struct ub_dfx_rsp_head *rsp_head, uint32_t **reg_data,
+ uint32_t *max_dfx_size, uint32_t *version)
+{
+ struct ub_dfx_rsp *dfx_rsp = NULL;
+ struct hikp_cmd_ret *cmd_ret;
+ int ret;
+
+ ret = hikp_ub_dfx_get_blk_data(&cmd_ret, 0, g_ub_dfx_param.sub_cmd_code);
+ if (ret < 0)
+ goto err_out;
+
+ dfx_rsp = (struct ub_dfx_rsp *)(cmd_ret->rsp_data);
+ *version = cmd_ret->version;
+ *rsp_head = dfx_rsp->rsp_head;
+ if (rsp_head->total_blk_num == 0) {
+ /* if total block number is zero, set total type number to zero anyway */
+ rsp_head->total_type_num = 0;
+ goto err_out;
+ }
+ *max_dfx_size = (uint32_t)(rsp_head->total_blk_num * MAX_DFX_DATA_NUM * sizeof(uint32_t));
+ *reg_data = (uint32_t *)calloc(1, *max_dfx_size);
+ if (*reg_data == NULL) {
+ HIKP_ERROR_PRINT("malloc log memory 0x%x failed.\n", *max_dfx_size);
+ ret = -ENOMEM;
+ goto err_out;
+ }
+
+ if (rsp_head->cur_blk_size > *max_dfx_size) {
+ free(*reg_data);
+ *reg_data = NULL;
+ HIKP_ERROR_PRINT("blk0 reg_data copy size error, data size: 0x%x, max size: 0x%x\n",
+ rsp_head->cur_blk_size, *max_dfx_size);
+ ret = -EINVAL;
+ goto err_out;
+ }
+ memcpy(*reg_data, dfx_rsp->reg_data, rsp_head->cur_blk_size);
+
+ *max_dfx_size -= (uint32_t)rsp_head->cur_blk_size;
+err_out:
+ free(cmd_ret);
+ cmd_ret = NULL;
+
+ return ret;
+}
+
+static int hikp_ub_get_blk_dfx(struct ub_dfx_rsp_head *rsp_head, uint32_t blk_id,
+ uint32_t *reg_data, uint32_t *max_dfx_size)
+{
+ struct ub_dfx_rsp *dfx_rsp = NULL;
+ struct hikp_cmd_ret *cmd_ret;
+ int ret;
+
+ ret = hikp_ub_dfx_get_blk_data(&cmd_ret, blk_id, g_ub_dfx_param.sub_cmd_code);
+ if (ret < 0)
+ goto err_out;
+
+ dfx_rsp = (struct ub_dfx_rsp *)(cmd_ret->rsp_data);
+ *rsp_head = dfx_rsp->rsp_head;
+ if (rsp_head->cur_blk_size > *max_dfx_size) {
+ HIKP_ERROR_PRINT("blk%u reg_data copy size error, "
+ "data size: 0x%x, max size: 0x%x\n",
+ blk_id, rsp_head->cur_blk_size, *max_dfx_size);
+ ret = -EINVAL;
+ goto err_out;
+ }
+ memcpy(reg_data, dfx_rsp->reg_data, rsp_head->cur_blk_size);
+ *max_dfx_size -= (uint32_t)rsp_head->cur_blk_size;
+
+err_out:
+ free(cmd_ret);
+ cmd_ret = NULL;
+
+ return ret;
+}
+
+static bool is_type_found(uint16_t type_id, uint32_t *index)
+{
+ size_t arr_size = HIKP_ARRAY_SIZE(g_dfx_type_parse);
+ size_t i;
+
+ for (i = 0; i < arr_size; i++) {
+ if (g_dfx_type_parse[i].type_id == type_id) {
+ *index = i;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static void hikp_ub_dfx_print_type_head(uint8_t type_id, uint8_t *last_type_id)
+{
+ uint32_t index = 0;
+
+ if (type_id != *last_type_id) {
+ printf("-----------------------------------------------------\n");
+ if (is_type_found(type_id, &index))
+ printf("type name: %s\n\n", g_dfx_type_parse[index].type_name);
+ else
+ HIKP_WARN_PRINT("type name: unknown type, type id is %u\n\n", type_id);
+
+ *last_type_id = type_id;
+ }
+}
+
+static void hikp_ub_dfx_print_b32(uint32_t num, uint32_t *reg_data)
+{
+ uint32_t word_num = num * WORD_NUM_PER_REG;
+ uint16_t offset;
+ uint32_t value;
+ uint32_t index;
+ uint32_t i;
+
+ for (i = 0, index = 1; i < word_num; i = i + WORD_NUM_PER_REG, index++) {
+ offset = (uint16_t)HI_GET_BITFIELD(reg_data[i], 0, DFX_REG_ADDR_MASK);
+ value = reg_data[i + 1];
+ printf("%03u: 0x%04x\t0x%08x\n", index, offset, value);
+ }
+}
+
+static void hikp_ub_dfx_print_b64(uint32_t num, uint32_t *reg_data)
+{
+ uint32_t word_num = num * WORD_NUM_PER_REG;
+ uint16_t offset;
+ uint64_t value;
+ uint32_t index;
+ uint32_t i;
+
+ for (i = 0, index = 1; i < word_num; i = i + WORD_NUM_PER_REG, index++) {
+ offset = (uint16_t)HI_GET_BITFIELD(reg_data[i], 0, DFX_REG_ADDR_MASK);
+ value = (uint64_t)reg_data[i + 1] |
+ (HI_GET_BITFIELD((uint64_t)reg_data[i], DFX_REG_VALUE_OFF,
+ DFX_REG_VALUE_MASK) << BIT_NUM_OF_WORD);
+ printf("%03u: 0x%04x\t0x%016lx\n", index, offset, value);
+ }
+}
+
+static void hikp_ub_dfx_print(const struct ub_dfx_rsp_head *rsp_head, uint32_t *reg_data)
+{
+ struct ub_dfx_type_head *type_head;
+ uint8_t last_type_id = 0;
+ uint32_t *ptr = reg_data;
+ uint8_t i;
+
+ printf("****************** module %s reg dump start ********************\n",
+ g_ub_dfx_module_parse[g_ub_dfx_param.module_idx].module_name);
+ for (i = 0; i < rsp_head->total_type_num; i++) {
+ type_head = (struct ub_dfx_type_head *)ptr;
+ if (type_head->type_id == INCORRECT_REG_TYPE) {
+ HIKP_ERROR_PRINT("No.%u type is incorrect reg type\n", i + 1u);
+ break;
+ }
+ hikp_ub_dfx_print_type_head(type_head->type_id, &last_type_id);
+ ptr++;
+ if (type_head->bit_width == WIDTH_32_BIT) {
+ hikp_ub_dfx_print_b32((uint32_t)type_head->reg_num, ptr);
+ } else if (type_head->bit_width == WIDTH_64_BIT) {
+ hikp_ub_dfx_print_b64((uint32_t)type_head->reg_num, ptr);
+ } else {
+ HIKP_ERROR_PRINT("type%u's bit width error.\n", type_head->type_id);
+ break;
+ }
+ ptr += (uint32_t)type_head->reg_num * WORD_NUM_PER_REG;
+ }
+ printf("################### ====== dump end ====== ######################\n");
+}
+
+static void hikp_ub_dfx_execute(struct major_cmd_ctrl *self)
+{
+ struct ub_dfx_rsp_head rsp_head = { 0 };
+ struct ub_dfx_rsp_head tmp_head = { 0 };
+ uint32_t *reg_data = NULL;
+ uint32_t max_dfx_size = 0;
+ uint32_t real_reg_size;
+ uint32_t version;
+ uint32_t i;
+
+ if (!(g_ub_dfx_param.flag & MODULE_SET_FLAG)) {
+ self->err_no = -EINVAL;
+ snprintf(self->err_str, sizeof(self->err_str), "Please specify a module.");
+ dfx_help_info(self);
+ return;
+ }
+
+ self->err_no = hikp_ub_get_first_blk_dfx(&rsp_head, ®_data, &max_dfx_size, &version);
+ if (self->err_no != 0) {
+ snprintf(self->err_str, sizeof(self->err_str), "get the first block dfx fail.");
+ return;
+ }
+ real_reg_size = (uint32_t)rsp_head.cur_blk_size;
+ for (i = 1; i < rsp_head.total_blk_num; i++) {
+ self->err_no = hikp_ub_get_blk_dfx(&tmp_head, i,
+ reg_data + (real_reg_size / sizeof(uint32_t)),
+ &max_dfx_size);
+ if (self->err_no != 0) {
+ snprintf(self->err_str, sizeof(self->err_str),
+ "getting block%u reg fail.", i);
+ free(reg_data);
+ return;
+ }
+ real_reg_size += (uint32_t)tmp_head.cur_blk_size;
+ memset(&tmp_head, 0, sizeof(struct ub_dfx_rsp_head));
+ }
+
+ printf("DFX cmd version: 0x%x\n\n", version);
+ hikp_ub_dfx_print((const struct ub_dfx_rsp_head *)&rsp_head, reg_data);
+ free(reg_data);
+}
+
+static void cmd_ub_dfx_init(void)
+{
+ struct major_cmd_ctrl *major_cmd = get_major_cmd();
+
+ major_cmd->option_count = 0;
+ major_cmd->execute = hikp_ub_dfx_execute;
+
+ cmd_option_register("-h", "--help", false, hikp_ub_dfx_help);
+ cmd_option_register("-i", "--interface", true, hikp_ub_dfx_target);
+ cmd_option_register("-m", "--module", true, hikp_ub_dfx_module_select);
+}
+
+HIKP_CMD_DECLARE("ub_dfx", "dump ub dfx info of hardware", cmd_ub_dfx_init);
diff --git a/net/ub/ub_dfx/hikp_ub_dfx.h b/net/ub/ub_dfx/hikp_ub_dfx.h
new file mode 100644
index 0000000..4ba37a1
--- /dev/null
+++ b/net/ub/ub_dfx/hikp_ub_dfx.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2023 Hisilicon Technologies Co., Ltd.
+ * Hikptool is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ * http://license.coscl.org.cn/MulanPSL2
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ *
+ * See the Mulan PSL v2 for more details.
+ */
+
+#ifndef HIKP_UB_DFX_H
+#define HIKP_UB_DFX_H
+
+#include "hikp_net_lib.h"
+
+#define MAX_DFX_DATA_NUM 59
+#define MODULE_SET_FLAG 0x1
+
+enum ub_dfx_cmd_type {
+ LRB_DFX_REG_DUMP = 0,
+ PFA_DFX_REG_DUMP = 1,
+ PM_DFX_REG_DUMP = 2,
+ INVALID_MODULE = 0xFFFFFFFF,
+};
+
+enum ub_dfx_reg_type {
+ INCORRECT_REG_TYPE = 0,
+ TYPE_32_STATS = 1,
+ TYPE_32_RUNNING_STATUS = 2,
+ TYPE_64_STATS = 3,
+ TYPE_INVALID = 255,
+};
+
+#define MAX_TYPE_NAME_LEN 40
+
+enum ub_dfx_reg_width {
+ WIDTH_32_BIT = 32,
+ WIDTH_64_BIT = 64,
+};
+
+struct dfx_type_parse {
+ uint8_t type_id;
+ uint8_t bit_width;
+ uint8_t type_name[MAX_TYPE_NAME_LEN];
+};
+
+struct ub_dfx_param {
+ struct tool_target target;
+ uint32_t sub_cmd_code;
+ uint8_t module_idx;
+ uint8_t flag;
+};
+
+#define MAX_MODULE_NAME_LEN 20
+struct dfx_module_cmd {
+ uint8_t module_name[MAX_MODULE_NAME_LEN];
+ uint32_t sub_cmd_code;
+};
+
+struct ub_dfx_req_para {
+ struct bdf_t bdf;
+ uint8_t block_id;
+};
+
+struct ub_dfx_type_head {
+ uint8_t type_id;
+ uint8_t bit_width;
+ uint8_t reg_num;
+ uint8_t flag;
+};
+
+struct ub_dfx_rsp_head {
+ uint8_t total_blk_num;
+ uint8_t total_type_num;
+ uint8_t cur_blk_size;
+ uint8_t rsvd;
+};
+
+/*********************************************************
+ * All registers are returned as key-value pairs, and divided
+ * into three groups of data.
+ * 1. 32bit regs: R0 bit0~bit15: offset, R1 bit0~bit31: value
+ * 2. 64bit regs: R0 bit0~bit15: offset, R0 bit16~bit31 high16 value, R1 bit0~bit31: low32 value
+ *********************************************************/
+#define DFX_REG_VALUE_OFF 16
+#define DFX_REG_VALUE_MASK 0xFFFF
+#define DFX_REG_ADDR_MASK 0xFFFF
+
+#define WORD_NUM_PER_REG 2
+#define BIT_NUM_OF_WORD 32
+
+struct ub_dfx_rsp {
+ struct ub_dfx_rsp_head rsp_head;
+ uint32_t reg_data[MAX_DFX_DATA_NUM];
+};
+
+#endif /* HIKP_UB_DFX_H */
diff --git a/tool_lib/tool_lib.h b/tool_lib/tool_lib.h
index b211175..bf37465 100644
--- a/tool_lib/tool_lib.h
+++ b/tool_lib/tool_lib.h
@@ -18,7 +18,7 @@
#define TOOL_NAME "hikptool"
-#define TOOL_VER "1.0.14"
+#define TOOL_VER "1.0.15"
#define HI_GET_BITFIELD(value, start, mask) (((value) >> (start)) & (mask))
#define HI_SET_FIELD(origin, shift, val) ((origin) |= (val) << (shift))
--
2.36.1.windows.1
2
21