Signed-off-by: Jialin Zhang zhangjialin11@huawei.com --- ...NULL-vs-IS_ERR-checking-in-memory_ti.patch | 50 +++++ ...-x86-mm-Randomize-per-cpu-entry-area.patch | 172 ++++++++++++++++++ ...ap-shadow-for-percpu-pages-on-demand.patch | 126 +++++++++++++ ...-physical-address-for-every-page-of-.patch | 50 +++++ ...KASAN-shadow-for-entire-per-CPU-rang.patch | 121 ++++++++++++ ...-local-CPU_ENTRY_AREA-variables-to-s.patch | 88 +++++++++ ...lpers-to-align-shadow-addresses-up-a.patch | 113 ++++++++++++ ...te-shadow-for-shared-chunk-of-the-CP.patch | 99 ++++++++++ ...rred-better-wording-on-protection-ag.patch | 97 ++++++++++ ...he-backlog-for-nested-calls-to-mirre.patch | 149 +++++++++++++++ kernel.spec | 27 ++- 11 files changed, 1090 insertions(+), 2 deletions(-) create mode 100644 0015-mm-demotion-fix-NULL-vs-IS_ERR-checking-in-memory_ti.patch create mode 100644 0016-x86-mm-Randomize-per-cpu-entry-area.patch create mode 100644 0017-x86-kasan-Map-shadow-for-percpu-pages-on-demand.patch create mode 100644 0018-x86-mm-Recompute-physical-address-for-every-page-of-.patch create mode 100644 0019-x86-mm-Populate-KASAN-shadow-for-entire-per-CPU-rang.patch create mode 100644 0020-x86-kasan-Rename-local-CPU_ENTRY_AREA-variables-to-s.patch create mode 100644 0021-x86-kasan-Add-helpers-to-align-shadow-addresses-up-a.patch create mode 100644 0022-x86-kasan-Populate-shadow-for-shared-chunk-of-the-CP.patch create mode 100644 0023-net-sched-act_mirred-better-wording-on-protection-ag.patch create mode 100644 0024-act_mirred-use-the-backlog-for-nested-calls-to-mirre.patch
diff --git a/0015-mm-demotion-fix-NULL-vs-IS_ERR-checking-in-memory_ti.patch b/0015-mm-demotion-fix-NULL-vs-IS_ERR-checking-in-memory_ti.patch new file mode 100644 index 0000000..f598fc5 --- /dev/null +++ b/0015-mm-demotion-fix-NULL-vs-IS_ERR-checking-in-memory_ti.patch @@ -0,0 +1,50 @@ +From e11c121e73d4e98ed13259d6b19830f33ca60d76 Mon Sep 17 00:00:00 2001 +From: Miaoqian Lin linmq006@gmail.com +Date: Fri, 17 Mar 2023 10:05:08 +0800 +Subject: [PATCH 15/24] mm/demotion: fix NULL vs IS_ERR checking in + memory_tier_init + +mainline inclusion +from mainline-v6.2-rc1 +commit 4a625ceee8a0ab0273534cb6b432ce6b331db5ee +category: bugfix +bugzilla: https://gitee.com/src-openeuler/kernel/issues/I6IXO8 +CVE: CVE-2023-23005 + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... + +-------------------------------- + +alloc_memory_type() returns error pointers on error instead of NULL. Use +IS_ERR() to check the return value to fix this. + +Link: https://lkml.kernel.org/r/20221110030751.1627266-1-linmq006@gmail.com +Fixes: 7b88bda3761b ("mm/demotion/dax/kmem: set node's abstract distance to MEMTIER_DEFAULT_DAX_ADISTANCE") +Signed-off-by: Miaoqian Lin linmq006@gmail.com +Reviewed-by: "Huang, Ying" ying.huang@intel.com +Cc: Aneesh Kumar K.V aneesh.kumar@linux.ibm.com +Cc: Wei Xu weixugc@google.com +Signed-off-by: Andrew Morton akpm@linux-foundation.org +Signed-off-by: Ma Wupeng mawupeng1@huawei.com +Reviewed-by: tong tiangen tongtiangen@huawei.com +Signed-off-by: Jialin Zhang zhangjialin11@huawei.com +--- + mm/memory-tiers.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c +index ba863f46759d..96022973c9ba 100644 +--- a/mm/memory-tiers.c ++++ b/mm/memory-tiers.c +@@ -645,7 +645,7 @@ static int __init memory_tier_init(void) + * than default DRAM tier. + */ + default_dram_type = alloc_memory_type(MEMTIER_ADISTANCE_DRAM); +- if (!default_dram_type) ++ if (IS_ERR(default_dram_type)) + panic("%s() failed to allocate default DRAM tier\n", __func__); + + /* +-- +2.25.1 + diff --git a/0016-x86-mm-Randomize-per-cpu-entry-area.patch b/0016-x86-mm-Randomize-per-cpu-entry-area.patch new file mode 100644 index 0000000..6f5dd2d --- /dev/null +++ b/0016-x86-mm-Randomize-per-cpu-entry-area.patch @@ -0,0 +1,172 @@ +From 0324d3cd1b57c06b0cf31b6db643ced5b29b0947 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra peterz@infradead.org +Date: Fri, 17 Mar 2023 03:07:41 +0000 +Subject: [PATCH 16/24] x86/mm: Randomize per-cpu entry area + +mainline inclusion +from mainline-v6.2-rc1 +commit 97e3d26b5e5f371b3ee223d94dd123e6c442ba80 +category: bugfix +bugzilla: 188336 +CVE: CVE-2023-0597 + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... + +-------------------------------- + +Seth found that the CPU-entry-area; the piece of per-cpu data that is +mapped into the userspace page-tables for kPTI is not subject to any +randomization -- irrespective of kASLR settings. + +On x86_64 a whole P4D (512 GB) of virtual address space is reserved for +this structure, which is plenty large enough to randomize things a +little. + +As such, use a straight forward randomization scheme that avoids +duplicates to spread the existing CPUs over the available space. + + [ bp: Fix le build. 
] + +Reported-by: Seth Jenkins sethjenkins@google.com +Reviewed-by: Kees Cook keescook@chromium.org +Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org +Signed-off-by: Dave Hansen dave.hansen@linux.intel.com +Signed-off-by: Borislav Petkov bp@suse.de +Signed-off-by: Tong Tiangen tongtiangen@huawei.com +Reviewed-by: Nanyong Sun sunnanyong@huawei.com +Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com +Signed-off-by: Jialin Zhang zhangjialin11@huawei.com +--- + arch/x86/include/asm/cpu_entry_area.h | 4 --- + arch/x86/include/asm/pgtable_areas.h | 8 ++++- + arch/x86/kernel/hw_breakpoint.c | 2 +- + arch/x86/mm/cpu_entry_area.c | 46 ++++++++++++++++++++++++--- + 4 files changed, 50 insertions(+), 10 deletions(-) + +diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h +index 75efc4c6f076..462fc34f1317 100644 +--- a/arch/x86/include/asm/cpu_entry_area.h ++++ b/arch/x86/include/asm/cpu_entry_area.h +@@ -130,10 +130,6 @@ struct cpu_entry_area { + }; + + #define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) +-#define CPU_ENTRY_AREA_ARRAY_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS) +- +-/* Total size includes the readonly IDT mapping page as well: */ +-#define CPU_ENTRY_AREA_TOTAL_SIZE (CPU_ENTRY_AREA_ARRAY_SIZE + PAGE_SIZE) + + DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); + DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks); +diff --git a/arch/x86/include/asm/pgtable_areas.h b/arch/x86/include/asm/pgtable_areas.h +index d34cce1b995c..4f056fb88174 100644 +--- a/arch/x86/include/asm/pgtable_areas.h ++++ b/arch/x86/include/asm/pgtable_areas.h +@@ -11,6 +11,12 @@ + + #define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) + +-#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE) ++#ifdef CONFIG_X86_32 ++#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + \ ++ (CPU_ENTRY_AREA_SIZE * NR_CPUS) - \ ++ CPU_ENTRY_AREA_BASE) ++#else ++#define CPU_ENTRY_AREA_MAP_SIZE P4D_SIZE ++#endif + + #endif /* _ASM_X86_PGTABLE_AREAS_H */ +diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c +index 668a4a6533d9..bbb0f737aab1 100644 +--- a/arch/x86/kernel/hw_breakpoint.c ++++ b/arch/x86/kernel/hw_breakpoint.c +@@ -266,7 +266,7 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end) + + /* CPU entry erea is always used for CPU entry */ + if (within_area(addr, end, CPU_ENTRY_AREA_BASE, +- CPU_ENTRY_AREA_TOTAL_SIZE)) ++ CPU_ENTRY_AREA_MAP_SIZE)) + return true; + + /* +diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c +index 6c2f1b76a0b6..20844cf141fb 100644 +--- a/arch/x86/mm/cpu_entry_area.c ++++ b/arch/x86/mm/cpu_entry_area.c +@@ -15,16 +15,53 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage) + #ifdef CONFIG_X86_64 + static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks); + DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks); +-#endif + +-#ifdef CONFIG_X86_32 ++static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, _cea_offset); ++ ++static __always_inline unsigned int cea_offset(unsigned int cpu) ++{ ++ return per_cpu(_cea_offset, cpu); ++} ++ ++static __init void init_cea_offsets(void) ++{ ++ unsigned int max_cea; ++ unsigned int i, j; ++ ++ max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE; ++ ++ /* O(sodding terrible) */ ++ for_each_possible_cpu(i) { ++ unsigned int cea; ++ ++again: ++ cea = prandom_u32_max(max_cea); ++ ++ 
for_each_possible_cpu(j) { ++ if (cea_offset(j) == cea) ++ goto again; ++ ++ if (i == j) ++ break; ++ } ++ ++ per_cpu(_cea_offset, i) = cea; ++ } ++} ++#else /* !X86_64 */ + DECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack); ++ ++static __always_inline unsigned int cea_offset(unsigned int cpu) ++{ ++ return cpu; ++} ++static inline void init_cea_offsets(void) { } + #endif + + /* Is called from entry code, so must be noinstr */ + noinstr struct cpu_entry_area *get_cpu_entry_area(int cpu) + { +- unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE; ++ unsigned long va = CPU_ENTRY_AREA_PER_CPU + cea_offset(cpu) * CPU_ENTRY_AREA_SIZE; + BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0); + + return (struct cpu_entry_area *) va; +@@ -205,7 +242,6 @@ static __init void setup_cpu_entry_area_ptes(void) + + /* The +1 is for the readonly IDT: */ + BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE); +- BUILD_BUG_ON(CPU_ENTRY_AREA_TOTAL_SIZE != CPU_ENTRY_AREA_MAP_SIZE); + BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK); + + start = CPU_ENTRY_AREA_BASE; +@@ -221,6 +257,8 @@ void __init setup_cpu_entry_areas(void) + { + unsigned int cpu; + ++ init_cea_offsets(); ++ + setup_cpu_entry_area_ptes(); + + for_each_possible_cpu(cpu) +-- +2.25.1 + diff --git a/0017-x86-kasan-Map-shadow-for-percpu-pages-on-demand.patch b/0017-x86-kasan-Map-shadow-for-percpu-pages-on-demand.patch new file mode 100644 index 0000000..b9129b6 --- /dev/null +++ b/0017-x86-kasan-Map-shadow-for-percpu-pages-on-demand.patch @@ -0,0 +1,126 @@ +From 68992563c4b6b1776bd90dafe76caa88ff6dbfe8 Mon Sep 17 00:00:00 2001 +From: Andrey Ryabinin ryabinin.a.a@gmail.com +Date: Fri, 17 Mar 2023 03:07:42 +0000 +Subject: [PATCH 17/24] x86/kasan: Map shadow for percpu pages on demand + +mainline inclusion +from mainline-v6.2-rc1 +commit 3f148f3318140035e87decc1214795ff0755757b +category: bugfix +bugzilla: 188336 +CVE: CVE-2023-0597 + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... + +-------------------------------- + +KASAN maps shadow for the entire CPU-entry-area: + [CPU_ENTRY_AREA_BASE, CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE] + +This will explode once the per-cpu entry areas are randomized since it +will increase CPU_ENTRY_AREA_MAP_SIZE to 512 GB and KASAN fails to +allocate shadow for such big area. + +Fix this by allocating KASAN shadow only for really used cpu entry area +addresses mapped by cea_map_percpu_pages() + +Thanks to the 0day folks for finding and reporting this to be an issue. 
+ +[ dhansen: tweak changelog since this will get committed before peterz's + actual cpu-entry-area randomization ] + +Signed-off-by: Andrey Ryabinin ryabinin.a.a@gmail.com +Signed-off-by: Dave Hansen dave.hansen@linux.intel.com +Tested-by: Yujie Liu yujie.liu@intel.com +Cc: kernel test robot yujie.liu@intel.com +Link: https://lore.kernel.org/r/202210241508.2e203c3d-yujie.liu@intel.com +Signed-off-by: Tong Tiangen tongtiangen@huawei.com +Reviewed-by: Nanyong Sun sunnanyong@huawei.com +Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com +Signed-off-by: Jialin Zhang zhangjialin11@huawei.com +--- + arch/x86/include/asm/kasan.h | 3 +++ + arch/x86/mm/cpu_entry_area.c | 8 +++++++- + arch/x86/mm/kasan_init_64.c | 15 ++++++++++++--- + 3 files changed, 22 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h +index 13e70da38bed..de75306b932e 100644 +--- a/arch/x86/include/asm/kasan.h ++++ b/arch/x86/include/asm/kasan.h +@@ -28,9 +28,12 @@ + #ifdef CONFIG_KASAN + void __init kasan_early_init(void); + void __init kasan_init(void); ++void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid); + #else + static inline void kasan_early_init(void) { } + static inline void kasan_init(void) { } ++static inline void kasan_populate_shadow_for_vaddr(void *va, size_t size, ++ int nid) { } + #endif + + #endif +diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c +index 20844cf141fb..dff9001e5e12 100644 +--- a/arch/x86/mm/cpu_entry_area.c ++++ b/arch/x86/mm/cpu_entry_area.c +@@ -9,6 +9,7 @@ + #include <asm/cpu_entry_area.h> + #include <asm/fixmap.h> + #include <asm/desc.h> ++#include <asm/kasan.h> + + static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage); + +@@ -90,8 +91,13 @@ void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags) + static void __init + cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot) + { ++ phys_addr_t pa = per_cpu_ptr_to_phys(ptr); ++ ++ kasan_populate_shadow_for_vaddr(cea_vaddr, pages * PAGE_SIZE, ++ early_pfn_to_nid(PFN_DOWN(pa))); ++ + for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE) +- cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot); ++ cea_set_pte(cea_vaddr, pa, prot); + } + + static void __init percpu_setup_debug_store(unsigned int cpu) +diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c +index e7b9b464a82f..d1416926ad52 100644 +--- a/arch/x86/mm/kasan_init_64.c ++++ b/arch/x86/mm/kasan_init_64.c +@@ -316,6 +316,18 @@ void __init kasan_early_init(void) + kasan_map_early_shadow(init_top_pgt); + } + ++void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid) ++{ ++ unsigned long shadow_start, shadow_end; ++ ++ shadow_start = (unsigned long)kasan_mem_to_shadow(va); ++ shadow_start = round_down(shadow_start, PAGE_SIZE); ++ shadow_end = (unsigned long)kasan_mem_to_shadow(va + size); ++ shadow_end = round_up(shadow_end, PAGE_SIZE); ++ ++ kasan_populate_shadow(shadow_start, shadow_end, nid); ++} ++ + void __init kasan_init(void) + { + int i; +@@ -393,9 +405,6 @@ void __init kasan_init(void) + kasan_mem_to_shadow((void *)VMALLOC_END + 1), + shadow_cpu_entry_begin); + +- kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin, +- (unsigned long)shadow_cpu_entry_end, 0); +- + kasan_populate_early_shadow(shadow_cpu_entry_end, + kasan_mem_to_shadow((void *)__START_KERNEL_map)); + +-- +2.25.1 + diff --git a/0018-x86-mm-Recompute-physical-address-for-every-page-of-.patch 
b/0018-x86-mm-Recompute-physical-address-for-every-page-of-.patch new file mode 100644 index 0000000..2696052 --- /dev/null +++ b/0018-x86-mm-Recompute-physical-address-for-every-page-of-.patch @@ -0,0 +1,50 @@ +From 12867b242d6e431f6f947e53abd1094cd0075b55 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson seanjc@google.com +Date: Fri, 17 Mar 2023 03:07:43 +0000 +Subject: [PATCH 18/24] x86/mm: Recompute physical address for every page of + per-CPU CEA mapping + +mainline inclusion +from mainline-v6.2-rc1 +commit 80d72a8f76e8f3f0b5a70b8c7022578e17bde8e7 +category: bugfix +bugzilla: 188336 +CVE: CVE-2023-0597 + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... + +-------------------------------- + +Recompute the physical address for each per-CPU page in the CPU entry +area, a recent commit inadvertantly modified cea_map_percpu_pages() such +that every PTE is mapped to the physical address of the first page. + +Fixes: 9fd429c28073 ("x86/kasan: Map shadow for percpu pages on demand") +Signed-off-by: Sean Christopherson seanjc@google.com +Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org +Reviewed-by: Andrey Ryabinin ryabinin.a.a@gmail.com +Link: https://lkml.kernel.org/r/20221110203504.1985010-2-seanjc@google.com +Signed-off-by: Tong Tiangen tongtiangen@huawei.com +Reviewed-by: Nanyong Sun sunnanyong@huawei.com +Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com +Signed-off-by: Jialin Zhang zhangjialin11@huawei.com +--- + arch/x86/mm/cpu_entry_area.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c +index dff9001e5e12..d831aae94b41 100644 +--- a/arch/x86/mm/cpu_entry_area.c ++++ b/arch/x86/mm/cpu_entry_area.c +@@ -97,7 +97,7 @@ cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot) + early_pfn_to_nid(PFN_DOWN(pa))); + + for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE) +- cea_set_pte(cea_vaddr, pa, prot); ++ cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot); + } + + static void __init percpu_setup_debug_store(unsigned int cpu) +-- +2.25.1 + diff --git a/0019-x86-mm-Populate-KASAN-shadow-for-entire-per-CPU-rang.patch b/0019-x86-mm-Populate-KASAN-shadow-for-entire-per-CPU-rang.patch new file mode 100644 index 0000000..8fa96dd --- /dev/null +++ b/0019-x86-mm-Populate-KASAN-shadow-for-entire-per-CPU-rang.patch @@ -0,0 +1,121 @@ +From 91bb861cfc95653af4223af2e00b9e637c501d5a Mon Sep 17 00:00:00 2001 +From: Sean Christopherson seanjc@google.com +Date: Fri, 17 Mar 2023 03:07:44 +0000 +Subject: [PATCH 19/24] x86/mm: Populate KASAN shadow for entire per-CPU range + of CPU entry area + +mainline inclusion +from mainline-v6.2-rc1 +commit 97650148a15e0b30099d6175ffe278b9f55ec66a +category: bugfix +bugzilla: 188336 +CVE: CVE-2023-0597 + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... + +-------------------------------- + +Populate a KASAN shadow for the entire possible per-CPU range of the CPU +entry area instead of requiring that each individual chunk map a shadow. +Mapping shadows individually is error prone, e.g. the per-CPU GDT mapping +was left behind, which can lead to not-present page faults during KASAN +validation if the kernel performs a software lookup into the GDT. The DS +buffer is also likely affected. 
+ +The motivation for mapping the per-CPU areas on-demand was to avoid +mapping the entire 512GiB range that's reserved for the CPU entry area, +shaving a few bytes by not creating shadows for potentially unused memory +was not a goal. + +The bug is most easily reproduced by doing a sigreturn with a garbage +CS in the sigcontext, e.g. + + int main(void) + { + struct sigcontext regs; + + syscall(__NR_mmap, 0x1ffff000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul); + syscall(__NR_mmap, 0x20000000ul, 0x1000000ul, 7ul, 0x32ul, -1, 0ul); + syscall(__NR_mmap, 0x21000000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul); + + memset(®s, 0, sizeof(regs)); + regs.cs = 0x1d0; + syscall(__NR_rt_sigreturn); + return 0; + } + +to coerce the kernel into doing a GDT lookup to compute CS.base when +reading the instruction bytes on the subsequent #GP to determine whether +or not the #GP is something the kernel should handle, e.g. to fixup UMIP +violations or to emulate CLI/STI for IOPL=3 applications. + + BUG: unable to handle page fault for address: fffffbc8379ace00 + #PF: supervisor read access in kernel mode + #PF: error_code(0x0000) - not-present page + PGD 16c03a067 P4D 16c03a067 PUD 15b990067 PMD 15b98f067 PTE 0 + Oops: 0000 [#1] PREEMPT SMP KASAN + CPU: 3 PID: 851 Comm: r2 Not tainted 6.1.0-rc3-next-20221103+ #432 + Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 + RIP: 0010:kasan_check_range+0xdf/0x190 + Call Trace: + <TASK> + get_desc+0xb0/0x1d0 + insn_get_seg_base+0x104/0x270 + insn_fetch_from_user+0x66/0x80 + fixup_umip_exception+0xb1/0x530 + exc_general_protection+0x181/0x210 + asm_exc_general_protection+0x22/0x30 + RIP: 0003:0x0 + Code: Unable to access opcode bytes at 0xffffffffffffffd6. + RSP: 0003:0000000000000000 EFLAGS: 00000202 + RAX: 0000000000000000 RBX: 0000000000000000 RCX: 00000000000001d0 + RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 + RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 + R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 + R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 + </TASK> + +Fixes: 9fd429c28073 ("x86/kasan: Map shadow for percpu pages on demand") +Reported-by: syzbot+ffb4f000dc2872c93f62@syzkaller.appspotmail.com +Suggested-by: Andrey Ryabinin ryabinin.a.a@gmail.com +Signed-off-by: Sean Christopherson seanjc@google.com +Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org +Reviewed-by: Andrey Ryabinin ryabinin.a.a@gmail.com +Link: https://lkml.kernel.org/r/20221110203504.1985010-3-seanjc@google.com +Signed-off-by: Tong Tiangen tongtiangen@huawei.com +Reviewed-by: Nanyong Sun sunnanyong@huawei.com +Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com +Signed-off-by: Jialin Zhang zhangjialin11@huawei.com +--- + arch/x86/mm/cpu_entry_area.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c +index d831aae94b41..7c855dffcdc2 100644 +--- a/arch/x86/mm/cpu_entry_area.c ++++ b/arch/x86/mm/cpu_entry_area.c +@@ -91,11 +91,6 @@ void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags) + static void __init + cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot) + { +- phys_addr_t pa = per_cpu_ptr_to_phys(ptr); +- +- kasan_populate_shadow_for_vaddr(cea_vaddr, pages * PAGE_SIZE, +- early_pfn_to_nid(PFN_DOWN(pa))); +- + for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE) + cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot); + } +@@ -195,6 +190,9 @@ static void __init 
setup_cpu_entry_area(unsigned int cpu) + pgprot_t tss_prot = PAGE_KERNEL; + #endif + ++ kasan_populate_shadow_for_vaddr(cea, CPU_ENTRY_AREA_SIZE, ++ early_cpu_to_node(cpu)); ++ + cea_set_pte(&cea->gdt, get_cpu_gdt_paddr(cpu), gdt_prot); + + cea_map_percpu_pages(&cea->entry_stack_page, +-- +2.25.1 + diff --git a/0020-x86-kasan-Rename-local-CPU_ENTRY_AREA-variables-to-s.patch b/0020-x86-kasan-Rename-local-CPU_ENTRY_AREA-variables-to-s.patch new file mode 100644 index 0000000..e4f9f63 --- /dev/null +++ b/0020-x86-kasan-Rename-local-CPU_ENTRY_AREA-variables-to-s.patch @@ -0,0 +1,88 @@ +From 0560fceb4d3c76133f1a89decbf1c3334afdbd00 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson seanjc@google.com +Date: Fri, 17 Mar 2023 03:07:45 +0000 +Subject: [PATCH 20/24] x86/kasan: Rename local CPU_ENTRY_AREA variables to + shorten names + +mainline inclusion +from mainline-v6.2-rc1 +commit 7077d2ccb94dafd00b29cc2d601c9f6891648f5b +category: bugfix +bugzilla: 188336 +CVE: CVE-2023-0597 + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... + +-------------------------------- + +Rename the CPU entry area variables in kasan_init() to shorten their +names, a future fix will reference the beginning of the per-CPU portion +of the CPU entry area, and shadow_cpu_entry_per_cpu_begin is a bit much. + +No functional change intended. + +Signed-off-by: Sean Christopherson seanjc@google.com +Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org +Reviewed-by: Andrey Ryabinin ryabinin.a.a@gmail.com +Link: https://lkml.kernel.org/r/20221110203504.1985010-4-seanjc@google.com +Signed-off-by: Tong Tiangen tongtiangen@huawei.com +Reviewed-by: Nanyong Sun sunnanyong@huawei.com +Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com +Signed-off-by: Jialin Zhang zhangjialin11@huawei.com +--- + arch/x86/mm/kasan_init_64.c | 22 +++++++++++----------- + 1 file changed, 11 insertions(+), 11 deletions(-) + +diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c +index d1416926ad52..ad7872ae10ed 100644 +--- a/arch/x86/mm/kasan_init_64.c ++++ b/arch/x86/mm/kasan_init_64.c +@@ -331,7 +331,7 @@ void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid) + void __init kasan_init(void) + { + int i; +- void *shadow_cpu_entry_begin, *shadow_cpu_entry_end; ++ void *shadow_cea_begin, *shadow_cea_end; + + memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt)); + +@@ -372,16 +372,16 @@ void __init kasan_init(void) + map_range(&pfn_mapped[i]); + } + +- shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE; +- shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin); +- shadow_cpu_entry_begin = (void *)round_down( +- (unsigned long)shadow_cpu_entry_begin, PAGE_SIZE); ++ shadow_cea_begin = (void *)CPU_ENTRY_AREA_BASE; ++ shadow_cea_begin = kasan_mem_to_shadow(shadow_cea_begin); ++ shadow_cea_begin = (void *)round_down( ++ (unsigned long)shadow_cea_begin, PAGE_SIZE); + +- shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE + ++ shadow_cea_end = (void *)(CPU_ENTRY_AREA_BASE + + CPU_ENTRY_AREA_MAP_SIZE); +- shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end); +- shadow_cpu_entry_end = (void *)round_up( +- (unsigned long)shadow_cpu_entry_end, PAGE_SIZE); ++ shadow_cea_end = kasan_mem_to_shadow(shadow_cea_end); ++ shadow_cea_end = (void *)round_up( ++ (unsigned long)shadow_cea_end, PAGE_SIZE); + + kasan_populate_early_shadow( + kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), +@@ -403,9 +403,9 @@ void __init kasan_init(void) + + 
kasan_populate_early_shadow( + kasan_mem_to_shadow((void *)VMALLOC_END + 1), +- shadow_cpu_entry_begin); ++ shadow_cea_begin); + +- kasan_populate_early_shadow(shadow_cpu_entry_end, ++ kasan_populate_early_shadow(shadow_cea_end, + kasan_mem_to_shadow((void *)__START_KERNEL_map)); + + kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext), +-- +2.25.1 + diff --git a/0021-x86-kasan-Add-helpers-to-align-shadow-addresses-up-a.patch b/0021-x86-kasan-Add-helpers-to-align-shadow-addresses-up-a.patch new file mode 100644 index 0000000..5f3f362 --- /dev/null +++ b/0021-x86-kasan-Add-helpers-to-align-shadow-addresses-up-a.patch @@ -0,0 +1,113 @@ +From ec4ebad1a3ed5a1ff3301de4df9a12ebf81b09c1 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson seanjc@google.com +Date: Fri, 17 Mar 2023 03:07:46 +0000 +Subject: [PATCH 21/24] x86/kasan: Add helpers to align shadow addresses up and + down + +mainline inclusion +from mainline-v6.2-rc1 +commit bde258d97409f2a45243cb393a55ea9ecfc7aba5 +category: bugfix +bugzilla: 188336 +CVE: CVE-2023-0597 + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... + +-------------------------------- + +Add helpers to dedup code for aligning shadow address up/down to page +boundaries when translating an address to its shadow. + +No functional change intended. + +Signed-off-by: Sean Christopherson seanjc@google.com +Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org +Reviewed-by: Andrey Ryabinin ryabinin.a.a@gmail.com +Link: https://lkml.kernel.org/r/20221110203504.1985010-5-seanjc@google.com +Signed-off-by: Tong Tiangen tongtiangen@huawei.com +Reviewed-by: Nanyong Sun sunnanyong@huawei.com +Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com +Signed-off-by: Jialin Zhang zhangjialin11@huawei.com +--- + arch/x86/mm/kasan_init_64.c | 40 ++++++++++++++++++++----------------- + 1 file changed, 22 insertions(+), 18 deletions(-) + +diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c +index ad7872ae10ed..afc5e129ca7b 100644 +--- a/arch/x86/mm/kasan_init_64.c ++++ b/arch/x86/mm/kasan_init_64.c +@@ -316,22 +316,33 @@ void __init kasan_early_init(void) + kasan_map_early_shadow(init_top_pgt); + } + ++static unsigned long kasan_mem_to_shadow_align_down(unsigned long va) ++{ ++ unsigned long shadow = (unsigned long)kasan_mem_to_shadow((void *)va); ++ ++ return round_down(shadow, PAGE_SIZE); ++} ++ ++static unsigned long kasan_mem_to_shadow_align_up(unsigned long va) ++{ ++ unsigned long shadow = (unsigned long)kasan_mem_to_shadow((void *)va); ++ ++ return round_up(shadow, PAGE_SIZE); ++} ++ + void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid) + { + unsigned long shadow_start, shadow_end; + +- shadow_start = (unsigned long)kasan_mem_to_shadow(va); +- shadow_start = round_down(shadow_start, PAGE_SIZE); +- shadow_end = (unsigned long)kasan_mem_to_shadow(va + size); +- shadow_end = round_up(shadow_end, PAGE_SIZE); +- ++ shadow_start = kasan_mem_to_shadow_align_down((unsigned long)va); ++ shadow_end = kasan_mem_to_shadow_align_up((unsigned long)va + size); + kasan_populate_shadow(shadow_start, shadow_end, nid); + } + + void __init kasan_init(void) + { ++ unsigned long shadow_cea_begin, shadow_cea_end; + int i; +- void *shadow_cea_begin, *shadow_cea_end; + + memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt)); + +@@ -372,16 +383,9 @@ void __init kasan_init(void) + map_range(&pfn_mapped[i]); + } + +- shadow_cea_begin = (void *)CPU_ENTRY_AREA_BASE; +- shadow_cea_begin = 
kasan_mem_to_shadow(shadow_cea_begin); +- shadow_cea_begin = (void *)round_down( +- (unsigned long)shadow_cea_begin, PAGE_SIZE); +- +- shadow_cea_end = (void *)(CPU_ENTRY_AREA_BASE + +- CPU_ENTRY_AREA_MAP_SIZE); +- shadow_cea_end = kasan_mem_to_shadow(shadow_cea_end); +- shadow_cea_end = (void *)round_up( +- (unsigned long)shadow_cea_end, PAGE_SIZE); ++ shadow_cea_begin = kasan_mem_to_shadow_align_down(CPU_ENTRY_AREA_BASE); ++ shadow_cea_end = kasan_mem_to_shadow_align_up(CPU_ENTRY_AREA_BASE + ++ CPU_ENTRY_AREA_MAP_SIZE); + + kasan_populate_early_shadow( + kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), +@@ -403,9 +407,9 @@ void __init kasan_init(void) + + kasan_populate_early_shadow( + kasan_mem_to_shadow((void *)VMALLOC_END + 1), +- shadow_cea_begin); ++ (void *)shadow_cea_begin); + +- kasan_populate_early_shadow(shadow_cea_end, ++ kasan_populate_early_shadow((void *)shadow_cea_end, + kasan_mem_to_shadow((void *)__START_KERNEL_map)); + + kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext), +-- +2.25.1 + diff --git a/0022-x86-kasan-Populate-shadow-for-shared-chunk-of-the-CP.patch b/0022-x86-kasan-Populate-shadow-for-shared-chunk-of-the-CP.patch new file mode 100644 index 0000000..d6f9b77 --- /dev/null +++ b/0022-x86-kasan-Populate-shadow-for-shared-chunk-of-the-CP.patch @@ -0,0 +1,99 @@ +From 885cbab14224aca9bcf6df23a432a84e55b55dd5 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson seanjc@google.com +Date: Fri, 17 Mar 2023 03:07:47 +0000 +Subject: [PATCH 22/24] x86/kasan: Populate shadow for shared chunk of the CPU + entry area + +mainline inclusion +from mainline-v6.2-rc1 +commit 1cfaac2400c73378e78182a706be0f3ac8b93cd7 +category: bugfix +bugzilla: 188336 +CVE: CVE-2023-0597 + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... + +-------------------------------- + +Popuplate the shadow for the shared portion of the CPU entry area, i.e. +the read-only IDT mapping, during KASAN initialization. A recent change +modified KASAN to map the per-CPU areas on-demand, but forgot to keep a +shadow for the common area that is shared amongst all CPUs. + +Map the common area in KASAN init instead of letting idt_map_in_cea() do +the dirty work so that it Just Works in the unlikely event more shared +data is shoved into the CPU entry area. + +The bug manifests as a not-present #PF when software attempts to lookup +an IDT entry, e.g. 
when KVM is handling IRQs on Intel CPUs (KVM performs +direct CALL to the IRQ handler to avoid the overhead of INTn): + + BUG: unable to handle page fault for address: fffffbc0000001d8 + #PF: supervisor read access in kernel mode + #PF: error_code(0x0000) - not-present page + PGD 16c03a067 P4D 16c03a067 PUD 0 + Oops: 0000 [#1] PREEMPT SMP KASAN + CPU: 5 PID: 901 Comm: repro Tainted: G W 6.1.0-rc3+ #410 + Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 + RIP: 0010:kasan_check_range+0xdf/0x190 + vmx_handle_exit_irqoff+0x152/0x290 [kvm_intel] + vcpu_run+0x1d89/0x2bd0 [kvm] + kvm_arch_vcpu_ioctl_run+0x3ce/0xa70 [kvm] + kvm_vcpu_ioctl+0x349/0x900 [kvm] + __x64_sys_ioctl+0xb8/0xf0 + do_syscall_64+0x2b/0x50 + entry_SYSCALL_64_after_hwframe+0x46/0xb0 + +Fixes: 9fd429c28073 ("x86/kasan: Map shadow for percpu pages on demand") +Reported-by: syzbot+8cdd16fd5a6c0565e227@syzkaller.appspotmail.com +Signed-off-by: Sean Christopherson seanjc@google.com +Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org +Link: https://lkml.kernel.org/r/20221110203504.1985010-6-seanjc@google.com +Signed-off-by: Tong Tiangen tongtiangen@huawei.com +Reviewed-by: Nanyong Sun sunnanyong@huawei.com +Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com +Signed-off-by: Jialin Zhang zhangjialin11@huawei.com +--- + arch/x86/mm/kasan_init_64.c | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c +index afc5e129ca7b..0302491d799d 100644 +--- a/arch/x86/mm/kasan_init_64.c ++++ b/arch/x86/mm/kasan_init_64.c +@@ -341,7 +341,7 @@ void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid) + + void __init kasan_init(void) + { +- unsigned long shadow_cea_begin, shadow_cea_end; ++ unsigned long shadow_cea_begin, shadow_cea_per_cpu_begin, shadow_cea_end; + int i; + + memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt)); +@@ -384,6 +384,7 @@ void __init kasan_init(void) + } + + shadow_cea_begin = kasan_mem_to_shadow_align_down(CPU_ENTRY_AREA_BASE); ++ shadow_cea_per_cpu_begin = kasan_mem_to_shadow_align_up(CPU_ENTRY_AREA_PER_CPU); + shadow_cea_end = kasan_mem_to_shadow_align_up(CPU_ENTRY_AREA_BASE + + CPU_ENTRY_AREA_MAP_SIZE); + +@@ -409,6 +410,15 @@ void __init kasan_init(void) + kasan_mem_to_shadow((void *)VMALLOC_END + 1), + (void *)shadow_cea_begin); + ++ /* ++ * Populate the shadow for the shared portion of the CPU entry area. ++ * Shadows for the per-CPU areas are mapped on-demand, as each CPU's ++ * area is randomly placed somewhere in the 512GiB range and mapping ++ * the entire 512GiB range is prohibitively expensive. 
++ */ ++ kasan_populate_shadow(shadow_cea_begin, ++ shadow_cea_per_cpu_begin, 0); ++ + kasan_populate_early_shadow((void *)shadow_cea_end, + kasan_mem_to_shadow((void *)__START_KERNEL_map)); + +-- +2.25.1 + diff --git a/0023-net-sched-act_mirred-better-wording-on-protection-ag.patch b/0023-net-sched-act_mirred-better-wording-on-protection-ag.patch new file mode 100644 index 0000000..8065822 --- /dev/null +++ b/0023-net-sched-act_mirred-better-wording-on-protection-ag.patch @@ -0,0 +1,97 @@ +From 1420d4aeb4cecca648b494e6d875c222da1d9309 Mon Sep 17 00:00:00 2001 +From: Davide Caratti dcaratti@redhat.com +Date: Sat, 18 Mar 2023 16:46:22 +0800 +Subject: [PATCH 23/24] net/sched: act_mirred: better wording on protection + against excessive stack growth + +mainline inclusion +from mainline-v6.3-rc1 +commit 78dcdffe0418ac8f3f057f26fe71ccf4d8ed851f +category: bugfix +bugzilla: https://gitee.com/src-openeuler/kernel/issues/I64END +CVE: CVE-2022-4269 + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?h... + +-------------------------------- + +with commit e2ca070f89ec ("net: sched: protect against stack overflow in +TC act_mirred"), act_mirred protected itself against excessive stack growth +using per_cpu counter of nested calls to tcf_mirred_act(), and capping it +to MIRRED_RECURSION_LIMIT. However, such protection does not detect +recursion/loops in case the packet is enqueued to the backlog (for example, +when the mirred target device has RPS or skb timestamping enabled). Change +the wording from "recursion" to "nesting" to make it more clear to readers. + +CC: Jamal Hadi Salim jhs@mojatatu.com +Signed-off-by: Davide Caratti dcaratti@redhat.com +Reviewed-by: Marcelo Ricardo Leitner marcelo.leitner@gmail.com +Acked-by: Jamal Hadi Salim jhs@mojatatu.com +Signed-off-by: Paolo Abeni pabeni@redhat.com +Signed-off-by: Ziyang Xuan william.xuanziyang@huawei.com +Reviewed-by: Yue Haibing yuehaibing@huawei.com +Signed-off-by: Jialin Zhang zhangjialin11@huawei.com +--- + net/sched/act_mirred.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c +index b8ad6ae282c0..ded6ee054be1 100644 +--- a/net/sched/act_mirred.c ++++ b/net/sched/act_mirred.c +@@ -28,8 +28,8 @@ + static LIST_HEAD(mirred_list); + static DEFINE_SPINLOCK(mirred_list_lock); + +-#define MIRRED_RECURSION_LIMIT 4 +-static DEFINE_PER_CPU(unsigned int, mirred_rec_level); ++#define MIRRED_NEST_LIMIT 4 ++static DEFINE_PER_CPU(unsigned int, mirred_nest_level); + + static bool tcf_mirred_is_act_redirect(int action) + { +@@ -224,7 +224,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, + struct sk_buff *skb2 = skb; + bool m_mac_header_xmit; + struct net_device *dev; +- unsigned int rec_level; ++ unsigned int nest_level; + int retval, err = 0; + bool use_reinsert; + bool want_ingress; +@@ -235,11 +235,11 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, + int mac_len; + bool at_nh; + +- rec_level = __this_cpu_inc_return(mirred_rec_level); +- if (unlikely(rec_level > MIRRED_RECURSION_LIMIT)) { ++ nest_level = __this_cpu_inc_return(mirred_nest_level); ++ if (unlikely(nest_level > MIRRED_NEST_LIMIT)) { + net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n", + netdev_name(skb->dev)); +- __this_cpu_dec(mirred_rec_level); ++ __this_cpu_dec(mirred_nest_level); + return TC_ACT_SHOT; + } + +@@ -308,7 +308,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct 
tc_action *a, + err = tcf_mirred_forward(want_ingress, skb); + if (err) + tcf_action_inc_overlimit_qstats(&m->common); +- __this_cpu_dec(mirred_rec_level); ++ __this_cpu_dec(mirred_nest_level); + return TC_ACT_CONSUMED; + } + } +@@ -320,7 +320,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, + if (tcf_mirred_is_act_redirect(m_eaction)) + retval = TC_ACT_SHOT; + } +- __this_cpu_dec(mirred_rec_level); ++ __this_cpu_dec(mirred_nest_level); + + return retval; + } +-- +2.25.1 + diff --git a/0024-act_mirred-use-the-backlog-for-nested-calls-to-mirre.patch b/0024-act_mirred-use-the-backlog-for-nested-calls-to-mirre.patch new file mode 100644 index 0000000..edfc0ba --- /dev/null +++ b/0024-act_mirred-use-the-backlog-for-nested-calls-to-mirre.patch @@ -0,0 +1,149 @@ +From a6bb3989ccb7d3493c20e709179904733c6db856 Mon Sep 17 00:00:00 2001 +From: Davide Caratti dcaratti@redhat.com +Date: Sat, 18 Mar 2023 16:46:40 +0800 +Subject: [PATCH 24/24] act_mirred: use the backlog for nested calls to mirred + ingress + +mainline inclusion +from mainline-v6.3-rc1 +commit ca22da2fbd693b54dc8e3b7b54ccc9f7e9ba3640 +category: bugfix +bugzilla: https://gitee.com/src-openeuler/kernel/issues/I64END +CVE: CVE-2022-4269 + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?h... + +-------------------------------- + +William reports kernel soft-lockups on some OVS topologies when TC mirred +egress->ingress action is hit by local TCP traffic [1]. +The same can also be reproduced with SCTP (thanks Xin for verifying), when +client and server reach themselves through mirred egress to ingress, and +one of the two peers sends a "heartbeat" packet (from within a timer). + +Enqueueing to backlog proved to fix this soft lockup; however, as Cong +noticed [2], we should preserve - when possible - the current mirred +behavior that counts as "overlimits" any eventual packet drop subsequent to +the mirred forwarding action [3]. A compromise solution might use the +backlog only when tcf_mirred_act() has a nest level greater than one: +change tcf_mirred_forward() accordingly. + +Also, add a kselftest that can reproduce the lockup and verifies TC mirred +ability to account for further packet drops after TC mirred egress->ingress +(when the nest level is 1). + + [1] https://lore.kernel.org/netdev/33dc43f587ec1388ba456b4915c75f02a8aae226.1663... + [2] https://lore.kernel.org/netdev/Y0w%2FWWY60gqrtGLp@pop-os.localdomain/ + [3] such behavior is not guaranteed: for example, if RPS or skb RX + timestamping is enabled on the mirred target device, the kernel + can defer receiving the skb and return NET_RX_SUCCESS inside + tcf_mirred_forward(). 
+ +Reported-by: William Zhao wizhao@redhat.com +CC: Xin Long lucien.xin@gmail.com +Signed-off-by: Davide Caratti dcaratti@redhat.com +Reviewed-by: Marcelo Ricardo Leitner marcelo.leitner@gmail.com +Acked-by: Jamal Hadi Salim jhs@mojatatu.com +Signed-off-by: Paolo Abeni pabeni@redhat.com +Signed-off-by: Ziyang Xuan william.xuanziyang@huawei.com +Reviewed-by: Yue Haibing yuehaibing@huawei.com +Signed-off-by: Jialin Zhang zhangjialin11@huawei.com +--- + net/sched/act_mirred.c | 7 +++ + .../selftests/net/forwarding/tc_actions.sh | 49 ++++++++++++++++++- + 2 files changed, 55 insertions(+), 1 deletion(-) + +diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c +index ded6ee054be1..baeae5e5c8f0 100644 +--- a/net/sched/act_mirred.c ++++ b/net/sched/act_mirred.c +@@ -205,12 +205,19 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, + return err; + } + ++static bool is_mirred_nested(void) ++{ ++ return unlikely(__this_cpu_read(mirred_nest_level) > 1); ++} ++ + static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb) + { + int err; + + if (!want_ingress) + err = tcf_dev_queue_xmit(skb, dev_queue_xmit); ++ else if (is_mirred_nested()) ++ err = netif_rx(skb); + else + err = netif_receive_skb(skb); + +diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh +index 1e0a62f638fe..919c0dd9fe4b 100755 +--- a/tools/testing/selftests/net/forwarding/tc_actions.sh ++++ b/tools/testing/selftests/net/forwarding/tc_actions.sh +@@ -3,7 +3,8 @@ + + ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \ + mirred_egress_mirror_test matchall_mirred_egress_mirror_test \ +- gact_trap_test mirred_egress_to_ingress_test" ++ gact_trap_test mirred_egress_to_ingress_test \ ++ mirred_egress_to_ingress_tcp_test" + NUM_NETIFS=4 + source tc_common.sh + source lib.sh +@@ -198,6 +199,52 @@ mirred_egress_to_ingress_test() + log_test "mirred_egress_to_ingress ($tcflags)" + } + ++mirred_egress_to_ingress_tcp_test() ++{ ++ local tmpfile=$(mktemp) tmpfile1=$(mktemp) ++ ++ RET=0 ++ dd conv=sparse status=none if=/dev/zero bs=1M count=2 of=$tmpfile ++ tc filter add dev $h1 protocol ip pref 100 handle 100 egress flower \ ++ $tcflags ip_proto tcp src_ip 192.0.2.1 dst_ip 192.0.2.2 \ ++ action ct commit nat src addr 192.0.2.2 pipe \ ++ action ct clear pipe \ ++ action ct commit nat dst addr 192.0.2.1 pipe \ ++ action ct clear pipe \ ++ action skbedit ptype host pipe \ ++ action mirred ingress redirect dev $h1 ++ tc filter add dev $h1 protocol ip pref 101 handle 101 egress flower \ ++ $tcflags ip_proto icmp \ ++ action mirred ingress redirect dev $h1 ++ tc filter add dev $h1 protocol ip pref 102 handle 102 ingress flower \ ++ ip_proto icmp \ ++ action drop ++ ++ ip vrf exec v$h1 nc --recv-only -w10 -l -p 12345 -o $tmpfile1 & ++ local rpid=$! ++ ip vrf exec v$h1 nc -w1 --send-only 192.0.2.2 12345 <$tmpfile ++ wait -n $rpid ++ cmp -s $tmpfile $tmpfile1 ++ check_err $? "server output check failed" ++ ++ $MZ $h1 -c 10 -p 64 -a $h1mac -b $h1mac -A 192.0.2.1 -B 192.0.2.1 \ ++ -t icmp "ping,id=42,seq=5" -q ++ tc_check_packets "dev $h1 egress" 101 10 ++ check_err $? "didn't mirred redirect ICMP" ++ tc_check_packets "dev $h1 ingress" 102 10 ++ check_err $? "didn't drop mirred ICMP" ++ local overlimits=$(tc_rule_stats_get ${h1} 101 egress .overlimits) ++ test ${overlimits} = 10 ++ check_err $? 
"wrong overlimits, expected 10 got ${overlimits}" ++ ++ tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower ++ tc filter del dev $h1 egress protocol ip pref 101 handle 101 flower ++ tc filter del dev $h1 ingress protocol ip pref 102 handle 102 flower ++ ++ rm -f $tmpfile $tmpfile1 ++ log_test "mirred_egress_to_ingress_tcp ($tcflags)" ++} ++ + setup_prepare() + { + h1=${NETIFS[p1]} +-- +2.25.1 + diff --git a/kernel.spec b/kernel.spec index 0eb5b16..f4bc7a4 100644 --- a/kernel.spec +++ b/kernel.spec @@ -10,9 +10,9 @@
%global upstream_version 6.1 %global upstream_sublevel 19 -%global devel_release 6 +%global devel_release 7 %global maintenance_release .0.0 -%global pkg_release .16 +%global pkg_release .17
%define with_debuginfo 0 # Do not recompute the build-id of vmlinux in find-debuginfo.sh @@ -84,6 +84,16 @@ Patch0011: 0011-bpf-Two-helper-functions-are-introduced-to-parse-use.patch Patch0012: 0012-net-bpf-Add-a-writeable_tracepoint-to-inet_stream_co.patch Patch0013: 0013-nfs-client-multipath.patch Patch0014: 0014-nfs-client-multipath-config.patch +Patch0015: 0015-mm-demotion-fix-NULL-vs-IS_ERR-checking-in-memory_ti.patch +Patch0016: 0016-x86-mm-Randomize-per-cpu-entry-area.patch +Patch0017: 0017-x86-kasan-Map-shadow-for-percpu-pages-on-demand.patch +Patch0018: 0018-x86-mm-Recompute-physical-address-for-every-page-of-.patch +Patch0019: 0019-x86-mm-Populate-KASAN-shadow-for-entire-per-CPU-rang.patch +Patch0020: 0020-x86-kasan-Rename-local-CPU_ENTRY_AREA-variables-to-s.patch +Patch0021: 0021-x86-kasan-Add-helpers-to-align-shadow-addresses-up-a.patch +Patch0022: 0022-x86-kasan-Populate-shadow-for-shared-chunk-of-the-CP.patch +Patch0023: 0023-net-sched-act_mirred-better-wording-on-protection-ag.patch +Patch0024: 0024-act_mirred-use-the-backlog-for-nested-calls-to-mirre.patch
#BuildRequires: @@ -323,6 +333,16 @@ Applypatches series.conf %{_builddir}/kernel-%{version}/linux-%{KernelVer} %patch0012 -p1 %patch0013 -p1 %patch0014 -p1 +%patch0015 -p1 +%patch0016 -p1 +%patch0017 -p1 +%patch0018 -p1 +%patch0019 -p1 +%patch0020 -p1 +%patch0021 -p1 +%patch0022 -p1 +%patch0023 -p1 +%patch0024 -p1 touch .scmversion
find . \( -name "*.orig" -o -name "*~" \) -exec rm -f {} \; >/dev/null @@ -905,6 +925,9 @@ fi %endif
%changelog +* Sat Mar 18 2023 Jialin Zhang zhangjialin11@huawei.com - 6.1.19-7.0.0.17 +- Fix CVE-2023-23005, CVE-2023-0597 and CVE-2022-4269 + * Fri Mar 17 2023 Zheng Zengkai zhengzengkai@huawei.com - 6.1.19-6.0.0.16 - Fix kernel rpm build failure that libperf-jvmti.so is missing