From: Peter Zijlstra <peterz@infradead.org>
mainline inclusion
from mainline-v6.2-rc1
commit 97e3d26b5e5f371b3ee223d94dd123e6c442ba80
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I6C6UC
CVE: CVE-2023-0597
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Seth found that the CPU-entry-area; the piece of per-cpu data that is mapped into the userspace page-tables for kPTI is not subject to any randomization -- irrespective of kASLR settings.
On x86_64 a whole P4D (512 GB) of virtual address space is reserved for this structure, which is plenty large enough to randomize things a little.
As such, use a straight forward randomization scheme that avoids duplicates to spread the existing CPUs over the available space.
[ bp: Fix le build. ]
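A minimal userspace sketch of the scheme described above, for illustration only: each CPU draws a random slot in [0, max_cea) and retries on collision. NR_CPUS, MAX_CEA and rand32() are stand-ins for the kernel's CPU count, the computed max_cea and get_random_u32(); the real code is init_cea_offsets() in the hunk below.

/*
 * Illustration only: userspace sketch of the duplicate-avoiding
 * randomization. NR_CPUS, MAX_CEA and rand32() are stand-ins, not
 * kernel values or APIs.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define NR_CPUS	8
#define MAX_CEA	64	/* number of per-cpu slots that fit in the area */

static uint32_t rand32(void)	/* stand-in for get_random_u32() */
{
	/* rand() yields at most 31 bits; combine two calls for 32 */
	return ((uint32_t)rand() << 16) ^ (uint32_t)rand();
}

int main(void)
{
	uint32_t off[NR_CPUS];

	srand((unsigned int)time(NULL));

	for (int i = 0; i < NR_CPUS; i++) {
		uint32_t cea;
again:
		/* scale a 32-bit random value onto [0, MAX_CEA) */
		cea = (uint32_t)(((uint64_t)rand32() * MAX_CEA) >> 32);

		/* reject the slot if an earlier CPU already claimed it */
		for (int j = 0; j < i; j++)
			if (off[j] == cea)
				goto again;

		off[i] = cea;
		printf("cpu %d -> slot %u\n", i, cea);
	}
	return 0;
}

Since collisions are simply retried, the assignment loop is quadratic in the worst case ("O(sodding terrible)" in the hunk below), which is acceptable at boot for a bounded number of CPUs.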
Reported-by: Seth Jenkins <sethjenkins@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>

Conflict: arch/x86/mm/cpu_entry_area.c
Use get_random_u32() instead of prandom_u32_max() in init_cea_offsets().
With CONFIG_RANDOMIZE_BASE=y, KASLR initializes the prandom seed via prandom_seed_state() before init_cea_offsets() runs. With CONFIG_RANDOMIZE_BASE=n, however, the prandom seed is only initialized after init_cea_offsets(), so the cea offset is always 0.

Mainline commit d4150779e60f ("random32: use real rng for non-deterministic randomness") makes prandom_u32_max() use get_random_u32() instead of prandom_u32(), so prandom_u32_max() no longer has to wait for the prandom seed to be initialized. That commit, however, has many prerequisite patches that have not been merged here.

So, as a workaround, use get_random_u32() directly in init_cea_offsets(), which also simplifies the code.

Signed-off-by: Ke Liu <liuke94@huawei.com>
Reviewed-by: Wang Weiyang <wangweiyang2@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Jialin Zhang <zhangjialin11@huawei.com>
---
 arch/x86/include/asm/cpu_entry_area.h |  4 ---
 arch/x86/include/asm/pgtable_areas.h  |  8 ++++-
 arch/x86/kernel/hw_breakpoint.c       |  2 +-
 arch/x86/mm/cpu_entry_area.c          | 51 ++++++++++++++++++++++++---
 4 files changed, 55 insertions(+), 10 deletions(-)
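For context, a minimal userspace sketch of the multiply-shift reduction the workaround open-codes in the hunk below, (u32)(((u64)r * max) >> 32), which maps a 32-bit random value onto [0, max) without a division. bounded() and the sample bound of 100 are illustrative only, not kernel code.

/*
 * Illustration only: boundary behaviour of the multiply-shift
 * reduction, mapping r in [0, 2^32) onto [0, max).
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t bounded(uint32_t r, uint32_t max)
{
	return (uint32_t)(((uint64_t)r * max) >> 32);
}

int main(void)
{
	printf("%u\n", bounded(0, 100));		/* 0: smallest input maps to slot 0 */
	printf("%u\n", bounded(1u << 31, 100));		/* 50: the midpoint maps to max / 2 */
	printf("%u\n", bounded(UINT32_MAX, 100));	/* 99: the result never reaches max */
	return 0;
}

This is the same reduction prandom_u32_max() performs internally, so the workaround should behave the same as the mainline code once d4150779e60f is in place.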
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index dd5ea1bdf04c..e2c04a5015b0 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -130,10 +130,6 @@ struct cpu_entry_area {
 };
 
 #define CPU_ENTRY_AREA_SIZE		(sizeof(struct cpu_entry_area))
-#define CPU_ENTRY_AREA_ARRAY_SIZE	(CPU_ENTRY_AREA_SIZE * NR_CPUS)
-
-/* Total size includes the readonly IDT mapping page as well: */
-#define CPU_ENTRY_AREA_TOTAL_SIZE	(CPU_ENTRY_AREA_ARRAY_SIZE + PAGE_SIZE)
 
 DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
 DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
diff --git a/arch/x86/include/asm/pgtable_areas.h b/arch/x86/include/asm/pgtable_areas.h
index d34cce1b995c..4f056fb88174 100644
--- a/arch/x86/include/asm/pgtable_areas.h
+++ b/arch/x86/include/asm/pgtable_areas.h
@@ -11,6 +11,12 @@
 
 #define CPU_ENTRY_AREA_RO_IDT_VADDR	((void *)CPU_ENTRY_AREA_RO_IDT)
 
-#define CPU_ENTRY_AREA_MAP_SIZE	(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
+#ifdef CONFIG_X86_32
+#define CPU_ENTRY_AREA_MAP_SIZE	(CPU_ENTRY_AREA_PER_CPU +		\
+					 (CPU_ENTRY_AREA_SIZE * NR_CPUS) -	\
+					 CPU_ENTRY_AREA_BASE)
+#else
+#define CPU_ENTRY_AREA_MAP_SIZE	P4D_SIZE
+#endif
 
 #endif /* _ASM_X86_PGTABLE_AREAS_H */
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 668a4a6533d9..bbb0f737aab1 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -266,7 +266,7 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end)
 
 	/* CPU entry erea is always used for CPU entry */
 	if (within_area(addr, end, CPU_ENTRY_AREA_BASE,
-			CPU_ENTRY_AREA_TOTAL_SIZE))
+			CPU_ENTRY_AREA_MAP_SIZE))
 		return true;
 
 	/*
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 6c2f1b76a0b6..98396a67958c 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -5,6 +5,7 @@
 #include <linux/kallsyms.h>
 #include <linux/kcore.h>
 #include <linux/pgtable.h>
+#include <linux/random.h>
 
 #include <asm/cpu_entry_area.h>
 #include <asm/fixmap.h>
@@ -15,16 +16,57 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage)
 #ifdef CONFIG_X86_64
 static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
 DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);
-#endif
 
-#ifdef CONFIG_X86_32
+static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, _cea_offset);
+
+static __always_inline unsigned int cea_offset(unsigned int cpu)
+{
+	return per_cpu(_cea_offset, cpu);
+}
+
+static __init void init_cea_offsets(void)
+{
+	unsigned int max_cea;
+	unsigned int i, j;
+
+	max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE;
+
+	/* O(sodding terrible) */
+	for_each_possible_cpu(i) {
+		unsigned int cea;
+
+again:
+		/*
+		 * Use get_random_u32() directly instead of prandom_u32_max():
+		 * the prandom seed is not yet set up when CONFIG_RANDOMIZE_BASE=n.
+		 */
+		cea = (u32)(((u64) get_random_u32() * max_cea) >> 32);
+
+		for_each_possible_cpu(j) {
+			if (cea_offset(j) == cea)
+				goto again;
+
+			if (i == j)
+				break;
+		}
+
+		per_cpu(_cea_offset, i) = cea;
+	}
+}
+#else /* !X86_64 */
 DECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack);
+
+static __always_inline unsigned int cea_offset(unsigned int cpu)
+{
+	return cpu;
+}
+static inline void init_cea_offsets(void) { }
 #endif
 
 /* Is called from entry code, so must be noinstr */
 noinstr struct cpu_entry_area *get_cpu_entry_area(int cpu)
 {
-	unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
+	unsigned long va = CPU_ENTRY_AREA_PER_CPU + cea_offset(cpu) * CPU_ENTRY_AREA_SIZE;
 	BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
 
 	return (struct cpu_entry_area *) va;
@@ -205,7 +247,6 @@ static __init void setup_cpu_entry_area_ptes(void)
 
 	/* The +1 is for the readonly IDT: */
 	BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
-	BUILD_BUG_ON(CPU_ENTRY_AREA_TOTAL_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
 	BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
 
 	start = CPU_ENTRY_AREA_BASE;
@@ -221,6 +262,8 @@ void __init setup_cpu_entry_areas(void)
 {
 	unsigned int cpu;
 
+	init_cea_offsets();
+
 	setup_cpu_entry_area_ptes();
 
 	for_each_possible_cpu(cpu)