From: Takao Indoh <indou.takao@fujitsu.com>
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4BLL0
CVE: NA

---------------------------
This patch adds a new boot parameter, 'disable_tlbflush_is', to disable TLB flushes within the same inner shareable domain, for performance tuning.
Without this parameter, flush_tlb_mm() invalidates TLB entries with __tlbi(aside1is, asid). This instruction makes every CPU within the same inner shareable domain check whether it holds TLB entries for this ASID; that causes performance noise, especially in large-scale HPC environments with more than a thousand nodes connected by a low-latency interconnect.
When this new parameter is specified, TLB entries are invalidated with __tlbi(aside1, asid), executed only on the CPUs listed in mm_cpumask(mm). The flush is therefore limited to the minimal set of CPUs, and the performance problem does not occur.
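For example, with a GRUB-style boot entry the parameter is simply appended to the kernel command line (an illustrative sketch; the kernel image name and root device below are placeholders):

    linux /boot/vmlinuz root=/dev/vda1 disable_tlbflush_is

Booting without the parameter keeps the default broadcast (inner shareable) TLBI behaviour.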
Signed-off-by: QI Fuli <qi.fuli@fujitsu.com>
Signed-off-by: Takao Indoh <indou.takao@fujitsu.com>
Signed-off-by: Cheng Jian <cj.chengjian@huawei.com>
Reviewed-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 .../admin-guide/kernel-parameters.txt |   4 +
 arch/arm64/include/asm/mmu_context.h  |   7 +-
 arch/arm64/include/asm/tlbflush.h     |  61 ++-----
 arch/arm64/kernel/Makefile            |   2 +-
 arch/arm64/kernel/tlbflush.c          | 155 ++++++++++++++++++
 5 files changed, 178 insertions(+), 51 deletions(-)
 create mode 100644 arch/arm64/kernel/tlbflush.c
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 54127ef751553..297586ef297ee 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -844,6 +844,10 @@
 	disable=	[IPV6]
 			See Documentation/networking/ipv6.txt.
 
+	disable_tlbflush_is
+			[ARM64] Disable using TLB instruction to flush
+			all PE within the same inner shareable domain.
+
 	hardened_usercopy=
 			[KNL] Under CONFIG_HARDENED_USERCOPY, whether
 			hardening is enabled for this boot. Hardened
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 93d6a3c093dbc..005345d77349c 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -229,8 +229,13 @@
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 			     struct task_struct *tsk)
 {
-	if (prev != next)
+	unsigned int cpu = smp_processor_id();
+
+	if (prev != next) {
 		__switch_mm(next);
+		cpumask_clear_cpu(cpu, mm_cpumask(prev));
+		local_flush_tlb_mm(prev);
+	}
 
 	/*
 	 * Update the saved TTBR0_EL1 of the scheduled-in task as the previous
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 178719950e74e..c9307b602fadd 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -139,6 +139,13 @@
  * on top of these routines, since that is our interface to the mmu_gather
  * API as used by munmap() and friends.
  */
+
+void flush_tlb_mm(struct mm_struct *mm);
+void flush_tlb_page_nosync(struct vm_area_struct *vma,
+			   unsigned long uaddr);
+void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+		       unsigned long end, unsigned long stride, bool last_level);
+
 static inline void local_flush_tlb_all(void)
 {
 	dsb(nshst);
@@ -155,24 +162,14 @@ static inline void flush_tlb_all(void)
 	isb();
 }
 
-static inline void flush_tlb_mm(struct mm_struct *mm)
+static inline void local_flush_tlb_mm(struct mm_struct *mm)
 {
 	unsigned long asid = __TLBI_VADDR(0, ASID(mm));
 
-	dsb(ishst);
-	__tlbi(aside1is, asid);
-	__tlbi_user(aside1is, asid);
-	dsb(ish);
-}
-
-static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
-					 unsigned long uaddr)
-{
-	unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
-
-	dsb(ishst);
-	__tlbi(vale1is, addr);
-	__tlbi_user(vale1is, addr);
+	dsb(nshst);
+	__tlbi(aside1, asid);
+	__tlbi_user(aside1, asid);
+	dsb(nsh);
 }
 
 static inline void flush_tlb_page(struct vm_area_struct *vma,
@@ -188,40 +185,6 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
  */
 #define MAX_TLBI_OPS	PTRS_PER_PTE
 
-static inline void __flush_tlb_range(struct vm_area_struct *vma,
-				     unsigned long start, unsigned long end,
-				     unsigned long stride, bool last_level)
-{
-	unsigned long asid = ASID(vma->vm_mm);
-	unsigned long addr;
-
-	start = round_down(start, stride);
-	end = round_up(end, stride);
-
-	if ((end - start) >= (MAX_TLBI_OPS * stride)) {
-		flush_tlb_mm(vma->vm_mm);
-		return;
-	}
-
-	/* Convert the stride into units of 4k */
-	stride >>= 12;
-
-	start = __TLBI_VADDR(start, asid);
-	end = __TLBI_VADDR(end, asid);
-
-	dsb(ishst);
-	for (addr = start; addr < end; addr += stride) {
-		if (last_level) {
-			__tlbi(vale1is, addr);
-			__tlbi_user(vale1is, addr);
-		} else {
-			__tlbi(vae1is, addr);
-			__tlbi_user(vae1is, addr);
-		}
-	}
-	dsb(ish);
-}
-
 static inline void flush_tlb_range(struct vm_area_struct *vma,
 				   unsigned long start, unsigned long end)
 {
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index fe759f364609d..fcb9d64d651cc 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -19,7 +19,7 @@ arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
 			   return_address.o cpuinfo.o cpu_errata.o	\
 			   cpufeature.o alternative.o cacheinfo.o	\
 			   smp.o smp_spin_table.o topology.o smccc-call.o	\
-			   syscall.o
+			   syscall.o tlbflush.o
 
 extra-$(CONFIG_EFI)			:= efi-entry.o
 
diff --git a/arch/arm64/kernel/tlbflush.c b/arch/arm64/kernel/tlbflush.c
new file mode 100644
index 0000000000000..52c9a237759a6
--- /dev/null
+++ b/arch/arm64/kernel/tlbflush.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 FUJITSU LIMITED
+
+#include <linux/smp.h>
+#include <asm/tlbflush.h>
+
+struct tlb_args {
+	struct vm_area_struct *ta_vma;
+	unsigned long ta_start;
+	unsigned long ta_end;
+	unsigned long ta_stride;
+	bool ta_last_level;
+};
+
+int disable_tlbflush_is;
+
+static int __init disable_tlbflush_is_setup(char *str)
+{
+	disable_tlbflush_is = 1;
+
+	return 0;
+}
+__setup("disable_tlbflush_is", disable_tlbflush_is_setup);
+
+static inline void __flush_tlb_mm(struct mm_struct *mm)
+{
+	unsigned long asid = __TLBI_VADDR(0, ASID(mm));
+
+	dsb(ishst);
+	__tlbi(aside1is, asid);
+	__tlbi_user(aside1is, asid);
+	dsb(ish);
+}
+
+static inline void ipi_flush_tlb_mm(void *arg)
+{
+	struct mm_struct *mm = arg;
+
+	local_flush_tlb_mm(mm);
+}
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+	if (disable_tlbflush_is)
+		on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm,
+				 (void *)mm, true);
+	else
+		__flush_tlb_mm(mm);
+}
+
+static inline void __flush_tlb_page_nosync(unsigned long addr)
+{
+	dsb(ishst);
+	__tlbi(vale1is, addr);
+	__tlbi_user(vale1is, addr);
+}
+
+static inline void __local_flush_tlb_page_nosync(unsigned long addr)
+{
+	dsb(nshst);
+	__tlbi(vale1, addr);
+	__tlbi_user(vale1, addr);
+	dsb(nsh);
+}
+
+static inline void ipi_flush_tlb_page_nosync(void *arg)
+{
+	unsigned long addr = *(unsigned long *)arg;
+
+	__local_flush_tlb_page_nosync(addr);
+}
+
+void flush_tlb_page_nosync(struct vm_area_struct *vma, unsigned long uaddr)
+{
+	unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
+
+	if (disable_tlbflush_is)
+		on_each_cpu_mask(mm_cpumask(vma->vm_mm),
+				 ipi_flush_tlb_page_nosync, &addr, true);
+	else
+		__flush_tlb_page_nosync(addr);
+}
+
+static inline void ___flush_tlb_range(unsigned long start, unsigned long end,
+				      unsigned long stride, bool last_level)
+{
+	unsigned long addr;
+
+	dsb(ishst);
+	for (addr = start; addr < end; addr += stride) {
+		if (last_level) {
+			__tlbi(vale1is, addr);
+			__tlbi_user(vale1is, addr);
+		} else {
+			__tlbi(vae1is, addr);
+			__tlbi_user(vae1is, addr);
+		}
+	}
+	dsb(ish);
+}
+
+static inline void __local_flush_tlb_range(unsigned long addr, bool last_level)
+{
+	dsb(nshst);
+	if (last_level) {
+		__tlbi(vale1, addr);
+		__tlbi_user(vale1, addr);
+	} else {
+		__tlbi(vae1, addr);
+		__tlbi_user(vae1, addr);
+	}
+	dsb(nsh);
+}
+
+static inline void ipi_flush_tlb_range(void *arg)
+{
+	struct tlb_args *ta = (struct tlb_args *)arg;
+	unsigned long addr;
+
+	for (addr = ta->ta_start; addr < ta->ta_end; addr += ta->ta_stride)
+		__local_flush_tlb_range(addr, ta->ta_last_level);
+}
+
+void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+		       unsigned long end, unsigned long stride, bool last_level)
+{
+	unsigned long asid = ASID(vma->vm_mm);
+
+	start = round_down(start, stride);
+	end = round_up(end, stride);
+
+	if ((end - start) >= (MAX_TLBI_OPS * stride)) {
+		flush_tlb_mm(vma->vm_mm);
+		return;
+	}
+
+	/* Convert the stride into units of 4k */
+	stride >>= 12;
+
+	start = __TLBI_VADDR(start, asid);
+	end = __TLBI_VADDR(end, asid);
+
+	if (disable_tlbflush_is) {
+		struct tlb_args ta = {
+			.ta_start	= start,
+			.ta_end		= end,
+			.ta_stride	= stride,
+			.ta_last_level	= last_level,
+		};
+
+		on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range,
+				 &ta, true);
+	} else
+		___flush_tlb_range(start, end, stride, last_level);
+}
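
A note on the range fallback kept in __flush_tlb_range() above: as before this patch, a range of MAX_TLBI_OPS * stride bytes or more is collapsed into a single flush_tlb_mm(). Since MAX_TLBI_OPS is PTRS_PER_PTE (512 with 4KB pages), any range of at least 512 * 4KB = 2MB becomes one ASID-wide flush; with disable_tlbflush_is set, that means a single IPI round over mm_cpumask(mm) instead of per-page invalidations on each target CPU.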