From: Barry Song v-songbaohua@oppo.com
mainline inclusion from mainline-v6.0-rc1 commit d0637c505f8a1d8c4088642f1f3e9e3b22da14f6 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I7TVRF CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
-------------------------------------------
THP_SWAP has been proven to improve the swap throughput significantly on x86_64 according to commit bd4c82c22c367e ("mm, THP, swap: delay splitting THP after swapped out"). As long as arm64 uses 4K page size, it is quite similar with x86_64 by having 2MB PMD THP. THP_SWAP is architecture-independent, thus, enabling it on arm64 will benefit arm64 as well. A corner case is that MTE has an assumption that only base pages can be swapped. We won't enable THP_SWAP for ARM64 hardware with MTE support until MTE is reworked to coexist with THP_SWAP.
A micro-benchmark is written to measure thp swapout throughput as below,
unsigned long long tv_to_ms(struct timeval tv) { return tv.tv_sec * 1000 + tv.tv_usec / 1000; }
main() { struct timeval tv_b, tv_e;; #define SIZE 400*1024*1024 volatile void *p = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (!p) { perror("fail to get memory"); exit(-1); }
madvise(p, SIZE, MADV_HUGEPAGE); memset(p, 0x11, SIZE); /* write to get mem */
gettimeofday(&tv_b, NULL); madvise(p, SIZE, MADV_PAGEOUT); gettimeofday(&tv_e, NULL);
printf("swp out bandwidth: %ld bytes/ms\n", SIZE/(tv_to_ms(tv_e) - tv_to_ms(tv_b))); }
Testing is done on rk3568 64bit Quad Core Cortex-A55 platform - ROCK 3A. thp swp throughput w/o patch: 2734bytes/ms (mean of 10 tests) thp swp throughput w/ patch: 3331bytes/ms (mean of 10 tests)
Cc: "Huang, Ying" ying.huang@intel.com Cc: Minchan Kim minchan@kernel.org Cc: Johannes Weiner hannes@cmpxchg.org Cc: Hugh Dickins hughd@google.com Cc: Andrea Arcangeli aarcange@redhat.com Cc: Steven Price steven.price@arm.com Cc: Yang Shi shy828301@gmail.com Reviewed-by: Anshuman Khandual anshuman.khandual@arm.com Signed-off-by: Barry Song v-songbaohua@oppo.com Link: https://lore.kernel.org/r/20220720093737.133375-1-21cnbao@gmail.com Signed-off-by: Will Deacon will@kernel.org
Conflicts: arch/arm64/Kconfig include/linux/huge_mm.h mm/swap_slots.c
Signed-off-by: Jinjiang Tu tujinjiang@huawei.com --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/pgtable.h | 6 ++++++ include/linux/huge_mm.h | 12 ++++++++++++ mm/swap_slots.c | 2 +- 4 files changed, 20 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 16620146c49a..05302e1d8e19 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -86,6 +86,7 @@ config ARM64 select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANT_RESERVE_CRASH_KERNEL if KEXEC_CORE + select ARCH_WANTS_THP_SWAP if ARM64_4K_PAGES select ARCH_HAS_UBSAN_SANITIZE_ALL select ARM_AMBA select ARM_ARCH_TIMER diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index ab2443900f4e..f2843785f2ec 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -46,6 +46,12 @@ __flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1) #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+static inline bool arch_thp_swp_supported(void) +{ + return !system_supports_mte(); +} +#define arch_thp_swp_supported arch_thp_swp_supported + /* * Outside of a few very special situations (e.g. hibernation), we always * use broadcast TLB invalidation instructions, therefore a spurious page diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 176457145bcf..b993ae44111c 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -499,4 +499,16 @@ static inline unsigned long thp_size(struct page *page) return PAGE_SIZE << thp_order(page); }
+/* + * archs that select ARCH_WANTS_THP_SWAP but don't support THP_SWP due to + * limitations in the implementation like arm64 MTE can override this to + * false + */ +#ifndef arch_thp_swp_supported +static inline bool arch_thp_swp_supported(void) +{ + return true; +} +#endif + #endif /* _LINUX_HUGE_MM_H */ diff --git a/mm/swap_slots.c b/mm/swap_slots.c index fbb9888a0469..b7958ca276c2 100644 --- a/mm/swap_slots.c +++ b/mm/swap_slots.c @@ -436,7 +436,7 @@ swp_entry_t get_swap_page(struct page *page) goto out;
if (PageTransHuge(page)) { - if (IS_ENABLED(CONFIG_THP_SWAP)) + if (IS_ENABLED(CONFIG_THP_SWAP) && arch_thp_swp_supported()) get_swap_pages(1, &entry, HPAGE_PMD_NR, type); goto out; }