From: ZhangPeng zhangpeng362@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8KESX CVE: NA
--------------------------------
VM_USWAP is set in vma->vm_flags to tell the common VM code that userswap is registered for that VMA. SWP_USERSWAP_ENTRY is the swap entry type used when userswap memory is swapped out, and is_userswap_entry() is introduced to check whether a swap entry is of that type. Handle userswap entries in zap_pte_range() so that they no longer fall through to the WARN_ON_ONCE(1) in the catch-all branch.
Signed-off-by: ZhangPeng zhangpeng362@huawei.com --- fs/proc/task_mmu.c | 3 +++ include/linux/mm.h | 7 +++++++ include/linux/swap.h | 14 +++++++++++++- include/linux/swapops.h | 12 ++++++++++++ include/trace/events/mmflags.h | 7 +++++++ mm/Kconfig | 10 ++++++++++ mm/memory.c | 3 +++ 7 files changed, 55 insertions(+), 1 deletion(-)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 3dd5be96691b..fe12b057d077 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -700,6 +700,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) #ifdef CONFIG_X86_USER_SHADOW_STACK [ilog2(VM_SHADOW_STACK)] = "ss", #endif +#ifdef CONFIG_USERSWAP + [ilog2(VM_USWAP)] = "us", +#endif /* CONFIG_USERSWAP */ }; size_t i;
diff --git a/include/linux/mm.h b/include/linux/mm.h index e2ba77243461..abafc9efc30f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -313,6 +313,13 @@ extern unsigned int kobjsize(const void *objp); #define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */ #define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */
+#ifdef CONFIG_USERSWAP +# define VM_USWAP_BIT 62 +#define VM_USWAP BIT(VM_USWAP_BIT) +#else /* !CONFIG_USERSWAP */ +#define VM_USWAP VM_NONE +#endif /* CONFIG_USERSWAP */ + #ifdef CONFIG_ARCH_USES_HIGH_VMA_FLAGS #define VM_HIGH_ARCH_BIT_0 32 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_1 33 /* bit only usable on 64-bit architectures */ diff --git a/include/linux/swap.h b/include/linux/swap.h index f6dd6575b905..fe20c462fecb 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -55,6 +55,18 @@ static inline int current_is_kswapd(void) * actions on faults. */
+/* + * Userswap entry type + */ +#ifdef CONFIG_USERSWAP +#define SWP_USERSWAP_NUM 1 +#define SWP_USERSWAP_ENTRY (MAX_SWAPFILES + SWP_HWPOISON_NUM + \ + SWP_MIGRATION_NUM + SWP_DEVICE_NUM + \ + SWP_PTE_MARKER_NUM) +#else +#define SWP_USERSWAP_NUM 0 +#endif + /* * PTE markers are used to persist information onto PTEs that otherwise * should be a none pte. As its name "PTE" hints, it should only be @@ -117,7 +129,7 @@ static inline int current_is_kswapd(void) #define MAX_SWAPFILES \ ((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \ SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - \ - SWP_PTE_MARKER_NUM) + SWP_PTE_MARKER_NUM - SWP_USERSWAP_NUM)
/* * Magic header for a swap area. The first part of the union is diff --git a/include/linux/swapops.h b/include/linux/swapops.h index bff1e8d97de0..6b4ed6bfb67c 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -455,6 +455,18 @@ static inline int pte_none_mostly(pte_t pte) return pte_none(pte) || is_pte_marker(pte); }
+#ifdef CONFIG_USERSWAP +static inline int is_userswap_entry(swp_entry_t entry) +{ + return unlikely(swp_type(entry) == SWP_USERSWAP_ENTRY); +} +#else +static inline int is_userswap_entry(swp_entry_t entry) +{ + return 0; +} +#endif + static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry) { struct page *p = pfn_to_page(swp_offset_pfn(entry)); diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 1478b9dd05fa..18d30581137a 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -164,6 +164,12 @@ IF_HAVE_PG_ARCH_X(arch_3) # define IF_HAVE_UFFD_MINOR(flag, name) #endif
+#ifdef CONFIG_USERSWAP +#define IF_HAVE_VM_USWAP(flag, name) {flag, name }, +#else +#define IF_HAVE_VM_USWAP(flag, name) +#endif + #define __def_vmaflag_names \ {VM_READ, "read" }, \ {VM_WRITE, "write" }, \ @@ -196,6 +202,7 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \ {VM_MIXEDMAP, "mixedmap" }, \ {VM_HUGEPAGE, "hugepage" }, \ {VM_NOHUGEPAGE, "nohugepage" }, \ +IF_HAVE_VM_USWAP(VM_USWAP, "userswap" ) \ {VM_MERGEABLE, "mergeable" } \
#define show_vma_flags(flags) \ diff --git a/mm/Kconfig b/mm/Kconfig index 0f68e5bbeb89..822c2ded2e26 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1227,6 +1227,16 @@ config PTE_MARKER_UFFD_WP purposes. It is required to enable userfaultfd write protection on file-backed memory types like shmem and hugetlbfs.
+config USERSWAP + bool "Enable User Swap" + depends on MMU && USERFAULTFD + depends on X86 || ARM64 + default n + + help + Support for User Swap. This is based on userfaultfd. We can implement + our own swapout and swapin functions in userspace. + # multi-gen LRU { config LRU_GEN bool "Multi-Gen LRU" diff --git a/mm/memory.c b/mm/memory.c index e1a0eb8b776a..862e14416027 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1516,6 +1516,9 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, is_poisoned_swp_entry(entry)) { if (!should_zap_cows(details)) continue; + } else if (is_userswap_entry(entry)) { + if (!should_zap_cows(details)) + continue; } else { /* We should have covered all the swap entry types */ WARN_ON_ONCE(1);