From: liubo <liubo254@huawei.com>
euleros inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DC4A
CVE: NA
-------------------------------------------------
etmem, a memory vertical-expansion technology, combines DRAM with new high-performance storage media to form multi-level memory storage. By grading data according to how frequently it is accessed, etmem migrates cold data out of DRAM to the high-performance storage medium, expanding effective memory capacity and reducing memory cost.
While the memory expansion feature etmem is running, the kernel's native swap must be disabled in certain scenarios so that kernel swap does not interfere with etmem's own migration. This patch provides that switch.
The kernel_swap_enable sysfs interface under /sys/kernel/mm/swap/ enables or disables the kernel's native swap.
The default value of /sys/kernel/mm/swap/kernel_swap_enable is true, that is, kernel swap is enabled by default.
Turn on kernel swap: echo true > /sys/kernel/mm/swap/kernel_swap_enable
Turn off kernel swap: echo false > /sys/kernel/mm/swap/kernel_swap_enable
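The switch can also be flipped programmatically, e.g. by a management daemon, by writing to the same sysfs file. A minimal userspace sketch in C; the helper below is illustrative only and not part of this patch:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define KSWAP_CTL "/sys/kernel/mm/swap/kernel_swap_enable"

/* Write "true" or "false" to the kernel_swap_enable interface. */
static int set_kernel_swap(const char *val)
{
	int fd = open(KSWAP_CTL, O_WRONLY);
	ssize_t len = (ssize_t)strlen(val);

	if (fd < 0) {
		perror("open " KSWAP_CTL);
		return -1;
	}
	if (write(fd, val, len) != len) {
		perror("write " KSWAP_CTL);
		close(fd);
		return -1;
	}
	close(fd);
	return 0;
}

int main(void)
{
	/* Disable kernel swap, e.g. before starting etmem migration. */
	return set_kernel_swap("false") ? 1 : 0;
}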
Signed-off-by: liubo <liubo254@huawei.com>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
---
 include/linux/swap.h |  5 ++++-
 mm/swap_state.c      | 37 +++++++++++++++++++++++++++++++++++++
 mm/vmscan.c          | 27 +++++++++++++++++++++++++++
 3 files changed, 68 insertions(+), 1 deletion(-)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index adb294970605..fbac6b2236a9 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -453,7 +453,6 @@ extern struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t flag,
 					struct vm_fault *vmf);
 extern struct page *swapin_readahead(swp_entry_t entry, gfp_t flag,
 				struct vm_fault *vmf);
-
 /* linux/mm/swapfile.c */
 extern atomic_long_t nr_swap_pages;
 extern long total_swap_pages;
@@ -730,5 +729,9 @@ static inline bool mem_cgroup_swap_full(struct page *page)
 }
 #endif
 
+#ifdef CONFIG_ETMEM
+extern bool kernel_swap_enabled(void);
+#endif
+
 #endif /* __KERNEL__*/
 #endif /* _LINUX_SWAP_H */
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 149f46781061..d58cbf4fe27f 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -39,6 +39,9 @@ static const struct address_space_operations swap_aops = {
 struct address_space *swapper_spaces[MAX_SWAPFILES] __read_mostly;
 static unsigned int nr_swapper_spaces[MAX_SWAPFILES] __read_mostly;
 static bool enable_vma_readahead __read_mostly = true;
+#ifdef CONFIG_ETMEM
+static bool enable_kernel_swap __read_mostly = true;
+#endif
 
 #define SWAP_RA_WIN_SHIFT	(PAGE_SHIFT / 2)
 #define SWAP_RA_HITS_MASK	((1UL << SWAP_RA_WIN_SHIFT) - 1)
@@ -349,6 +352,13 @@ static inline bool swap_use_vma_readahead(void)
 	return READ_ONCE(enable_vma_readahead) && !atomic_read(&nr_rotate_swap);
 }
 
+#ifdef CONFIG_ETMEM
+bool kernel_swap_enabled(void)
+{
+	return READ_ONCE(enable_kernel_swap);
+}
+#endif
+
 /*
  * Lookup a swap entry in the swap cache. A found page will be returned
  * unlocked and with its refcount incremented - we rely on the kernel
@@ -909,8 +919,35 @@ static struct kobj_attribute vma_ra_enabled_attr =
 	__ATTR(vma_ra_enabled, 0644, vma_ra_enabled_show,
 	       vma_ra_enabled_store);
 
+#ifdef CONFIG_ETMEM
+static ssize_t kernel_swap_enable_show(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%s\n", enable_kernel_swap ? "true" : "false");
+}
+
+static ssize_t kernel_swap_enable_store(struct kobject *kobj,
+					struct kobj_attribute *attr,
+					const char *buf, size_t count)
+{
+	if (!strncmp(buf, "true", 4) || !strncmp(buf, "1", 1))
+		WRITE_ONCE(enable_kernel_swap, true);
+	else if (!strncmp(buf, "false", 5) || !strncmp(buf, "0", 1))
+		WRITE_ONCE(enable_kernel_swap, false);
+	else
+		return -EINVAL;
+
+	return count;
+}
+
+static struct kobj_attribute kernel_swap_enable_attr =
+	__ATTR(kernel_swap_enable, 0644, kernel_swap_enable_show,
+	       kernel_swap_enable_store);
+#endif
+
 static struct attribute *swap_attrs[] = {
 	&vma_ra_enabled_attr.attr,
+#ifdef CONFIG_ETMEM
+	&kernel_swap_enable_attr.attr,
+#endif
 	NULL,
 };
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a8f7804d15d2..1913dbf31187 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3500,6 +3500,18 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
 	return false;
 }
 
+#ifdef CONFIG_ETMEM
+/*
+ * Check whether kernel swap is enabled; if it is not,
+ * turn off kernel swap but leave page cache reclaim on.
+ */
+static inline void kernel_swap_check(struct scan_control *sc)
+{
+	if (sc != NULL && !kernel_swap_enabled())
+		sc->may_swap = 0;
+}
+#endif
+
 unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 				gfp_t gfp_mask, nodemask_t *nodemask)
 {
@@ -3516,6 +3528,9 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 		.may_swap = 1,
 	};
 
+#ifdef CONFIG_ETMEM
+	kernel_swap_check(&sc);
+#endif
 	/*
 	 * scan_control uses s8 fields for order, priority, and reclaim_idx.
 	 * Confirm they are large enough for max values.
@@ -3912,6 +3927,10 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx)
 		sc.may_writepage = !laptop_mode && !nr_boost_reclaim;
 		sc.may_swap = !nr_boost_reclaim;
 
+#ifdef CONFIG_ETMEM
+		kernel_swap_check(&sc);
+#endif
+
 		/*
 		 * Do some background aging of the anon list, to give
 		 * pages a chance to be referenced before reclaiming. All
@@ -4288,6 +4307,10 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
 	noreclaim_flag = memalloc_noreclaim_save();
 	set_task_reclaim_state(current, &sc.reclaim_state);
 
+#ifdef CONFIG_ETMEM
+	kernel_swap_check(&sc);
+#endif
+
 	nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
 
 	set_task_reclaim_state(current, NULL);
@@ -4452,6 +4475,10 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
 	cond_resched();
 	psi_memstall_enter(&pflags);
 	fs_reclaim_acquire(sc.gfp_mask);
+
+#ifdef CONFIG_ETMEM
+	kernel_swap_check(&sc);
+#endif
 	/*
 	 * We need to be able to allocate from the reserves for RECLAIM_UNMAP
 	 * and we also need to be able to write out pages for RECLAIM_WRITE
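Not part of this patch, but one way to sanity-check the switch: with kernel_swap_enable set to false, the pswpout counter in /proc/vmstat should stay flat under memory pressure, while page cache reclaim continues. A small sketch that reads the counter:

#include <stdio.h>

/* Return the pswpout counter from /proc/vmstat, or -1 on error. */
static long long read_pswpout(void)
{
	char line[256];
	long long val = -1;
	FILE *fp = fopen("/proc/vmstat", "r");

	if (!fp)
		return -1;
	while (fgets(line, sizeof(line), fp)) {
		if (sscanf(line, "pswpout %lld", &val) == 1)
			break;
	}
	fclose(fp);
	return val;
}

int main(void)
{
	/* Sample before and after applying memory pressure; with the
	 * switch off, the delta should be zero. */
	printf("pswpout: %lld\n", read_pswpout());
	return 0;
}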