euleros inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4QVXW CVE: NA
-------------------------------------------------
etmem, the memory vertical expansion technology, uses DRAM together with new high-performance storage media to form multi-level memory storage. By grading the stored data, etmem migrates the classified cold data from DRAM to the high-performance storage medium, thereby expanding the effective memory capacity and reducing memory cost.
When the memory expansion function etmem is running, the native swap function of the kernel needs to be disabled in certain scenarios to avoid the impact of kernel swap.
This feature provides the preceding functions.
The /sys/kernel/mm/swap/ directory provides the kernel_swap_enable sys interface to enable or disable the native swap function of the kernel.
The default value of /sys/kernel/mm/swap/kernel_swap_enable is true, that is, kernel swap is enabled by default.
Turn on kernel swap: echo true > /sys/kernel/mm/swap/kernel_swap_enable
Turn off kernel swap: echo false > /sys/kernel/mm/swap/kernel_swap_enable
Signed-off-by: liubo <liubo254@huawei.com> Reviewed-by: Miaohe Lin <linmiaohe@huawei.com> Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com> Signed-off-by: Yang Yingliang <yangyingliang@huawei.com> Signed-off-by: Yuchen Tang <tangyuchen5@huawei.com> --- include/linux/etmem.h | 7 +++++++ mm/etmem.c | 30 ++++++++++++++++++++++++++++++ mm/swap_state.c | 4 ++++ mm/vmscan.c | 17 +++++++++++++++++ 4 files changed, 58 insertions(+)
diff --git a/include/linux/etmem.h b/include/linux/etmem.h index 8f6d7b15d6ea..668b19147ca9 100644 --- a/include/linux/etmem.h +++ b/include/linux/etmem.h @@ -46,10 +46,12 @@ static inline enum node_type get_node_type(int nid) } #endif
+extern struct kobj_attribute kernel_swap_enable_attr; extern unsigned int sysctl_vmemmap_block_from_dram; extern int add_page_for_swap(struct page *page, struct list_head *pagelist); extern struct page *get_page_from_vaddr(struct mm_struct *mm, unsigned long vaddr); +extern bool kernel_swap_enabled(void); #else /* !CONFIG_ETMEM */ static inline int add_page_for_swap(struct page *page, struct list_head *pagelist) { @@ -61,5 +63,10 @@ static inline struct page *get_page_from_vaddr(struct mm_struct *mm, { return NULL; } + +static inline bool kernel_swap_enabled(void) +{ + return true; +} #endif /* #ifdef CONFIG_ETMEM */ #endif /* define __MM_ETMEM_H_ */ diff --git a/mm/etmem.c b/mm/etmem.c index e2ba83583246..b5c5cf7768c6 100644 --- a/mm/etmem.c +++ b/mm/etmem.c @@ -9,6 +9,7 @@ #include <linux/etmem.h> #include "internal.h"
+static bool enable_kernel_swap __read_mostly = true; unsigned int sysctl_vmemmap_block_from_dram; #ifdef CONFIG_NUMA enum node_type nodes_type[MAX_NUMNODES]; @@ -42,6 +43,35 @@ int find_best_peer_node(int nid) return peer; }
+bool kernel_swap_enabled(void) +{ + return READ_ONCE(enable_kernel_swap); +} + +static ssize_t kernel_swap_enable_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%s\n", enable_kernel_swap ? "true" : "false"); +} + +static ssize_t kernel_swap_enable_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + if (!strncmp(buf, "true", 4) || !strncmp(buf, "1", 1)) + WRITE_ONCE(enable_kernel_swap, true); + else if (!strncmp(buf, "false", 5) || !strncmp(buf, "0", 1)) + WRITE_ONCE(enable_kernel_swap, false); + else + return -EINVAL; + + return count; +} + +struct kobj_attribute kernel_swap_enable_attr = + __ATTR(kernel_swap_enable, 0644, kernel_swap_enable_show, + kernel_swap_enable_store); + #ifdef CONFIG_SYSCTL static struct ctl_table vm_vmemmap_table[] = { { diff --git a/mm/swap_state.c b/mm/swap_state.c index b3b14bd0dd64..ddb3a65e5c6e 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -21,6 +21,7 @@ #include <linux/swap_slots.h> #include <linux/huge_mm.h> #include <linux/shmem_fs.h> +#include <linux/etmem.h> #include "internal.h" #include "swap.h"
@@ -881,6 +882,9 @@ static struct kobj_attribute vma_ra_enabled_attr = __ATTR_RW(vma_ra_enabled);
static struct attribute *swap_attrs[] = { &vma_ra_enabled_attr.attr, +#ifdef CONFIG_ETMEM + &kernel_swap_enable_attr.attr, +#endif NULL, };
diff --git a/mm/vmscan.c b/mm/vmscan.c index 3f70e680d144..03b93ce6b536 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -7042,6 +7042,18 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist, return false; }
+/* + * Check whether the native kernel swap is enabled; + * if it is disabled, turn off swapping but leave page cache reclaim on. + */ +static inline void kernel_force_no_swap(struct scan_control *sc) +{ +#ifdef CONFIG_ETMEM + if (sc != NULL && !kernel_swap_enabled()) + sc->may_swap = 0; +#endif +} + unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *nodemask) { @@ -7058,6 +7070,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, .may_swap = 1, };
+ kernel_force_no_swap(&sc); /* * scan_control uses s8 fields for order, priority, and reclaim_idx. * Confirm they are large enough for max values. @@ -7496,6 +7509,8 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx) sc.may_writepage = !laptop_mode && !nr_boost_reclaim; sc.may_swap = !nr_boost_reclaim;
+ kernel_force_no_swap(&sc); + /* * Do some background aging, to give pages a chance to be * referenced before reclaiming. All pages are rotated @@ -7874,6 +7889,8 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) noreclaim_flag = memalloc_noreclaim_save(); set_task_reclaim_state(current, &sc.reclaim_state);
+ kernel_force_no_swap(&sc); + nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
set_task_reclaim_state(current, NULL);