From: tangyuchen <tangyuchen5@huawei.com>
v2 -> v3:
- fix compile error
- bind to correct issue url

v1 -> v2:
- remove redundant symbol export
- use safer kthread_freezable_should_stop
- make THP our reclaim candidate
Yuchen Tang (2):
  etmem: Expose symbol reclaim_folio_list
  etmem: add swapcache reclaim to etmem
 fs/proc/etmem_swap.c  | 173 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/etmem.h |  29 +++++++
 include/linux/swap.h  |   2 +
 mm/etmem.c            | 173 ++++++++++++++++++++++++++++++++++++++++++
 mm/vmscan.c           |   2 +-
 5 files changed, 378 insertions(+), 1 deletion(-)
euleros inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I96X3J?from=project-issue
CVE: NA
-------------------------------------------------
Expose reclaim_folio_list so that etmem can use it for swapcache reclamation.
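For reference, a caller is expected to use the exported symbol along these lines (a minimal sketch, not part of this patch; the isolation step is elided and nid is a placeholder node id). Folios must already be isolated from their LRU, and anything left on the list afterwards must be put back by the caller:

	LIST_HEAD(folio_list);
	unsigned int nr_reclaimed;

	/* ... isolate candidate folios from their LRU onto folio_list ... */

	/*
	 * Reclaim against the folios' home node; folios that could not be
	 * reclaimed remain on the list and must be put back by the caller.
	 */
	nr_reclaimed = reclaim_folio_list(&folio_list, NODE_DATA(nid));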
Signed-off-by: Yuchen Tang <tangyuchen5@huawei.com>
Signed-off-by: tangyuchen <tangyuchen5@huawei.com>
---
 include/linux/swap.h | 2 ++
 mm/vmscan.c          | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index b47d288e657b..42bbdfd2fbb1 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -419,6 +419,8 @@ extern void lru_cache_add_inactive_or_unevictable(struct page *page,
 extern unsigned long zone_reclaimable_pages(struct zone *zone);
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 					gfp_t gfp_mask, nodemask_t *mask);
+extern unsigned int reclaim_folio_list(struct list_head *folio_list,
+					struct pglist_data *pgdat);
 extern unsigned long reclaim_pages(struct list_head *folio_list);
 
 #define MEMCG_RECLAIM_MAY_SWAP (1 << 1)
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 3b670b1d2b61..fc3d70abc78e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2791,7 +2791,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
 			nr_deactivate, nr_rotated, sc->priority, file);
 }
 
-static unsigned int reclaim_folio_list(struct list_head *folio_list,
+unsigned int reclaim_folio_list(struct list_head *folio_list,
 		struct pglist_data *pgdat)
 {
 	struct reclaim_stat dummy_stat;
euleros inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I96X3J?from=project-issue
CVE: NA
-------------------------------------------------
etmem is the memory vertical expansion technology.

In the current etmem process, memory folio swapping is implemented by
invoking shrink_folio_list. When this interface is invoked for the first
time, folios are added to the swap cache and written to disk. A swap
cache folio is reclaimed only when the interface is invoked a second
time and no process has accessed the folio in the meantime. However, in
the etmem process, user mode scans for folios that have been accessed,
and no further swap request is issued for folios that no process
accesses. Such folios are therefore never passed to shrink_folio_list a
second time, and their swap cache entries may stay occupied forever.

To solve this problem, add logic for actively reclaiming the swap
cache. When the swap cache occupies a large amount of memory, a kernel
thread proactively scans the LRU lists and reclaims swap cache folios
to keep swap cache usage within the configured watermark range.
Reference: https://gitee.com/openeuler/kernel/commit/44983705e56ab22fda801d66e2a6bd0d1b...
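For illustration, user space could drive the new ioctl interface roughly as below (a minimal sketch, not part of this patch: the ioctl numbers mirror the definitions added to fs/proc/etmem_swap.c, while the /proc path is only an assumed example). The watermark argument packs the low ratio in bits 0-7 and the high ratio in bits 8-15, each a percentage of total RAM:

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>

	#define RECLAIM_SWAPCACHE_MAGIC	0X77
	#define SET_SWAPCACHE_WMARK	_IOW(RECLAIM_SWAPCACHE_MAGIC, 0x02, unsigned int)
	#define RECLAIM_SWAPCACHE_ON	_IOW(RECLAIM_SWAPCACHE_MAGIC, 0x01, unsigned int)

	int main(void)
	{
		/* path assumed for illustration: etmem's swap_pages proc file */
		int fd = open("/proc/1/swap_pages", O_RDWR);
		/* low watermark 70%, high watermark 80% of total RAM */
		unsigned int ratio = (80 << 8) | 70;

		if (fd < 0)
			return 1;
		if (ioctl(fd, SET_SWAPCACHE_WMARK, &ratio) < 0)
			perror("SET_SWAPCACHE_WMARK");
		if (ioctl(fd, RECLAIM_SWAPCACHE_ON, 0) < 0)
			perror("RECLAIM_SWAPCACHE_ON");
		close(fd);
		return 0;
	}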
Signed-off-by: liubo <liubo254@huawei.com>
Signed-off-by: Yuchen Tang <tangyuchen5@huawei.com>
Signed-off-by: tangyuchen <tangyuchen5@huawei.com>
---
 fs/proc/etmem_swap.c  | 173 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/etmem.h |  29 +++++++
 mm/etmem.c            | 173 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 375 insertions(+)
diff --git a/fs/proc/etmem_swap.c b/fs/proc/etmem_swap.c
index 4aad6b9db9a6..b4a35da9ac3d 100644
--- a/fs/proc/etmem_swap.c
+++ b/fs/proc/etmem_swap.c
@@ -11,6 +11,22 @@
 #include <linux/uaccess.h>
 #include <linux/delay.h>
 #include <linux/etmem.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+
+#define RECLAIM_SWAPCACHE_MAGIC	0X77
+#define SET_SWAPCACHE_WMARK	_IOW(RECLAIM_SWAPCACHE_MAGIC, 0x02, unsigned int)
+#define RECLAIM_SWAPCACHE_ON	_IOW(RECLAIM_SWAPCACHE_MAGIC, 0x01, unsigned int)
+#define RECLAIM_SWAPCACHE_OFF	_IOW(RECLAIM_SWAPCACHE_MAGIC, 0x00, unsigned int)
+
+#define WATERMARK_MAX		100
+#define SWAP_SCAN_NUM_MAX	32
+
+static struct task_struct *reclaim_swapcache_tk;
+static bool enable_swapcache_reclaim;
+static unsigned long swapcache_watermark[ETMEM_SWAPCACHE_NR_WMARK];
+
+static DECLARE_WAIT_QUEUE_HEAD(reclaim_queue);
 
 static ssize_t swap_pages_write(struct file *file, const char __user *buf,
 				size_t count, loff_t *ppos)
@@ -82,6 +98,152 @@ static int swap_pages_release(struct inode *inode, struct file *file)
 extern struct file_operations proc_swap_pages_operations;
 
+/* check if the swapcache meets its watermark requirement */
+static bool swapcache_balanced(void)
+{
+	return total_swapcache_pages() < swapcache_watermark[ETMEM_SWAPCACHE_WMARK_HIGH];
+}
+
+/* the flag is set while swapcache reclaim is running */
+static bool swapcache_reclaim_enabled(void)
+{
+	return READ_ONCE(enable_swapcache_reclaim);
+}
+
+static void start_swapcache_reclaim(void)
+{
+	if (swapcache_balanced())
+		return;
+	/* RECLAIM_SWAPCACHE_ON triggers the thread to start running. */
+	if (!waitqueue_active(&reclaim_queue))
+		return;
+
+	WRITE_ONCE(enable_swapcache_reclaim, true);
+	wake_up_interruptible(&reclaim_queue);
+}
+
+static void stop_swapcache_reclaim(void)
+{
+	WRITE_ONCE(enable_swapcache_reclaim, false);
+}
+
+static bool should_goto_sleep(void)
+{
+	if (swapcache_balanced())
+		stop_swapcache_reclaim();
+
+	if (swapcache_reclaim_enabled())
+		return false;
+
+	return true;
+}
+
+static int get_swapcache_watermark(unsigned int ratio)
+{
+	unsigned int low_watermark;
+	unsigned int high_watermark;
+
+	low_watermark = ratio & 0xFF;
+	high_watermark = (ratio >> 8) & 0xFF;
+	if (low_watermark > WATERMARK_MAX ||
+	    high_watermark > WATERMARK_MAX ||
+	    low_watermark > high_watermark)
+		return -EPERM;
+
+	swapcache_watermark[ETMEM_SWAPCACHE_WMARK_LOW] = totalram_pages() *
+						low_watermark / WATERMARK_MAX;
+	swapcache_watermark[ETMEM_SWAPCACHE_WMARK_HIGH] = totalram_pages() *
+						high_watermark / WATERMARK_MAX;
+
+	return 0;
+}
+
+static void reclaim_swapcache_try_to_sleep(void)
+{
+	DEFINE_WAIT(wait);
+
+	if (freezing(current) || kthread_should_stop())
+		return;
+
+	prepare_to_wait(&reclaim_queue, &wait, TASK_INTERRUPTIBLE);
+	if (should_goto_sleep()) {
+		if (!kthread_should_stop())
+			schedule();
+	}
+	finish_wait(&reclaim_queue, &wait);
+}
+
+static void etmem_reclaim_swapcache(void)
+{
+	do_swapcache_reclaim(swapcache_watermark,
+			     ARRAY_SIZE(swapcache_watermark));
+	stop_swapcache_reclaim();
+}
+
+static int reclaim_swapcache_proactive(void *para)
+{
+	set_freezable();
+
+	while (1) {
+		bool ret;
+
+		reclaim_swapcache_try_to_sleep();
+		ret = try_to_freeze();
+		if (kthread_freezable_should_stop(NULL))
+			break;
+
+		if (ret)
+			continue;
+
+		etmem_reclaim_swapcache();
+	}
+
+	return 0;
+}
+
+static int reclaim_swapcache_run(void)
+{
+	int ret = 0;
+
+	reclaim_swapcache_tk = kthread_run(reclaim_swapcache_proactive, NULL,
+					   "etmem_reclaim_swapcache");
+	if (IS_ERR(reclaim_swapcache_tk)) {
+		ret = PTR_ERR(reclaim_swapcache_tk);
+		reclaim_swapcache_tk = NULL;
+	}
+	return ret;
+}
+
+static long swap_page_ioctl(struct file *filp, unsigned int cmd,
+			    unsigned long arg)
+{
+	void __user *argp = (void __user *)arg;
+	unsigned int ratio;
+
+	switch (cmd) {
+	case RECLAIM_SWAPCACHE_ON:
+		if (swapcache_reclaim_enabled())
+			return 0;
+		start_swapcache_reclaim();
+		break;
+	case RECLAIM_SWAPCACHE_OFF:
+		stop_swapcache_reclaim();
+		break;
+	case SET_SWAPCACHE_WMARK:
+		if (get_user(ratio, (unsigned int __user *)argp))
+			return -EFAULT;
+
+		if (get_swapcache_watermark(ratio) != 0)
+			return -EFAULT;
+		break;
+	default:
+		return -EPERM;
+	}
+
+	return 0;
+}
+
+
 static int swap_pages_entry(void)
 {
 	proc_swap_pages_operations.flock(NULL, 1, NULL);
@@ -89,8 +251,12 @@ static int swap_pages_entry(void)
 	proc_swap_pages_operations.write = swap_pages_write;
 	proc_swap_pages_operations.open = swap_pages_open;
 	proc_swap_pages_operations.release = swap_pages_release;
+	proc_swap_pages_operations.unlocked_ioctl = swap_page_ioctl;
 	proc_swap_pages_operations.flock(NULL, 0, NULL);
 
+	enable_swapcache_reclaim = false;
+	reclaim_swapcache_run();
+
 	return 0;
 }
@@ -101,7 +267,14 @@ static void swap_pages_exit(void)
 	proc_swap_pages_operations.write = NULL;
 	proc_swap_pages_operations.open = NULL;
 	proc_swap_pages_operations.release = NULL;
+	proc_swap_pages_operations.unlocked_ioctl = NULL;
 	proc_swap_pages_operations.flock(NULL, 0, NULL);
+
+	/* reclaim_swapcache_tk is NULL when kthread_run() failed */
+	if (reclaim_swapcache_tk) {
+		kthread_stop(reclaim_swapcache_tk);
+		reclaim_swapcache_tk = NULL;
+	}
+
 	return;
 }
 
 MODULE_LICENSE("GPL");
diff --git a/include/linux/etmem.h b/include/linux/etmem.h
index 9ec9657e56ed..c33542b339a0 100644
--- a/include/linux/etmem.h
+++ b/include/linux/etmem.h
@@ -9,6 +9,28 @@
 #include <linux/page-flags.h>
 
 #ifdef CONFIG_ETMEM
+/**
+ * list_for_each_entry_safe_reverse_from - iterate backwards over list from
+ * current point safe against removal
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the list_head within the struct.
+ *
+ * Iterate backwards over list of given type from current point, safe against
+ * removal of list entry.
+ */
+#define list_for_each_entry_safe_reverse_from(pos, n, head, member)	\
+	for (n = list_prev_entry(pos, member);				\
+	     !list_entry_is_head(pos, head, member);			\
+	     pos = n, n = list_prev_entry(n, member))
+
+enum etmem_swapcache_watermark_en {
+	ETMEM_SWAPCACHE_WMARK_LOW,
+	ETMEM_SWAPCACHE_WMARK_HIGH,
+	ETMEM_SWAPCACHE_NR_WMARK
+};
 
 #if IS_ENABLED(CONFIG_KVM)
 static inline struct kvm *mm_kvm(struct mm_struct *mm)
@@ -27,6 +49,8 @@ extern struct page *get_page_from_vaddr(struct mm_struct *mm,
 		unsigned long vaddr);
 extern struct kobj_attribute kernel_swap_enable_attr;
 extern bool kernel_swap_enabled(void);
+extern int do_swapcache_reclaim(unsigned long *swapcache_watermark,
+		unsigned int watermark_nr);
 #else /* !CONFIG_ETMEM */
 static inline int add_page_for_swap(struct page *page, struct list_head *pagelist)
 {
@@ -43,5 +67,10 @@ static inline bool kernel_swap_enabled(void)
 {
 	return true;
 }
+static inline int do_swapcache_reclaim(unsigned long *swapcache_watermark,
+		unsigned int watermark_nr)
+{
+	return 0;
+}
 #endif /* #ifdef CONFIG_ETMEM */
 #endif /* define __MM_ETMEM_H_ */
diff --git a/mm/etmem.c b/mm/etmem.c
index acd32e71a643..5accf8e0bbdf 100644
--- a/mm/etmem.c
+++ b/mm/etmem.c
@@ -93,3 +93,176 @@ struct page *get_page_from_vaddr(struct mm_struct *mm, unsigned long vaddr)
 	return page;
 }
 EXPORT_SYMBOL_GPL(get_page_from_vaddr);
+
+#define SWAP_SCAN_NUM_MAX	32
+
+static unsigned long get_swapcache_reclaim_num(unsigned long *swapcache_watermark)
+{
+	return total_swapcache_pages() >
+		swapcache_watermark[ETMEM_SWAPCACHE_WMARK_LOW] ?
+		(total_swapcache_pages() - swapcache_watermark[ETMEM_SWAPCACHE_WMARK_LOW]) : 0;
+}
+
+static int move_lru_folios_to_list(struct lruvec *lruvec,
+				   struct folio *folio, struct list_head *foliolist)
+{
+	if (!folio_test_large(folio)) {
+		/* If another process is also mapping this folio */
+		if (folio_mapcount(folio) > 1)
+			return -EACCES;
+	} else if (folio_test_hugetlb(folio)) {
+		/* Do not reclaim hugetlb folios */
+		return -EACCES;
+	} else {
+		/* Try to reclaim THP unless it is mapped by another process */
+		if (folio_entire_mapcount(folio) > 1)
+			return -EACCES;
+	}
+
+	/*
+	 * Try to get a reference to the folio; this may fail if the
+	 * folio has been freed or frozen.
+	 */
+	if (!folio_try_get(folio))
+		return -1;
+
+	/* racing with another isolation */
+	if (!folio_test_clear_lru(folio)) {
+		folio_put(folio);
+		return -1;
+	}
+
+	list_move(&folio->lru, foliolist);
+	update_lru_size(lruvec,
+			LRU_INACTIVE_ANON,
+			folio_zonenum(folio),
+			-folio_nr_pages(folio));
+	return 0;
+}
+
+/*
+ * For each node, scan the inactive anon lru, isolate and move
+ * appropriate candidates to swapcache_list[nid]
+ */
+static void memcg_reclaim_swapcache(struct list_head *swapcache_list,
+				    unsigned long swapcache_to_reclaim)
+{
+	struct mem_cgroup *memcg = NULL, *target_memcg = NULL;
+	struct lruvec *lruvec;
+	int nid;
+	pg_data_t *pgdat;
+	unsigned int scan_count = 0;
+	unsigned long swapcache_total_reclaimable = 0;
+	struct list_head *src = NULL;
+	struct folio *folio = NULL, *next = NULL, *pos = NULL;
+
+	for_each_node_state(nid, N_MEMORY) {
+		INIT_LIST_HEAD(&swapcache_list[nid]);
+		cond_resched();
+		pgdat = NODE_DATA(nid);
+
+		memcg = mem_cgroup_iter(target_memcg, NULL, NULL);
+		do {
+			cond_resched();
+			lruvec = mem_cgroup_lruvec(memcg, pgdat);
+			src = &(lruvec->lists[LRU_INACTIVE_ANON]);
+
+			spin_lock_irq(&lruvec->lru_lock);
+			pos = list_last_entry(src, struct folio, lru);
+			spin_unlock_irq(&lruvec->lru_lock);
+reverse_scan_lru:
+			cond_resched();
+			scan_count = 0;
+
+			spin_lock_irq(&lruvec->lru_lock);
+			if (!pos || list_entry_is_head(pos, src, lru)) {
+				spin_unlock_irq(&lruvec->lru_lock);
+				continue;
+			}
+
+			if (!folio_test_lru(pos) || folio_lru_list(pos) != LRU_INACTIVE_ANON) {
+				spin_unlock_irq(&lruvec->lru_lock);
+				continue;
+			}
+
+			folio = pos;
+
+			list_for_each_entry_safe_reverse_from(folio, next, src, lru) {
+				pos = next;
+				scan_count++;
+				if (scan_count >= SWAP_SCAN_NUM_MAX)
+					break;
+
+				if (!folio_test_swapcache(folio) || folio_mapped(folio))
+					continue;
+
+				if (move_lru_folios_to_list(lruvec,
+							    folio,
+							    &swapcache_list[nid]) != 0)
+					continue;
+
+				swapcache_total_reclaimable += folio_nr_pages(folio);
+			}
+			spin_unlock_irq(&lruvec->lru_lock);
+
+			if (swapcache_total_reclaimable >= swapcache_to_reclaim)
+				break;
+
+			if (scan_count >= SWAP_SCAN_NUM_MAX)
+				goto reverse_scan_lru;
+
+		} while ((memcg = mem_cgroup_iter(target_memcg, memcg, NULL)));
+	}
+}
+
+static int lru_gen_reclaim_swapcache(struct list_head *swapcache_list,
+				     unsigned long swapcache_to_reclaim)
+{
+	return 0;
+}
+
+int do_swapcache_reclaim(unsigned long *swapcache_watermark,
+			 unsigned int watermark_nr)
+{
+	int nid;
+	unsigned long swapcache_to_reclaim = 0;
+	struct list_head *swapcache_list = NULL, *folio_list = NULL;
+	struct folio *folio = NULL;
+
+	if (swapcache_watermark == NULL ||
+	    watermark_nr < ETMEM_SWAPCACHE_NR_WMARK)
+		return -EINVAL;
+
+	if (lru_gen_enabled())
+		return lru_gen_reclaim_swapcache(swapcache_list, swapcache_to_reclaim);
+
+	swapcache_to_reclaim = get_swapcache_reclaim_num(swapcache_watermark);
+
+	swapcache_list = kcalloc(MAX_NUMNODES, sizeof(struct list_head), GFP_KERNEL);
+	if (swapcache_list == NULL)
+		return -ENOMEM;
+
+	memcg_reclaim_swapcache(swapcache_list, swapcache_to_reclaim);
+
+	/* Reclaim all the swapcache we have scanned */
+	for_each_node_state(nid, N_MEMORY) {
+		cond_resched();
+		reclaim_folio_list(&swapcache_list[nid], NODE_DATA(nid));
+	}
+
+	/* Put back all the folios that were not reclaimed by reclaim_folio_list */
+	for_each_node_state(nid, N_MEMORY) {
+		cond_resched();
+		folio_list = &swapcache_list[nid];
+		while (!list_empty(folio_list)) {
+			folio = lru_to_folio(folio_list);
+			list_del(&folio->lru);
+			folio_putback_lru(folio);
+		}
+	}
+
+	kfree(swapcache_list);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(do_swapcache_reclaim);
FeedBack: The patch(es) which you have sent to the kernel@openeuler.org mailing list have been converted to a pull request successfully!
Pull request link: https://gitee.com/openeuler/kernel/pulls/5047
Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/A...