Liu Yuntao (1): add "enable_swap" ability
Ni Cunshu (1): mm: add preferred_swap ability
liubo (1): preferred_swap: share memory can specify swap device
fs/proc/base.c | 188 +++++++++++++++++++++++++++++++++ include/linux/mm_types.h | 4 + include/linux/sched/coredump.h | 1 + include/linux/swap.h | 13 +++ kernel/fork.c | 4 + mm/Kconfig | 7 ++ mm/swap_slots.c | 64 ++++++++++- mm/swap_state.c | 1 + mm/swapfile.c | 86 ++++++++++++++- mm/vmscan.c | 38 +++++++ 10 files changed, 403 insertions(+), 3 deletions(-)
From: Ni Cunshu nicunshu@huawei.com
euleros inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I7U78A CVE: NA
------------------------------------------------
add a new attribute preferred_swap for each process, which indicates the swapfile or swap device that can be used by the process. Each process can only have one preferred_swap.
Signed-off-by: Ni Cunshu nicunshu@huawei.com --- fs/proc/base.c | 113 +++++++++++++++++++++++++++++++++++++++ include/linux/mm_types.h | 4 ++ include/linux/swap.h | 13 +++++ kernel/fork.c | 4 ++ mm/Kconfig | 7 +++ mm/swap_slots.c | 68 ++++++++++++++++++++++- mm/swapfile.c | 86 ++++++++++++++++++++++++++++- 7 files changed, 292 insertions(+), 3 deletions(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c index 24c70ff923b8..34537572db57 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3330,6 +3330,116 @@ static const struct file_operations proc_preferred_cpuset_operations = { }; #endif
+#ifdef CONFIG_PREFERRED_SWAP +static ssize_t proc_preferred_swap_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task = get_proc_task(file_inode(file)); + struct mm_struct *mm; + int ret; + int err = 0; + size_t len; + char *buffer; + char *path; + + if (!task) + return -ESRCH; + + ret = 0; + mm = get_task_mm(task); + if (!mm) + goto out_no_mm; + + if (mm->preferred_swap) { + buffer = (char *)kmalloc(PATH_MAX + 1, GFP_KERNEL); + if (buffer == NULL) { + err = -ENOMEM; + goto out; + } + + path = d_path(&mm->preferred_swap->swap_file->f_path, + buffer, PATH_MAX); + + if (IS_ERR(path)) { + err = PTR_ERR(path); + kfree(buffer); + goto out; + } + + len = snprintf(buffer, strlen(path) + 2, "%s\n", path); + ret = simple_read_from_buffer(buf, count, ppos, buffer, len); + kfree(buffer); + } +out: + mmput(mm); +out_no_mm: + put_task_struct(task); + if (err < 0) + return err; + return ret; +} + +static ssize_t proc_preferred_swap_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + struct task_struct *task; + struct mm_struct *mm; + char *buffer; + int err = 0; + + if (count >= PATH_MAX) + return -EINVAL; + + buffer = (char *)kmalloc(PATH_MAX, GFP_KERNEL); + if (buffer == NULL) { + err = -ENOMEM; + goto out_no_buffer; + } + if (copy_from_user(buffer, buf, count)) { + err = -EFAULT; + goto out_no_task; + } + buffer[count] = '\0'; + strreplace(buffer, '\n', '\0'); + task = get_proc_task(file_inode(file)); + if (!task) { + err = -ESRCH; + goto out_no_task; + } + mm = get_task_mm(task); + if (!mm) { + err = -ESRCH; + goto out_no_mm; + } + if (strlen(buffer) == 0) { + mm->preferred_swap = NULL; + goto out; + } + err = find_swap_info(buffer, mm); + if (err) { + pr_info("%s failed to be found as swap\n", buffer); + goto out; + } + + preferred_swap_used = 1; +out: + mmput(mm); +out_no_mm: + put_task_struct(task); +out_no_task: + kfree(buffer); +out_no_buffer: + if (err < 0) + return err; + return count; 
+} +static const struct file_operations proc_preferred_swap_operations = { + .write = proc_preferred_swap_write, + .read = proc_preferred_swap_read, + .llseek = generic_file_llseek, +}; +#endif /* CONFIG_PREFERRED_SWAP */ + static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { @@ -3526,6 +3636,9 @@ static const struct pid_entry tgid_base_stuff[] = { ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages), ONE("ksm_stat", S_IRUSR, proc_pid_ksm_stat), #endif +#ifdef CONFIG_PREFERRED_SWAP + REG("preferred_swap", S_IRUGO|S_IWUSR, proc_preferred_swap_operations), +#endif };
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index e3eaf458787a..2edd816fba7f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -638,7 +638,11 @@ struct mm_struct { KABI_RESERVE(2) KABI_RESERVE(3) #endif +#ifdef CONFIG_PREFERRED_SWAP + KABI_USE(4, struct swap_info_struct *preferred_swap) +#else KABI_RESERVE(4) +#endif KABI_RESERVE(5) KABI_RESERVE(6) KABI_RESERVE(7) diff --git a/include/linux/swap.h b/include/linux/swap.h index 7f49964f27d2..998f10e97601 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -389,6 +389,10 @@ extern int vm_swappiness; extern int remove_mapping(struct address_space *mapping, struct page *page);
extern unsigned long reclaim_pages(struct list_head *page_list); +#ifdef CONFIG_PREFERRED_SWAP +extern int find_swap_info(char *filename, struct mm_struct *mm); +extern void clear_tasks_mm_preferred_swap(struct swap_info_struct *p); +#endif #ifdef CONFIG_ETMEM enum etmem_swapcache_watermark_en { ETMEM_SWAPCACHE_WMARK_LOW, @@ -496,6 +500,10 @@ extern long total_swap_pages; extern atomic_t nr_rotate_swap; extern bool has_usable_swap(void);
+#ifdef CONFIG_PREFERRED_SWAP +extern int preferred_swap_used; +#endif + /* Swap 50% full? Release swapcache more aggressively.. */ static inline bool vm_swap_full(void) { @@ -511,7 +519,12 @@ extern void si_swapinfo(struct sysinfo *); extern swp_entry_t get_swap_page(struct page *page); extern void put_swap_page(struct page *page, swp_entry_t entry); extern swp_entry_t get_swap_page_of_type(int); +#ifdef CONFIG_PREFERRED_SWAP +extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size, + struct swap_info_struct *preferred_swap); +#else extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size); +#endif extern int add_swap_count_continuation(swp_entry_t, gfp_t); extern void swap_shmem_alloc(swp_entry_t); extern int swap_duplicate(swp_entry_t); diff --git a/kernel/fork.c b/kernel/fork.c index a531fd38d111..b12449f398ed 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1072,6 +1072,10 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm_init_uprobes_state(mm); hugetlb_count_init(mm);
+#ifdef CONFIG_PREFERRED_SWAP + mm->preferred_swap = NULL; +#endif + if (current->mm) { mm->flags = current->mm->flags & MMF_INIT_MASK; mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK; diff --git a/mm/Kconfig b/mm/Kconfig index f66457168de9..48c37c3bda8c 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -999,6 +999,13 @@ config EXTEND_HUGEPAGE_MAPPING help Introduce vmalloc/vmap/remap interfaces that handle only hugepages.
+config PREFERRED_SWAP + bool "make task only swapping memory to assigned swap" + depends on SYSFS + default y + help + Say Y here to enable kernel to limit tasks only swapping memory to assigned swap. + source "mm/damon/Kconfig"
endmenu diff --git a/mm/swap_slots.c b/mm/swap_slots.c index 0357fbe70645..79a3164bf393 100644 --- a/mm/swap_slots.c +++ b/mm/swap_slots.c @@ -33,6 +33,9 @@ #include <linux/vmalloc.h> #include <linux/mutex.h> #include <linux/mm.h> +#ifdef CONFIG_PREFERRED_SWAP +#include <linux/rmap.h> +#endif
static DEFINE_PER_CPU(struct swap_slots_cache, swp_slots); static bool swap_slot_cache_active; @@ -265,8 +268,13 @@ static int refill_swap_slots_cache(struct swap_slots_cache *cache)
cache->cur = 0; if (swap_slot_cache_active) +#ifdef CONFIG_PREFERRED_SWAP + cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE, + cache->slots, 1, NULL); +#else cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE, cache->slots, 1); +#endif
return cache->nr; } @@ -303,16 +311,66 @@ int free_swap_slot(swp_entry_t entry) return 0; }
+#ifdef CONFIG_PREFERRED_SWAP +static bool has_preferred_swap(struct page *page, struct vm_area_struct *vma, + unsigned long addr, void *arg) +{ + struct swap_info_struct **si = (struct swap_info_struct **)arg; + + if (*si) { + *si = ERR_PTR(-EPERM); + return false; + } + if (vma->vm_file) { + *si = ERR_PTR(-EPERM); + return false; + } + if (vma->vm_mm->preferred_swap) { + *si = vma->vm_mm->preferred_swap; + } else { + *si = ERR_PTR(-EPERM); + return false; + } + return true; +} + +static struct swap_info_struct *page_preferred_swap(struct page *page) +{ + struct swap_info_struct *preferred_swap = NULL; + struct rmap_walk_control rwc = { + .rmap_one = has_preferred_swap, + .arg = (void *)&preferred_swap, + .anon_lock = page_lock_anon_vma_read, + }; + rmap_walk(page, &rwc); + if (IS_ERR_OR_NULL(preferred_swap)) + preferred_swap = NULL; + + return preferred_swap; +} +#endif /* CONFIG_PREFERRED_SWAP */ + swp_entry_t get_swap_page(struct page *page) { swp_entry_t entry; struct swap_slots_cache *cache;
+#ifdef CONFIG_PREFERRED_SWAP + struct swap_info_struct *preferred_swap = NULL; + + if (!page_mapping(page)) + preferred_swap = page_preferred_swap(page); +#endif + entry.val = 0;
if (PageTransHuge(page)) { if (IS_ENABLED(CONFIG_THP_SWAP)) +#ifdef CONFIG_PREFERRED_SWAP + get_swap_pages(1, &entry, HPAGE_PMD_NR, preferred_swap); +#else get_swap_pages(1, &entry, HPAGE_PMD_NR); +#endif goto out; }
@@ -327,7 +385,11 @@ swp_entry_t get_swap_page(struct page *page) */ cache = raw_cpu_ptr(&swp_slots);
- if (likely(check_cache_active() && cache->slots)) { +#ifdef CONFIG_PREFERRED_SWAP + if (likely(check_cache_active() && cache->slots && !preferred_swap)) { +#else + if (likely(check_cache_active() && cache->slots)) { +#endif mutex_lock(&cache->alloc_lock); if (cache->slots) { repeat: @@ -344,7 +406,11 @@ swp_entry_t get_swap_page(struct page *page) goto out; }
+#ifdef CONFIG_PREFERRED_SWAP + get_swap_pages(1, &entry, 1, preferred_swap); +#else get_swap_pages(1, &entry, 1); +#endif out: if (mem_cgroup_try_charge_swap(page, entry)) { put_swap_page(page, entry); diff --git a/mm/swapfile.c b/mm/swapfile.c index b0824a6fe21e..6f68ceaf49bd 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -62,7 +62,10 @@ EXPORT_SYMBOL_GPL(nr_swap_pages); /* protected with swap_lock. reading in vm_swap_full() doesn't need lock */ long total_swap_pages; static int least_priority = -1; - +#ifdef CONFIG_PREFERRED_SWAP +DEFINE_SPINLOCK(preferred_swap_lock); +int preferred_swap_used; +#endif static const char Bad_file[] = "Bad swap file entry "; static const char Unused_file[] = "Unused swap file entry "; static const char Bad_offset[] = "Bad swap offset entry "; @@ -1057,7 +1060,13 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
}
-int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size) +#ifdef CONFIG_PREFERRED_SWAP +int get_swap_pages(int n_goal, swp_entry_t swp_entries[], + int entry_size, struct swap_info_struct *preferred_swap) +#else +int get_swap_pages(int n_goal, swp_entry_t swp_entries[], + int entry_size) +#endif { unsigned long size = swap_entry_size(entry_size); struct swap_info_struct *si, *next; @@ -1083,6 +1092,10 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size) start_over: node = numa_node_id(); plist_for_each_entry_safe(si, next, &swap_avail_heads[node], avail_lists[node]) { +#ifdef CONFIG_PREFERRED_SWAP + if (preferred_swap && preferred_swap != si) + goto nextsi; +#endif /* requeue si to after same-priority siblings */ plist_requeue(&si->avail_lists[node], &swap_avail_heads[node]); spin_unlock(&swap_avail_lock); @@ -2644,6 +2657,10 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) p->flags &= ~SWP_WRITEOK; spin_unlock(&p->lock); spin_unlock(&swap_lock); +#ifdef CONFIG_PREFERRED_SWAP + if (preferred_swap_used) + clear_tasks_mm_preferred_swap(p); +#endif
disable_swap_slots_cache_lock();
@@ -3879,6 +3896,71 @@ void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask) } #endif
+#ifdef CONFIG_PREFERRED_SWAP +int find_swap_info(char *filename, struct mm_struct *mm) +{ + struct file *chosen_file; + unsigned int type; + struct path path; + int err = 0; + struct swap_info_struct *found_swap = NULL; + + err = kern_path(filename, LOOKUP_FOLLOW, &path); + if (err) + goto out_no_path; + chosen_file = dentry_open(&path, O_RDWR|O_LARGEFILE, current_cred()); + if (IS_ERR(chosen_file)) { + err = PTR_ERR(chosen_file); + goto out_no_file; + } + spin_lock(&preferred_swap_lock); + spin_lock(&swap_lock); + for (type = 0; type < nr_swapfiles; type++) { + if ((swap_info[type]->flags & SWP_USED) && + (swap_info[type]->flags & SWP_WRITEOK)) { + if (swap_info[type]->swap_file->f_mapping == + chosen_file->f_mapping) { + found_swap = swap_info[type]; + break; + } + } + } + + spin_unlock(&swap_lock); + if (type >= nr_swapfiles) { + err = -EINVAL; + goto out; + } + + mm->preferred_swap = found_swap; +out: + spin_unlock(&preferred_swap_lock); + filp_close(chosen_file, NULL); +out_no_file: + path_put(&path); +out_no_path: + return err; +} + +void clear_tasks_mm_preferred_swap(struct swap_info_struct *p) +{ + struct task_struct *task; + struct mm_struct *mm; + + spin_lock(&preferred_swap_lock); + read_lock(&tasklist_lock); + for_each_process(task) { + mm = get_task_mm(task); + if (mm) { + if (mm->preferred_swap == p) + mm->preferred_swap = NULL; + mmput(mm); + } + } + read_unlock(&tasklist_lock); + spin_unlock(&preferred_swap_lock); +} +#endif static int __init swapfile_init(void) { int nid;
From: Liu Yuntao liuyuntao10@huawei.com
euleros inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I7U78A CVE: NA
------------------------------------------------
Allow configuring which processes are not permitted to swap out memory.
Signed-off-by: Liu Yuntao liuyuntao10@huawei.com --- fs/proc/base.c | 75 ++++++++++++++++++++++++++++++++++ include/linux/sched/coredump.h | 1 + mm/swap_state.c | 1 + mm/vmscan.c | 38 +++++++++++++++++ 4 files changed, 115 insertions(+)
diff --git a/fs/proc/base.c b/fs/proc/base.c index 34537572db57..943e6d4fdcc2 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3440,6 +3440,80 @@ static const struct file_operations proc_preferred_swap_operations = { }; #endif /* CONFIG_PREFERRED_SWAP */
+static ssize_t proc_enable_swap_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task = get_proc_task(file_inode(file)); + struct mm_struct *mm; + int ret, enable_swap; + size_t len; + char buffer[PROC_NUMBUF]; + if (!task) + return -ESRCH; + + ret = 0; + mm = get_task_mm(task); + if (mm) { + enable_swap = test_bit(MMF_DISABLE_SWAP, &mm->flags) ? 0 : 1; + len = snprintf(buffer, sizeof(buffer), "%d\n", enable_swap); + mmput(mm); + ret = simple_read_from_buffer(buf, count, ppos, buffer, len); + } + + put_task_struct(task); + + return ret; +} + +static ssize_t proc_enable_swap_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task; + struct mm_struct *mm; + bool enable_swap; + unsigned val; + int ret; + + ret = kstrtouint_from_user(buf, count, 0, &val); + if (ret < 0) + return ret; + if (val == 0) + enable_swap = false; + else if (val == 1) + enable_swap = true; + else + return -EINVAL; + + ret = -ESRCH; + task = get_proc_task(file_inode(file)); + if (!task) + goto out_no_task; + + mm = get_task_mm(task); + if (!mm) + goto out_no_mm; + ret = 0; + + if (enable_swap) + clear_bit(MMF_DISABLE_SWAP, &mm->flags); + else + set_bit(MMF_DISABLE_SWAP, &mm->flags); + + mmput(mm); +out_no_mm: + put_task_struct(task); +out_no_task: + if (ret < 0) + return ret; + return count; +} + +static const struct file_operations proc_enable_swap_operations = { + .write = proc_enable_swap_write, + .read = proc_enable_swap_read, + .llseek = generic_file_llseek, +}; + static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { @@ -3632,6 +3706,7 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_ASCEND_SHARE_POOL ONE("sp_group", 0444, proc_sp_group_state), #endif + REG("enable_swap", S_IRUGO|S_IWUSR, proc_enable_swap_operations), #ifdef CONFIG_KSM ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages), 
ONE("ksm_stat", S_IRUSR, proc_pid_ksm_stat), diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index 6a4d85c7a5f3..8d244424186f 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -73,6 +73,7 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_OOM_VICTIM 25 /* mm is the oom victim */ #define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */ #define MMF_MULTIPROCESS 27 /* mm is shared between processes */ +#define MMF_DISABLE_SWAP 28 /* disable swap for pages in all VMAs */ #define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP)
#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ diff --git a/mm/swap_state.c b/mm/swap_state.c index 69d71c4be7b8..75cc654bd4f1 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -971,6 +971,7 @@ static int __init swap_init_sysfs(void) pr_err("failed to register swap group\n"); goto delete_obj; } + return 0;
delete_obj: diff --git a/mm/vmscan.c b/mm/vmscan.c index d062f0aa06ee..1077d6b516e5 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1268,6 +1268,41 @@ static void page_check_dirty_writeback(struct page *page, mapping->a_ops->is_dirty_writeback(page, dirty, writeback); }
+/* functions provided for vmscan */ +static bool is_vma_noevict(struct page *page, struct vm_area_struct *vma, + unsigned long addr, void *arg) +{ + if (test_bit(MMF_DISABLE_SWAP, &vma->vm_mm->flags)) { + *(bool *)arg = true; + return false; + } + + return true; +} + +static inline bool is_page_noevict(struct page *page) +{ + bool noevict = false; + struct rmap_walk_control rwc = { + .rmap_one = is_vma_noevict, + .arg = (void *)&noevict, + .anon_lock = page_lock_anon_vma_read, + }; + + rmap_walk(page, &rwc); + + return noevict; +} + +static bool mm_noevict_page(struct page *page) +{ + if (unlikely(PageKsm(page))) + return false; + + return is_page_noevict(page); +} + + /* * shrink_page_list() returns the number of reclaimed pages */ @@ -1310,6 +1345,9 @@ static unsigned int shrink_page_list(struct list_head *page_list, if (unlikely(!page_evictable(page))) goto activate_locked;
+ if (mm_noevict_page(page)) + goto activate_locked; + if (!sc->may_unmap && page_mapped(page)) goto keep_locked;
From: liubo liubo254@huawei.com
euleros inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I7U78A CVE: NA
------------------------------------------------
Only shared pages that are mapped by multiple processes cannot specify the swap area.
Signed-off-by: liubo liubo254@huawei.com --- mm/swap_slots.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/mm/swap_slots.c b/mm/swap_slots.c index 79a3164bf393..91a5057cb097 100644 --- a/mm/swap_slots.c +++ b/mm/swap_slots.c @@ -321,10 +321,6 @@ static bool has_preferred_swap(struct page *page, struct vm_area_struct *vma, *si = ERR_PTR(-EPERM); return false; } - if (vma->vm_file) { - *si = ERR_PTR(-EPERM); - return false; - } if (vma->vm_mm->preferred_swap) { *si = vma->vm_mm->preferred_swap; } else { @@ -358,7 +354,7 @@ swp_entry_t get_swap_page(struct page *page) #ifdef CONFIG_PREFERRED_SWAP struct swap_info_struct *preferred_swap = NULL;
- if (!page_mapping(page)) + if (page_mapcount(page) <= 1) preferred_swap = page_preferred_swap(page); #endif
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/2912 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/X...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/2912 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/X...