From: Ni Cunshu nicunshu@huawei.com
euleros inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I7U78A CVE: NA
------------------------------------------------
Add a new per-process attribute, preferred_swap, exposed as /proc/<pid>/preferred_swap. It indicates the swap file or swap device that can be used by the process. Each process can have only one preferred_swap; writing an empty string clears it.
Signed-off-by: Ni Cunshu nicunshu@huawei.com --- fs/proc/base.c | 113 +++++++++++++++++++++++++++++++++++++++ include/linux/mm_types.h | 4 ++ include/linux/swap.h | 13 +++++ kernel/fork.c | 4 ++ mm/Kconfig | 7 +++ mm/swap_slots.c | 68 ++++++++++++++++++++++- mm/swapfile.c | 86 ++++++++++++++++++++++++++++- 7 files changed, 292 insertions(+), 3 deletions(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c index 24c70ff923b8..34537572db57 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3330,6 +3330,116 @@ static const struct file_operations proc_preferred_cpuset_operations = { }; #endif
+#ifdef CONFIG_PREFERRED_SWAP
+/*
+ * Read handler for /proc/<pid>/preferred_swap: reports the path of the task's
+ * preferred swap followed by a newline, or nothing if none is set.
+ */
+static ssize_t proc_preferred_swap_read(struct file *file, char __user *buf,
+					size_t count, loff_t *ppos)
+{
+	struct task_struct *task = get_proc_task(file_inode(file));
+	struct mm_struct *mm;
+	ssize_t ret = 0;
+	size_t len;
+	char *buffer;
+	char *path;
+
+	if (!task)
+		return -ESRCH;
+
+	mm = get_task_mm(task);
+	if (!mm)
+		goto out_no_mm;
+	if (!mm->preferred_swap)
+		goto out;
+
+	buffer = kmalloc(PATH_MAX + 1, GFP_KERNEL);
+	if (!buffer) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	path = d_path(&mm->preferred_swap->swap_file->f_path, buffer, PATH_MAX);
+	if (IS_ERR(path)) {
+		ret = PTR_ERR(path);
+		goto out_free;
+	}
+
+	/*
+	 * d_path() builds the string inside @buffer, so formatting it back
+	 * into @buffer would overlap; turn the terminator into '\n' instead.
+	 */
+	len = strlen(path);
+	path[len++] = '\n';
+	ret = simple_read_from_buffer(buf, count, ppos, path, len);
+out_free:
+	kfree(buffer);
+out:
+	mmput(mm);
+out_no_mm:
+	put_task_struct(task);
+	return ret;
+}
+
+/*
+ * Write handler for /proc/<pid>/preferred_swap: the written path selects the
+ * task's preferred swap via find_swap_info(); an empty line clears it.
+ */
+static ssize_t proc_preferred_swap_write(struct file *file,
+		const char __user *buf, size_t count, loff_t *ppos)
+{
+	struct task_struct *task;
+	struct mm_struct *mm;
+	char *buffer;
+	int err = 0;
+
+	if (count >= PATH_MAX)
+		return -EINVAL;
+
+	buffer = memdup_user_nul(buf, count);
+	if (IS_ERR(buffer))
+		return PTR_ERR(buffer);
+	strreplace(buffer, '\n', '\0');
+
+	task = get_proc_task(file_inode(file));
+	if (!task) {
+		err = -ESRCH;
+		goto out_no_task;
+	}
+	mm = get_task_mm(task);
+	if (!mm) {
+		err = -ESRCH;
+		goto out_no_mm;
+	}
+	if (buffer[0] == '\0') {
+		mm->preferred_swap = NULL;
+		goto out;
+	}
+	err = find_swap_info(buffer, mm);
+	if (err) {
+		pr_info("%s failed to be found as swap\n", buffer);
+		goto out;
+	}
+
+	preferred_swap_used = 1;
+out:
+	mmput(mm);
+out_no_mm:
+	put_task_struct(task);
+out_no_task:
+	kfree(buffer);
+	if (err < 0)
+		return err;
+	return count;
+}
+static const struct file_operations proc_preferred_swap_operations = {
+	.write = proc_preferred_swap_write,
+	.read = proc_preferred_swap_read,
+	.llseek = generic_file_llseek,
+};
+#endif /* CONFIG_PREFERRED_SWAP */
+
 static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
 				struct pid *pid, struct task_struct *task)
 {
@@ -3526,6 +3636,9 @@ static const struct pid_entry tgid_base_stuff[] = {
 	ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages),
 	ONE("ksm_stat", S_IRUSR, proc_pid_ksm_stat),
 #endif
+#ifdef CONFIG_PREFERRED_SWAP
+	REG("preferred_swap", S_IRUGO|S_IWUSR, proc_preferred_swap_operations),
+#endif
 };
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index e3eaf458787a..2edd816fba7f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -638,7 +638,11 @@ struct mm_struct { KABI_RESERVE(2) KABI_RESERVE(3) #endif +#ifdef CONFIG_PREFERRED_SWAP + KABI_USE(4, struct swap_info_struct *preferred_swap) +#else KABI_RESERVE(4) +#endif KABI_RESERVE(5) KABI_RESERVE(6) KABI_RESERVE(7) diff --git a/include/linux/swap.h b/include/linux/swap.h index 7f49964f27d2..998f10e97601 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -389,6 +389,10 @@ extern int vm_swappiness; extern int remove_mapping(struct address_space *mapping, struct page *page);
extern unsigned long reclaim_pages(struct list_head *page_list); +#ifdef CONFIG_PREFERRED_SWAP +extern int find_swap_info(char *filename, struct mm_struct *mm); +extern void clear_tasks_mm_preferred_swap(struct swap_info_struct *p); +#endif #ifdef CONFIG_ETMEM enum etmem_swapcache_watermark_en { ETMEM_SWAPCACHE_WMARK_LOW, @@ -496,6 +500,10 @@ extern long total_swap_pages; extern atomic_t nr_rotate_swap; extern bool has_usable_swap(void);
+#ifdef CONFIG_PREFERRED_SWAP +extern int preferred_swap_used; +#endif + /* Swap 50% full? Release swapcache more aggressively.. */ static inline bool vm_swap_full(void) { @@ -511,7 +519,12 @@ extern void si_swapinfo(struct sysinfo *); extern swp_entry_t get_swap_page(struct page *page); extern void put_swap_page(struct page *page, swp_entry_t entry); extern swp_entry_t get_swap_page_of_type(int); +#ifdef CONFIG_PREFERRED_SWAP +extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size, + struct swap_info_struct *preferred_swap); +#else extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size); +#endif extern int add_swap_count_continuation(swp_entry_t, gfp_t); extern void swap_shmem_alloc(swp_entry_t); extern int swap_duplicate(swp_entry_t); diff --git a/kernel/fork.c b/kernel/fork.c index 2547c6a6e5b1..734882ed98c6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1072,6 +1072,10 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm_init_uprobes_state(mm); hugetlb_count_init(mm);
+#ifdef CONFIG_PREFERRED_SWAP + mm->preferred_swap = NULL; +#endif + if (current->mm) { mm->flags = current->mm->flags & MMF_INIT_MASK; mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK; diff --git a/mm/Kconfig b/mm/Kconfig index f66457168de9..48c37c3bda8c 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -999,6 +999,13 @@ config EXTEND_HUGEPAGE_MAPPING help Introduce vmalloc/vmap/remap interfaces that handle only hugepages.
+config PREFERRED_SWAP
+	bool "make task only swapping memory to assigned swap"
+	depends on SWAP && PROC_FS
+	default y
+	help
+	  Say Y here to enable kernel to limit tasks only swapping memory to assigned swap.
+
 source "mm/damon/Kconfig"
endmenu diff --git a/mm/swap_slots.c b/mm/swap_slots.c index 0357fbe70645..79a3164bf393 100644 --- a/mm/swap_slots.c +++ b/mm/swap_slots.c @@ -33,6 +33,9 @@ #include <linux/vmalloc.h> #include <linux/mutex.h> #include <linux/mm.h> +#ifdef CONFIG_PREFERRED_SWAP +#include <linux/rmap.h> +#endif
static DEFINE_PER_CPU(struct swap_slots_cache, swp_slots); static bool swap_slot_cache_active; @@ -265,8 +268,13 @@ static int refill_swap_slots_cache(struct swap_slots_cache *cache)
cache->cur = 0; if (swap_slot_cache_active) +#ifdef CONFIG_PREFERRED_SWAP + cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE, + cache->slots, 1, NULL); +#else cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE, cache->slots, 1); +#endif
return cache->nr; } @@ -303,16 +311,66 @@ int free_swap_slot(swp_entry_t entry) return 0; }
+#ifdef CONFIG_PREFERRED_SWAP
+/*
+ * rmap_walk() callback. @arg points to a swap_info_struct pointer that gets
+ * the preferred swap of the mm owning @vma. It is set to ERR_PTR(-EPERM), and
+ * false is returned, when the result cannot be used (see the check below).
+ */
+static bool has_preferred_swap(struct page *page, struct vm_area_struct *vma,
+			       unsigned long addr, void *arg)
+{
+	struct swap_info_struct **result = arg;
+	struct swap_info_struct *wanted = vma->vm_mm->preferred_swap;
+
+	/* Already set by an earlier call, file-backed vma, or no preference. */
+	if (*result || vma->vm_file || !wanted) {
+		*result = ERR_PTR(-EPERM);
+		return false;
+	}
+
+	*result = wanted;
+	return true;
+}
+
+/*
+ * Return the preferred swap found by walking @page's mappings, else NULL.
+ */
+static struct swap_info_struct *page_preferred_swap(struct page *page)
+{
+	struct swap_info_struct *chosen = NULL;
+	struct rmap_walk_control walk = {
+		.rmap_one = has_preferred_swap,
+		.arg = &chosen,
+		.anon_lock = page_lock_anon_vma_read,
+	};
+
+	rmap_walk(page, &walk);
+	return IS_ERR_OR_NULL(chosen) ? NULL : chosen;
+}
+#endif /* CONFIG_PREFERRED_SWAP */
+
 swp_entry_t get_swap_page(struct page *page)
 {
 	swp_entry_t entry;
 	struct swap_slots_cache *cache;
+#ifdef CONFIG_PREFERRED_SWAP + struct swap_info_struct *preferred_swap = NULL; + + if (!page_mapping(page)) + preferred_swap = page_preferred_swap(page); +#endif + entry.val = 0;
if (PageTransHuge(page)) { if (IS_ENABLED(CONFIG_THP_SWAP)) +#ifdef CONFIG_PREFERRED_SWAP + get_swap_pages(1, &entry, HPAGE_PMD_NR, preferred_swap); +#else get_swap_pages(1, &entry, HPAGE_PMD_NR); +#endif goto out; }
@@ -327,7 +385,11 @@ swp_entry_t get_swap_page(struct page *page) */ cache = raw_cpu_ptr(&swp_slots);
-	if (likely(check_cache_active() && cache->slots)) {
+#ifdef CONFIG_PREFERRED_SWAP
+	if (likely(check_cache_active() && cache->slots && !preferred_swap)) {
+#else /* !CONFIG_PREFERRED_SWAP: same test as before this patch */
+	if (likely(check_cache_active() && cache->slots)) {
+#endif /* CONFIG_PREFERRED_SWAP */
 		mutex_lock(&cache->alloc_lock);
 		if (cache->slots) {
 repeat:
@@ -344,7 +406,11 @@ swp_entry_t get_swap_page(struct page *page)
 		goto out;
 	}
+#ifdef CONFIG_PREFERRED_SWAP + get_swap_pages(1, &entry, 1, preferred_swap); +#else get_swap_pages(1, &entry, 1); +#endif out: if (mem_cgroup_try_charge_swap(page, entry)) { put_swap_page(page, entry); diff --git a/mm/swapfile.c b/mm/swapfile.c index 14e2396fa8a3..835f8a8913b6 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -62,7 +62,10 @@ EXPORT_SYMBOL_GPL(nr_swap_pages); /* protected with swap_lock. reading in vm_swap_full() doesn't need lock */ long total_swap_pages; static int least_priority = -1; - +#ifdef CONFIG_PREFERRED_SWAP +DEFINE_SPINLOCK(preferred_swap_lock); +int preferred_swap_used; +#endif static const char Bad_file[] = "Bad swap file entry "; static const char Unused_file[] = "Unused swap file entry "; static const char Bad_offset[] = "Bad swap offset entry "; @@ -1056,7 +1059,13 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
}
-int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size) +#ifdef CONFIG_PREFERRED_SWAP +int get_swap_pages(int n_goal, swp_entry_t swp_entries[], + int entry_size, struct swap_info_struct *preferred_swap) +#else +int get_swap_pages(int n_goal, swp_entry_t swp_entries[], + int entry_size) +#endif { unsigned long size = swap_entry_size(entry_size); struct swap_info_struct *si, *next; @@ -1082,6 +1091,10 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size) start_over: node = numa_node_id(); plist_for_each_entry_safe(si, next, &swap_avail_heads[node], avail_lists[node]) { +#ifdef CONFIG_PREFERRED_SWAP + if (preferred_swap && preferred_swap != si) + goto nextsi; +#endif /* requeue si to after same-priority siblings */ plist_requeue(&si->avail_lists[node], &swap_avail_heads[node]); spin_unlock(&swap_avail_lock); @@ -2643,6 +2656,10 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) p->flags &= ~SWP_WRITEOK; spin_unlock(&p->lock); spin_unlock(&swap_lock); +#ifdef CONFIG_PREFERRED_SWAP + if (preferred_swap_used) + clear_tasks_mm_preferred_swap(p); +#endif
disable_swap_slots_cache_lock();
@@ -3878,6 +3895,71 @@ void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask) } #endif
+#ifdef CONFIG_PREFERRED_SWAP
+/*
+ * Make the active swap area backed by @filename the preferred swap of @mm.
+ * Returns 0, -EINVAL if no writable swap area matches, or a path/open error.
+ */
+int find_swap_info(char *filename, struct mm_struct *mm)
+{
+	struct swap_info_struct *si, *found_swap = NULL;
+	struct file *chosen_file;
+	unsigned int type;
+	struct path path;
+	int err;
+
+	err = kern_path(filename, LOOKUP_FOLLOW, &path);
+	if (err)
+		return err;
+	chosen_file = dentry_open(&path, O_RDWR|O_LARGEFILE, current_cred());
+	if (IS_ERR(chosen_file)) {
+		err = PTR_ERR(chosen_file);
+		goto out_no_file;
+	}
+	/* clear_tasks_mm_preferred_swap() takes this lock too. */
+	spin_lock(&preferred_swap_lock);
+	spin_lock(&swap_lock);
+	for (type = 0; type < nr_swapfiles; type++) {
+		si = swap_info[type];
+		if ((si->flags & SWP_USED) && (si->flags & SWP_WRITEOK) &&
+		    si->swap_file->f_mapping == chosen_file->f_mapping) {
+			found_swap = si;
+			break;
+		}
+	}
+	spin_unlock(&swap_lock);
+	/* nr_swapfiles may change after unlock, so test the result instead. */
+	if (found_swap)
+		mm->preferred_swap = found_swap;
+	else
+		err = -EINVAL;
+	spin_unlock(&preferred_swap_lock);
+	filp_close(chosen_file, NULL);
+out_no_file:
+	path_put(&path);
+	return err;
+}
+
+/*
+ * Forget @p as the preferred swap of every process; swapoff calls this.
+ * task->mm is read under task_lock(); mmput() may sleep and must not run here.
+ */
+void clear_tasks_mm_preferred_swap(struct swap_info_struct *p)
+{
+	struct task_struct *task;
+
+	spin_lock(&preferred_swap_lock);
+	read_lock(&tasklist_lock);
+	for_each_process(task) {
+		task_lock(task);
+		if (task->mm && task->mm->preferred_swap == p)
+			task->mm->preferred_swap = NULL;
+		task_unlock(task);
+	}
+	read_unlock(&tasklist_lock);
+	spin_unlock(&preferred_swap_lock);
+}
+#endif
 static int __init swapfile_init(void)
 {
 	int nid;