From: Liu Yuntao <liuyuntao10@huawei.com>
euleros inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I7U78A
CVE: NA
------------------------------------------------
Add a per-process interface, /proc/<pid>/enable_swap, to configure whether a
process's memory may be swapped out. Writing 0 sets MMF_DISABLE_SWAP on the
task's mm, so page reclaim re-activates its pages instead of swapping them
out; writing 1 clears the flag again. Reading the file returns the current
setting (1 = swap enabled, 0 = disabled).
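
For illustration, a minimal userspace sketch of the interface (not part of
this patch; error handling is trimmed). A process writes "0" to its own
/proc/self/enable_swap to set MMF_DISABLE_SWAP, then reads the file back to
confirm the setting:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[16];
	ssize_t n;
	/* operate on the calling process itself */
	int fd = open("/proc/self/enable_swap", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "0", 1) != 1)	/* 0: set MMF_DISABLE_SWAP */
		perror("write");
	lseek(fd, 0, SEEK_SET);
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		printf("enable_swap: %s", buf);	/* expect "0\n" */
	}
	close(fd);
	return 0;
}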
Signed-off-by: Liu Yuntao <liuyuntao10@huawei.com>
---
 fs/proc/base.c                 | 75 ++++++++++++++++++++++++++++++++++
 include/linux/sched/coredump.h |  1 +
 mm/swap_state.c                |  1 +
 mm/vmscan.c                    | 38 +++++++++++++++++
 4 files changed, 115 insertions(+)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 34537572db57..943e6d4fdcc2 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3440,6 +3440,80 @@ static const struct file_operations proc_preferred_swap_operations = {
 };
 #endif /* CONFIG_PREFERRED_SWAP */
 
+static ssize_t proc_enable_swap_read(struct file *file, char __user *buf,
+		size_t count, loff_t *ppos)
+{
+	struct task_struct *task = get_proc_task(file_inode(file));
+	struct mm_struct *mm;
+	int ret, enable_swap;
+	size_t len;
+	char buffer[PROC_NUMBUF];
+	if (!task)
+		return -ESRCH;
+
+	ret = 0;
+	mm = get_task_mm(task);
+	if (mm) {
+		enable_swap = test_bit(MMF_DISABLE_SWAP, &mm->flags) ? 0 : 1;
+		len = snprintf(buffer, sizeof(buffer), "%d\n", enable_swap);
+		mmput(mm);
+		ret = simple_read_from_buffer(buf, count, ppos, buffer, len);
+	}
+
+	put_task_struct(task);
+
+	return ret;
+}
+
+static ssize_t proc_enable_swap_write(struct file *file, const char __user *buf,
+		size_t count, loff_t *ppos)
+{
+	struct task_struct *task;
+	struct mm_struct *mm;
+	bool enable_swap;
+	unsigned val;
+	int ret;
+
+	ret = kstrtouint_from_user(buf, count, 0, &val);
+	if (ret < 0)
+		return ret;
+	if (val == 0)
+		enable_swap = false;
+	else if (val == 1)
+		enable_swap = true;
+	else
+		return -EINVAL;
+
+	ret = -ESRCH;
+	task = get_proc_task(file_inode(file));
+	if (!task)
+		goto out_no_task;
+
+	mm = get_task_mm(task);
+	if (!mm)
+		goto out_no_mm;
+	ret = 0;
+
+	if (enable_swap)
+		clear_bit(MMF_DISABLE_SWAP, &mm->flags);
+	else
+		set_bit(MMF_DISABLE_SWAP, &mm->flags);
+
+	mmput(mm);
+out_no_mm:
+	put_task_struct(task);
+out_no_task:
+	if (ret < 0)
+		return ret;
+	return count;
+}
+
+static const struct file_operations proc_enable_swap_operations = {
+	.write		= proc_enable_swap_write,
+	.read		= proc_enable_swap_read,
+	.llseek		= generic_file_llseek,
+};
+
 static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
 				struct pid *pid, struct task_struct *task)
 {
@@ -3632,6 +3706,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_ASCEND_SHARE_POOL
 	ONE("sp_group", 0444, proc_sp_group_state),
 #endif
+	REG("enable_swap", S_IRUGO|S_IWUSR, proc_enable_swap_operations),
 #ifdef CONFIG_KSM
 	ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages),
 	ONE("ksm_stat", S_IRUSR, proc_pid_ksm_stat),
diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index 6a4d85c7a5f3..8d244424186f 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -73,6 +73,7 @@ static inline int get_dumpable(struct mm_struct *mm)
 #define MMF_OOM_VICTIM		25	/* mm is the oom victim */
 #define MMF_OOM_REAP_QUEUED	26	/* mm was queued for oom_reaper */
 #define MMF_MULTIPROCESS	27	/* mm is shared between processes */
+#define MMF_DISABLE_SWAP	28	/* disable swap for pages in all VMAs */
 #define MMF_DISABLE_THP_MASK	(1 << MMF_DISABLE_THP)
 
 #define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 69d71c4be7b8..75cc654bd4f1 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -971,6 +971,7 @@ static int __init swap_init_sysfs(void)
 		pr_err("failed to register swap group\n");
 		goto delete_obj;
 	}
+	return 0;
 
 delete_obj:
 	kobject_put(swap_kobj);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d062f0aa06ee..1077d6b516e5 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1268,6 +1268,41 @@ static void page_check_dirty_writeback(struct page *page,
 	mapping->a_ops->is_dirty_writeback(page, dirty, writeback);
 }
 
+/* Check whether a page maps into an mm that has MMF_DISABLE_SWAP set. */
+static bool is_vma_noevict(struct page *page, struct vm_area_struct *vma,
+		unsigned long addr, void *arg)
+{
+	if (test_bit(MMF_DISABLE_SWAP, &vma->vm_mm->flags)) {
+		*(bool *)arg = true;
+		return false;
+	}
+
+	return true;
+}
+
+static inline bool is_page_noevict(struct page *page)
+{
+	bool noevict = false;
+	struct rmap_walk_control rwc = {
+		.rmap_one = is_vma_noevict,
+		.arg = (void *)&noevict,
+		.anon_lock = page_lock_anon_vma_read,
+	};
+
+	rmap_walk(page, &rwc);
+
+	return noevict;
+}
+
+static bool mm_noevict_page(struct page *page)
+{
+	if (unlikely(PageKsm(page)))
+		return false;
+
+	return is_page_noevict(page);
+}
+
+
 /*
  * shrink_page_list() returns the number of reclaimed pages
  */
@@ -1310,6 +1345,9 @@ static unsigned int shrink_page_list(struct list_head *page_list,
 		if (unlikely(!page_evictable(page)))
 			goto activate_locked;
 
+		if (mm_noevict_page(page))
+			goto activate_locked;
+
 		if (!sc->may_unmap && page_mapped(page))
 			goto keep_locked;
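
For completeness, a rough functional check of the interface from outside the
target task (a hypothetical test program, not included in this patch): since
the proc entry is created with mode S_IRUGO|S_IWUSR, the owner can toggle the
flag for another of its processes, for example a forked child:

#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	char path[64], buf[16];
	ssize_t n;
	int fd;
	pid_t pid = fork();

	if (pid == 0) {		/* child: idle while the parent pokes it */
		sleep(2);
		_exit(0);
	}

	snprintf(path, sizeof(path), "/proc/%d/enable_swap", (int)pid);
	fd = open(path, O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "0", 1) != 1)	/* set MMF_DISABLE_SWAP on the child's mm */
		perror("write");
	lseek(fd, 0, SEEK_SET);
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		printf("child enable_swap: %s", buf);	/* expect "0\n" */
	}
	close(fd);
	waitpid(pid, NULL, 0);
	return 0;
}

Two behavioral details follow from the reclaim-side code above: KSM pages are
deliberately skipped (mm_noevict_page() returns false for PageKsm pages, so
they stay swappable), and because the check walks the page's reverse
mappings, a page shared with any mm that has MMF_DISABLE_SWAP set is kept
resident even if its other mappers allow swap.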