From: Peng Wu wupeng58@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4PM01 CVE: NA
------------------------------------------
Adding reliable flag for user task. User task with reliable flag can only alloc memory from mirrored region. PF_RELIABLE is added to represent the task's reliable flag.
- For init task, which is regarded as special task which alloc memory from mirrored region.
- For normal user tasks, The reliable flag can be set via procfs interface shown as below and can be inherited via fork().
User can change a user task's reliable flag by
$ echo [0/1] > /proc/<pid>/reliable
and check a user task's reliable flag by
$ cat /proc/<pid>/reliable
Note, global init task's reliable file can not be accessed.
Signed-off-by: Peng Wu wupeng58@huawei.com Signed-off-by: Ma Wupeng mawupeng1@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- Documentation/filesystems/proc.rst | 6 ++ fs/proc/base.c | 96 ++++++++++++++++++++++++++++++ include/linux/sched.h | 1 + mm/page_alloc.c | 12 ++++ 4 files changed, 115 insertions(+)
diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index e1562bef4887..f6783bb99e3f 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -162,6 +162,8 @@ usually fail with ESRCH. can be derived from smaps, but is faster and more convenient numa_maps An extension based on maps, showing the memory locality and binding policy as well as mem usage (in pages) of each mapping. + reliable Present with CONFIG_MEMORY_RELIABLE=y. Process reliable status + information ============= ===============================================================
For example, to get the status information of a process, all you have to do is @@ -649,6 +651,10 @@ Where: node locality page counters (N0 == node0, N1 == node1, ...) and the kernel page size, in KB, that is backing the mapping up.
+The /proc/pid/reliable is used to control user process's reliable status. +Process with this flag can only alloc memory from mirrored region. Global +init task's reliable flag can not be accessed. + 1.2 Kernel data ---------------
diff --git a/fs/proc/base.c b/fs/proc/base.c index 7edbfd2ef757..2ba1313aa444 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1260,6 +1260,96 @@ static const struct file_operations proc_oom_score_adj_operations = { .llseek = default_llseek, };
+#ifdef CONFIG_MEMORY_RELIABLE +static inline int reliable_check(struct task_struct *task, struct pid *pid) +{ + if (!mem_reliable_is_enabled()) + return -EPERM; + + if (is_global_init(task)) + return -EPERM; + + if (!task->mm || (task->flags & PF_KTHREAD) || + (task->flags & PF_EXITING)) + return -EPERM; + + return 0; +} + +static ssize_t reliable_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task = get_proc_task(file_inode(file)); + struct pid *pid = proc_pid(file_inode(file)); + char buffer[PROC_NUMBUF]; + size_t len; + short val; + int err; + + if (!task) + return -ESRCH; + + err = reliable_check(task, pid); + if (err) { + put_task_struct(task); + return err; + } + + val = task->flags & PF_RELIABLE ? 1 : 0; + put_task_struct(task); + len = snprintf(buffer, sizeof(buffer), "%hd\n", val); + return simple_read_from_buffer(buf, count, ppos, buffer, len); +} + +static ssize_t reliable_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task = get_proc_task(file_inode(file)); + struct pid *pid = proc_pid(file_inode(file)); + char buffer[PROC_NUMBUF]; + int val; + int err; + + if (!task) + return -ESRCH; + + err = reliable_check(task, pid); + if (err) + goto out; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + err = -EFAULT; + goto out; + } + + err = kstrtoint(strstrip(buffer), 0, &val); + if (err) + goto out; + if (val != 0 && val != 1) { + err = -EINVAL; + goto out; + } + + if (val == 1) + task->flags |= PF_RELIABLE; + else + task->flags &= ~PF_RELIABLE; + +out: + put_task_struct(task); + return err < 0 ? err : count; +} + +static const struct file_operations proc_reliable_operations = { + .read = reliable_read, + .write = reliable_write, + .llseek = generic_file_llseek, +}; +#endif + #ifdef CONFIG_AUDIT #define TMPBUFLEN 11 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, @@ -3262,6 +3352,9 @@ static const struct pid_entry tgid_base_stuff[] = { ONE("oom_score", S_IRUGO, proc_oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), +#ifdef CONFIG_MEMORY_RELIABLE + REG("reliable", S_IRUGO|S_IWUSR, proc_reliable_operations), +#endif #ifdef CONFIG_AUDIT REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), REG("sessionid", S_IRUGO, proc_sessionid_operations), @@ -3609,6 +3702,9 @@ static const struct pid_entry tid_base_stuff[] = { ONE("oom_score", S_IRUGO, proc_oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), +#ifdef CONFIG_MEMORY_RELIABLE + REG("reliable", S_IRUGO|S_IWUSR, proc_reliable_operations), +#endif #ifdef CONFIG_AUDIT REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), REG("sessionid", S_IRUGO, proc_sessionid_operations), diff --git a/include/linux/sched.h b/include/linux/sched.h index c33f7a70629a..edd236f98f0c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1581,6 +1581,7 @@ extern struct pid *cad_pid; #define PF_VCPU 0x00000001 /* I'm a virtual CPU */ #define PF_IDLE 0x00000002 /* I am an IDLE thread */ #define PF_EXITING 0x00000004 /* Getting shut down */ +#define PF_RELIABLE 0x00000008 /* Allocate from reliable memory */ #define PF_IO_WORKER 0x00000010 /* Task is an IO worker */ #define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ #define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index bfe215fc0da1..a72df34fa210 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5140,6 +5140,15 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, } EXPORT_SYMBOL_GPL(__alloc_pages_bulk);
+static inline void prepare_before_alloc(gfp_t *gfp_mask) +{ + if (!mem_reliable_is_enabled()) + return; + + if ((current->flags & PF_RELIABLE) || is_global_init(current)) + *gfp_mask |= GFP_RELIABLE; +} + /* * This is the 'heart' of the zoned buddy allocator. */ @@ -5161,6 +5170,9 @@ struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid, }
gfp &= gfp_allowed_mask; + + prepare_before_alloc(&gfp); + alloc_gfp = gfp; if (!prepare_alloc_pages(gfp, order, preferred_nid, nodemask, &ac, &alloc_gfp, &alloc_flags))