From: Ma Wupeng <mawupeng1@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8USBA
CVE: NA
------------------------------------------
Add a reliable flag for user tasks. A user task with this flag set can
only allocate memory from the mirrored region. PF_RELIABLE is added to
represent the task's reliable flag.

- The init task is treated as a special task that always allocates
  memory from the mirrored region.
- For normal user tasks, the reliable flag can be set via the procfs
  interface shown below and is inherited via fork().

A user can change a task's reliable flag with
$ echo [0/1] > /proc/<pid>/reliable
and check a task's reliable flag with
$ cat /proc/<pid>/reliable
Note that the global init task's reliable file cannot be accessed.
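
For illustration only, a minimal userspace sketch (not part of this
patch) that exercises the new interface on the calling task. It assumes
a kernel built with CONFIG_MEMORY_RELIABLE=y with memory reliable
enabled; otherwise the file is hidden and open() fails:

/* mark-reliable.c: hypothetical test helper, not part of this patch. */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	char val[4] = { 0 };
	/* The file is hidden when memory reliable is disabled. */
	int fd = open("/proc/self/reliable", O_RDWR);

	if (fd < 0) {
		perror("open /proc/self/reliable");
		return EXIT_FAILURE;
	}

	/* Set the reliable flag for this task; children inherit it
	 * via fork(). */
	if (write(fd, "1", 1) != 1) {
		perror("write");
		return EXIT_FAILURE;
	}

	/* Read the flag back; expected output is "1". */
	if (pread(fd, val, sizeof(val) - 1, 0) < 0) {
		perror("read");
		return EXIT_FAILURE;
	}
	printf("reliable flag: %s", val);

	close(fd);
	return EXIT_SUCCESS;
}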
Signed-off-by: Peng Wu <wupeng58@huawei.com>
Signed-off-by: Ma Wupeng <mawupeng1@huawei.com>
---
 Documentation/filesystems/proc.rst |   6 ++
 fs/proc/Makefile                   |   1 +
 fs/proc/base.c                     |   9 +++
 fs/proc/mem_reliable.c             | 100 +++++++++++++++++++++++++++++
 include/linux/mem_reliable.h       |   3 +
 include/linux/sched.h              |   5 ++
 mm/page_alloc.c                    |  30 +++++++++
 7 files changed, 154 insertions(+)
 create mode 100644 fs/proc/mem_reliable.c
diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index 65e0556064d6..d7c614e3be67 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -163,6 +163,8 @@ usually fail with ESRCH.
 		can be derived from smaps, but is faster and more convenient
 numa_maps	An extension based on maps, showing the memory locality and
 		binding policy as well as mem usage (in pages) of each mapping.
+ reliable	Present with CONFIG_MEMORY_RELIABLE=y. Task reliable status
+		information
 ============= ===============================================================
 
 For example, to get the status information of a process, all you have to do is
@@ -674,6 +676,10 @@ Where:
 node locality page counters (N0 == node0, N1 == node1, ...) and the kernel
 page size, in KB, that is backing the mapping up.
 
+The /proc/pid/reliable is used to control user task's reliable status.
+Task with this flag can only alloc memory from mirrored region. Global
+init task's reliable flag can not be accessed.
+
 1.2 Kernel data
 ---------------
 
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index bd08616ed8ba..70dca85a5861 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -34,3 +34,4 @@ proc-$(CONFIG_PROC_VMCORE)	+= vmcore.o
 proc-$(CONFIG_PRINTK)	+= kmsg.o
 proc-$(CONFIG_PROC_PAGE_MONITOR)	+= page.o
 proc-$(CONFIG_BOOT_CONFIG)	+= bootconfig.o
+proc-$(CONFIG_MEMORY_RELIABLE)	+= mem_reliable.o
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 96313167702d..e04b0126334f 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2659,6 +2659,9 @@ static struct dentry *proc_pident_instantiate(struct dentry *dentry,
 
 static bool proc_hide_pidents(const struct pid_entry *p)
 {
+	if (mem_reliable_hide_file(p->name))
+		return true;
+
 	return false;
 }
 
@@ -3390,6 +3393,9 @@ static const struct pid_entry tgid_base_stuff[] = {
 	ONE("oom_score", S_IRUGO, proc_oom_score),
 	REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
 	REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
+#ifdef CONFIG_MEMORY_RELIABLE
+	REG("reliable", S_IRUGO|S_IWUSR, proc_reliable_operations),
+#endif
 #ifdef CONFIG_AUDIT
 	REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
 	REG("sessionid", S_IRUGO, proc_sessionid_operations),
@@ -3739,6 +3745,9 @@ static const struct pid_entry tid_base_stuff[] = {
 	ONE("oom_score", S_IRUGO, proc_oom_score),
 	REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
 	REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
+#ifdef CONFIG_MEMORY_RELIABLE
+	REG("reliable", S_IRUGO|S_IWUSR, proc_reliable_operations),
+#endif
 #ifdef CONFIG_AUDIT
 	REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
 	REG("sessionid", S_IRUGO, proc_sessionid_operations),
diff --git a/fs/proc/mem_reliable.c b/fs/proc/mem_reliable.c
new file mode 100644
index 000000000000..52ac9ae79847
--- /dev/null
+++ b/fs/proc/mem_reliable.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/mem_reliable.h>
+
+#include "internal.h"
+
+static inline int reliable_check(struct task_struct *task, struct pid *pid)
+{
+	if (!mem_reliable_is_enabled())
+		return -EACCES;
+
+	if (is_global_init(task))
+		return -EINVAL;
+
+	if (!task->mm || (task->flags & PF_KTHREAD) ||
+	    (task->flags & PF_EXITING))
+		return -EINVAL;
+
+	return 0;
+}
+
+static ssize_t reliable_read(struct file *file, char __user *buf,
+			     size_t count, loff_t *ppos)
+{
+	struct task_struct *task = get_proc_task(file_inode(file));
+	struct pid *pid = proc_pid(file_inode(file));
+	char buffer[PROC_NUMBUF];
+	size_t len;
+	short val;
+	int err;
+
+	if (!task)
+		return -ESRCH;
+
+	err = reliable_check(task, pid);
+	if (err) {
+		put_task_struct(task);
+		return err;
+	}
+
+	val = task->flags & PF_RELIABLE ? 1 : 0;
+	put_task_struct(task);
+	len = snprintf(buffer, sizeof(buffer), "%hd\n", val);
+	return simple_read_from_buffer(buf, count, ppos, buffer, len);
+}
+
+static ssize_t reliable_write(struct file *file, const char __user *buf,
+			      size_t count, loff_t *ppos)
+{
+	struct task_struct *task = get_proc_task(file_inode(file));
+	struct pid *pid = proc_pid(file_inode(file));
+	char buffer[PROC_NUMBUF];
+	int val;
+	int err;
+
+	if (!task)
+		return -ESRCH;
+
+	err = reliable_check(task, pid);
+	if (err)
+		goto out;
+
+	memset(buffer, 0, sizeof(buffer));
+	if (count > sizeof(buffer) - 1)
+		count = sizeof(buffer) - 1;
+	if (copy_from_user(buffer, buf, count)) {
+		err = -EFAULT;
+		goto out;
+	}
+
+	err = kstrtoint(strstrip(buffer), 0, &val);
+	if (err)
+		goto out;
+	if (val != 0 && val != 1) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (val == 1)
+		task->flags |= PF_RELIABLE;
+	else
+		task->flags &= ~PF_RELIABLE;
+
+out:
+	put_task_struct(task);
+	return err < 0 ? err : count;
+}
+
+struct file_operations proc_reliable_operations = {
+	.read		= reliable_read,
+	.write		= reliable_write,
+	.llseek		= generic_file_llseek,
+};
+
+bool mem_reliable_hide_file(const char *name)
+{
+	if (!mem_reliable_is_enabled() && !strncmp("reliable", name, 8))
+		return true;
+
+	return false;
+}
diff --git a/include/linux/mem_reliable.h b/include/linux/mem_reliable.h
index 5df1c157a2bd..019e17130c67 100644
--- a/include/linux/mem_reliable.h
+++ b/include/linux/mem_reliable.h
@@ -13,9 +13,11 @@ DECLARE_STATIC_KEY_FALSE(mem_reliable);
 
 extern bool reliable_enabled;
+extern struct file_operations proc_reliable_operations;
 
 void mem_reliable_init(bool has_unmirrored_mem, unsigned long mirrored_sz);
 bool mem_reliable_status(void);
+bool mem_reliable_hide_file(const char *name);
 
 static inline bool mem_reliable_is_enabled(void)
 {
@@ -74,6 +76,7 @@ static inline bool skip_non_mirrored_zone(gfp_t gfp, struct zoneref *z)
 	return false;
 }
 static inline bool mem_reliable_status(void) { return false; }
+static inline bool mem_reliable_hide_file(const char *name) { return false; }
 #endif
 
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9fdd08aa9626..97ad76e86ed6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1756,6 +1756,11 @@ extern struct pid *cad_pid;
 #define PF_USER_WORKER		0x00004000	/* Kernel thread cloned from userspace thread */
 #define PF_NOFREEZE		0x00008000	/* This thread should not be frozen */
 #define PF__HOLE__00010000	0x00010000
+#ifdef CONFIG_MEMORY_RELIABLE
+#define PF_RELIABLE		PF__HOLE__00010000	/* Allocate from reliable memory */
+#else
+#define PF_RELIABLE		0x00000000
+#endif
 #define PF_KSWAPD		0x00020000	/* I am kswapd */
 #define PF_MEMALLOC_NOFS	0x00040000	/* All allocation requests will inherit GFP_NOFS */
 #define PF_MEMALLOC_NOIO	0x00080000	/* All allocation requests will inherit GFP_NOIO */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index efde73e24b3b..9d158a1113f5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4530,6 +4530,33 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 }
 EXPORT_SYMBOL_GPL(__alloc_pages_bulk);
 
+static inline void prepare_before_alloc(gfp_t *gfp_mask)
+{
+	bool zone_movable;
+
+	if (!mem_reliable_is_enabled())
+		return;
+
+	/*
+	 * memory reliable only handle memory allocation from movable zone
+	 * (force alloc from non-movable zone or force alloc from movable
+	 * zone) to get total isolation.
+	 */
+	zone_movable = gfp_zone(*gfp_mask & ~GFP_RELIABLE) == ZONE_MOVABLE;
+	if (!zone_movable)
+		goto clear_flag;
+
+	if (!in_task())
+		return;
+
+	if ((current->flags & PF_RELIABLE) || is_global_init(current))
+		*gfp_mask |= GFP_RELIABLE;
+
+	return;
+clear_flag:
+	*gfp_mask &= ~GFP_RELIABLE;
+}
+
 /*
  * This is the 'heart' of the zoned buddy allocator.
  */
@@ -4549,6 +4576,9 @@ struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
 		return NULL;
 
 	gfp &= gfp_allowed_mask;
+
+	prepare_before_alloc(&gfp);
+
 	/*
 	 * Apply scoped allocation constraints. This is mainly about GFP_NOFS
 	 * resp. GFP_NOIO which has to be inherited for all allocation requests
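
As a reading aid only (userspace C, not kernel code), a standalone
sketch that models the gfp-mask gating performed by prepare_before_alloc()
above; the GFP_RELIABLE value and the zone check are simplified
stand-ins, not the kernel's real GFP encoding:

/* gating-model.c: illustrative model of the reliable-allocation gating. */
#include <stdbool.h>
#include <stdio.h>

#define GFP_RELIABLE	0x1u	/* stand-in for the real GFP_RELIABLE bit */

static unsigned int prepare(unsigned int gfp, bool zone_movable,
			    bool in_task, bool task_reliable_or_init)
{
	/* Only movable-zone allocations are considered; everything else
	 * has the reliable hint stripped to keep full isolation. */
	if (!zone_movable)
		return gfp & ~GFP_RELIABLE;

	/* Interrupt/atomic context: leave the mask untouched. */
	if (!in_task)
		return gfp;

	/* Reliable tasks (and global init) are steered to mirrored memory. */
	if (task_reliable_or_init)
		gfp |= GFP_RELIABLE;

	return gfp;
}

int main(void)
{
	printf("movable + reliable task -> %#x\n", prepare(0, true, true, true));
	printf("movable + normal task   -> %#x\n", prepare(0, true, true, false));
	printf("non-movable allocation  -> %#x\n",
	       prepare(GFP_RELIABLE, false, true, true));
	return 0;
}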