From: Jingxian He hejingxian@huawei.com
hulk inclusion category: feature bugzilla: 48159 CVE: N/A
------------------------------
We record the PIDs of the dumped tasks in the reserved memory and reserve those PIDs before the init task starts. During recovery, each reserved PID is freed and then reallocated to the restored task by setting fork_pid.
Signed-off-by: Jingxian He hejingxian@huawei.com Reviewed-by: Jing Xiangfeng jingxiangfeng@huawei.com Acked-by: Hanjun Guo guohanjun@huawei.com Reviewed-by: Xie XiuQi xiexiuqi@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/char/pin_memory.c | 31 ++++++++++++++++++++++++ include/linux/pin_mem.h | 6 +++++ include/linux/sched.h | 3 +++ init/init_task.c | 3 +++ kernel/pid.c | 29 ++++++++++++++++++++++ mm/Kconfig | 10 ++++++++ mm/pin_mem.c | 51 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 133 insertions(+)
diff --git a/drivers/char/pin_memory.c b/drivers/char/pin_memory.c index 9b50ab867c5d3..05fa7cfde03b2 100644 --- a/drivers/char/pin_memory.c +++ b/drivers/char/pin_memory.c @@ -39,12 +39,20 @@ struct pin_mem_area_set { #define _REMAP_PIN_MEM_AREA 3 #define _FINISH_PIN_MEM_DUMP 4 #define _INIT_PAGEMAP_READ 5 +#ifdef CONFIG_PID_RESERVE +#define _SET_FORK_PID 6 +#define _PIN_MEM_IOC_MAX_NR 6 +#else #define _PIN_MEM_IOC_MAX_NR 5 +#endif #define SET_PIN_MEM_AREA _IOW(PIN_MEM_MAGIC, _SET_PIN_MEM_AREA, struct pin_mem_area_set) #define CLEAR_PIN_MEM_AREA _IOW(PIN_MEM_MAGIC, _CLEAR_PIN_MEM_AREA, int) #define REMAP_PIN_MEM_AREA _IOW(PIN_MEM_MAGIC, _REMAP_PIN_MEM_AREA, int) #define FINISH_PIN_MEM_DUMP _IOW(PIN_MEM_MAGIC, _FINISH_PIN_MEM_DUMP, int) #define INIT_PAGEMAP_READ _IOW(PIN_MEM_MAGIC, _INIT_PAGEMAP_READ, int) +#ifdef CONFIG_PID_RESERVE +#define SET_FORK_PID _IOW(PIN_MEM_MAGIC, _SET_FORK_PID, int) +#endif static int set_pin_mem(struct pin_mem_area_set *pmas) { int i; @@ -145,6 +153,24 @@ static int pin_mem_remap(unsigned long arg) return -EFAULT; }
+#ifdef CONFIG_PID_RESERVE +static int set_fork_pid(unsigned long arg) +{ + int pid; + struct page_map_info *pmi = NULL; + void __user *buf = (void __user *)arg; + + if (!access_ok(buf, sizeof(int))) + goto fault; + if (copy_from_user(&pid, buf, sizeof(int))) + goto fault; + current->fork_pid = pid; + return 0; +fault: + return -EFAULT; +} +#endif + static long pin_memory_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { long ret = 0; @@ -170,6 +196,11 @@ static long pin_memory_ioctl(struct file *file, unsigned int cmd, unsigned long case INIT_PAGEMAP_READ: ret = init_pagemap_read(); break; +#ifdef CONFIG_PID_RESERVE + case SET_FORK_PID: + ret = set_fork_pid(arg); + break; +#endif default: return -EINVAL; } diff --git a/include/linux/pin_mem.h b/include/linux/pin_mem.h index 61e925a455de2..21422f8b0349c 100644 --- a/include/linux/pin_mem.h +++ b/include/linux/pin_mem.h @@ -89,5 +89,11 @@ extern struct resource pin_memory_resource; #endif extern void init_reserve_page_map(unsigned long map_addr, unsigned long map_size);
+#ifdef CONFIG_PID_RESERVE +extern bool is_need_reserve_pids(void); +extern void free_reserved_pid(struct idr *idr, int pid); +extern void reserve_pids(struct idr *idr, int pid_max); +#endif + #endif /* CONFIG_PIN_MEMORY */ #endif /* _LINUX_PIN_MEMORY_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 62cb3d2ea186a..8a09c0c018ec8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1207,6 +1207,9 @@ struct task_struct { /* Used by LSM modules for access restriction: */ void *security; #endif +#ifdef CONFIG_PID_RESERVE + int fork_pid; +#endif
/* * New fields for task_struct should be added above here, so that diff --git a/init/init_task.c b/init/init_task.c index 994ffe0181208..dd02e591490ae 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -180,6 +180,9 @@ struct task_struct init_task #ifdef CONFIG_SECURITY .security = NULL, #endif +#ifdef CONFIG_PID_RESERVE + .fork_pid = 0, +#endif }; EXPORT_SYMBOL(init_task);
diff --git a/kernel/pid.c b/kernel/pid.c index bfdcd16c7f09a..d6a8009f96739 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -41,6 +41,9 @@ #include <linux/sched/task.h> #include <linux/idr.h> #include <linux/kmemleak.h> +#ifdef CONFIG_PID_RESERVE +#include <linux/pin_mem.h> +#endif
struct pid init_struct_pid = { .count = ATOMIC_INIT(1), @@ -185,6 +188,27 @@ struct pid *alloc_pid(struct pid_namespace *ns) if (idr_get_cursor(&tmp->idr) > RESERVED_PIDS) pid_min = RESERVED_PIDS;
+#ifdef CONFIG_PID_RESERVE + if (!current->fork_pid) { + /* + * Store a null pointer so find_pid_ns does not find + * a partially initialized PID (see below). + */ + nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min, + tmp->pid_max, + GFP_ATOMIC); + } else { + /* Try to free the reserved fork_pid, and then use it to alloc pid. */ + free_reserved_pid(&tmp->idr, current->fork_pid); + pid_min = current->fork_pid; + current->fork_pid = 0; + nr = idr_alloc(&tmp->idr, NULL, pid_min, + pid_min + 1, + GFP_ATOMIC); + if (nr == -ENOSPC) + nr = -EEXIST; + } +#else /* * Store a null pointer so find_pid_ns does not find * a partially initialized PID (see below). @@ -192,6 +216,7 @@ struct pid *alloc_pid(struct pid_namespace *ns) nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min, tmp->pid_max, GFP_ATOMIC); +#endif spin_unlock_irq(&pidmap_lock); idr_preload_end();
@@ -501,6 +526,10 @@ void __init pid_idr_init(void) init_pid_ns.pid_cachep = KMEM_CACHE(pid, SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);
+#ifdef CONFIG_PID_RESERVE + if (is_need_reserve_pids()) + reserve_pids(&init_pid_ns.idr, pid_max); +#endif hdr = register_sysctl_paths(pid_kern_path, pid_ctl_table); kmemleak_not_leak(hdr); } diff --git a/mm/Kconfig b/mm/Kconfig index f8f2db73ceb2c..8225489c6439d 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -809,4 +809,14 @@ config PIN_MEMORY and restore. We can pin the memory data of tasks and collect the corresponding physical pages mapping info in checkpoint, and remap the physical pages to restore tasks in restore. + +config PID_RESERVE + bool "Support for reserve and recover pid" + depends on PIN_MEMORY + help + Say y here to enable the pid reserved feature for checkpoint + and restore. + We record the pid of dump task in the reserved memory, + and reserve the pids before the init task starts. In the restore process, + free the reserved pids and realloc them for use. endmenu diff --git a/mm/pin_mem.c b/mm/pin_mem.c index 56641d6e2f4e5..c657ae7f64860 100644 --- a/mm/pin_mem.c +++ b/mm/pin_mem.c @@ -1071,4 +1071,55 @@ void clear_pin_memory_record(void) } EXPORT_SYMBOL_GPL(clear_pin_memory_record);
+#ifdef CONFIG_PID_RESERVE +struct idr *reserve_idr; + +/* test if there exist pin memory tasks */ +bool is_need_reserve_pids(void) +{ + return (pin_pid_num > 0); +} + +void free_reserved_pid(struct idr *idr, int pid) +{ + unsigned int index; + struct page_map_info *pmi; + + if (!max_pin_pid_num || idr != reserve_idr) + return; + + for (index = 0; index < pin_pid_num; index++) { + pmi = &(user_space_reserve_start[index]); + if (pmi->pid == pid && pmi->pid_reserved) { + idr_remove(idr, pid); + return; + } + } +} + +/* reserve pids for check point tasks which pinned memory */ +void reserve_pids(struct idr *idr, int pid_max) +{ + int alloc_pid; + unsigned int index; + struct page_map_info *pmi; + + if (!max_pin_pid_num) + return; + reserve_idr = idr; + for (index = 0; index < pin_pid_num; index++) { + pmi = &(user_space_reserve_start[index]); + pmi->pid_reserved = true; + alloc_pid = idr_alloc(idr, NULL, pmi->pid, pid_max, GFP_ATOMIC); + if (alloc_pid != pmi->pid) { + if (alloc_pid > 0) + idr_remove(idr, alloc_pid); + pr_warn("Reserve pid (%d) fail, real pid is %d.\n", alloc_pid, pmi->pid); + pmi->pid_reserved = false; + continue; + } + } +} +#endif /* CONFIG_PID_RESERVE */ + #endif /* CONFIG_PIN_MEMORY */