From: Li Zefan <lizefan@huawei.com>
euler inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4OPKC
CVE: NA
-------------------------------------------------
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Li Zefan <lizefan@huawei.com>
Signed-off-by: luojiajun <luojiajun3@huawei.com>
Reviewed-by: Li Zefan <lizefan@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Cui GaoSheng <cuigaosheng1@huawei.com>
Reviewed-by: weiyang wang <wangweiyang2@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
---
 include/linux/pid.h           |  3 ---
 include/linux/pid_namespace.h |  1 +
 kernel/pid.c                  | 45 ++++++++++++++++++++++++++++++-----
 kernel/pid_namespace.c        |  6 +++--
 kernel/sysctl.c               |  9 -------
 kernel/trace/trace.c          |  4 ++--
 kernel/trace/trace.h          |  2 --
 7 files changed, 46 insertions(+), 24 deletions(-)
diff --git a/include/linux/pid.h b/include/linux/pid.h index fa10acb8d6a4..34afff2dc888 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -110,9 +110,6 @@ extern void transfer_pid(struct task_struct *old, struct task_struct *new, struct pid_namespace; extern struct pid_namespace init_pid_ns;
-extern int pid_max; -extern int pid_max_min, pid_max_max; - /* * look up a PID in the hash table. Must be called with the tasklist_lock * or rcu_read_lock() held. diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 5a5cb45ac57e..b202733aa6cc 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -31,6 +31,7 @@ struct pid_namespace { #endif struct user_namespace *user_ns; struct ucounts *ucounts; + int pid_max; int reboot; /* group exit code if this pidns was rebooted */ struct ns_common ns; } __randomize_layout; diff --git a/kernel/pid.c b/kernel/pid.c index be2ec1d26896..28fdf3dc1005 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -43,6 +43,7 @@ #include <linux/sched/task.h> #include <linux/idr.h> #include <net/sock.h> +#include <linux/kmemleak.h> #include <uapi/linux/pidfd.h> #ifdef CONFIG_PID_RESERVE #include <linux/pin_mem.h> @@ -62,12 +63,10 @@ struct pid init_struct_pid = { }, } };
-int pid_max = PID_MAX_DEFAULT; - #define RESERVED_PIDS 300
-int pid_max_min = RESERVED_PIDS + 1; -int pid_max_max = PID_MAX_LIMIT; +static int pid_max_min = RESERVED_PIDS + 1; +static int pid_max_max = PID_MAX_LIMIT;
/* * PID-map pages start out as NULL, they get allocated upon @@ -83,6 +82,7 @@ struct pid_namespace init_pid_ns = { .child_reaper = &init_task, .user_ns = &init_user_ns, .ns.inum = PROC_PID_INIT_INO, + .pid_max = PID_MAX_DEFAULT, #ifdef CONFIG_PID_NS .ns.ops = &pidns_operations, #endif @@ -194,7 +194,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, tid = set_tid[ns->level - i];
retval = -EINVAL; - if (tid < 1 || tid >= pid_max) + if (tid < 1 || tid >= task_active_pid_ns(current)->pid_max) goto out_free; /* * Also fail if a PID != 1 is requested and @@ -237,7 +237,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, * a partially initialized PID (see below). */ nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min, - pid_max, GFP_ATOMIC); + tmp->pid_max, GFP_ATOMIC); } spin_unlock_irq(&pidmap_lock); idr_preload_end(); @@ -612,8 +612,37 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags) return fd; }
+static int proc_dointvec_pidmax(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table tmp; + + tmp = *table; + tmp.data = &task_active_pid_ns(current)->pid_max; + + return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); +} + +static struct ctl_table pid_ctl_table[] = { + { + .procname = "pid_max", + .data = &init_pid_ns.pid_max, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_pidmax, + .extra1 = &pid_max_min, + .extra2 = &pid_max_max, + }, + {} +}; + +static struct ctl_path pid_kern_path[] = { { .procname = "kernel" }, {} }; + void __init pid_idr_init(void) { + struct ctl_table_header *hdr; + int pid_max = init_pid_ns.pid_max; + /* Verify no one has done anything silly: */ BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_ADDING);
@@ -624,6 +653,8 @@ void __init pid_idr_init(void) PIDS_PER_CPU_MIN * num_possible_cpus()); pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min);
+ init_pid_ns.pid_max = pid_max; + idr_init(&init_pid_ns.idr);
init_pid_ns.pid_cachep = KMEM_CACHE(pid, @@ -632,6 +663,8 @@ void __init pid_idr_init(void) if (is_need_reserve_pids()) reserve_pids(&init_pid_ns.idr, pid_max); #endif + hdr = register_sysctl_paths(pid_kern_path, pid_ctl_table); + kmemleak_not_leak(hdr); }
static struct file *__pidfd_fget(struct task_struct *task, int fd) diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 52c017feabcb..c290d21b6c24 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -109,6 +109,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns ns->user_ns = get_user_ns(user_ns); ns->ucounts = ucounts; ns->pid_allocated = PIDNS_ADDING; + ns->pid_max = parent_pid_ns->pid_max;
return ns;
@@ -282,6 +283,8 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write, next = idr_get_cursor(&pid_ns->idr) - 1;
tmp.data = &next; + tmp.extra2 = &pid_ns->pid_max; + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); if (!ret && write) idr_set_cursor(&pid_ns->idr, next + 1); @@ -289,7 +292,6 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write, return ret; }
-extern int pid_max; static struct ctl_table pid_ns_ctl_table[] = { { .procname = "ns_last_pid", @@ -297,7 +299,7 @@ static struct ctl_table pid_ns_ctl_table[] = { .mode = 0666, /* permissions are checked in the handler */ .proc_handler = pid_ns_ctl_handler, .extra1 = SYSCTL_ZERO, - .extra2 = &pid_max, + .extra2 = &init_pid_ns.pid_max, }, { } }; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 749ef59224e2..6eb2b1e88f0c 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2231,15 +2231,6 @@ static struct ctl_table kern_table[] = { .extra2 = SYSCTL_ONE, }, #endif /* CONFIG_SMP */ - { - .procname = "pid_max", - .data = &pid_max, - .maxlen = sizeof (int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &pid_max_min, - .extra2 = &pid_max_max, - }, { .procname = "panic_on_oops", .data = &panic_on_oops, diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e4f154119e52..2ce366687ce4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -693,7 +693,7 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, return -ENOMEM; }
- pid_list->pid_max = READ_ONCE(pid_max); + pid_list->pid_max = READ_ONCE(init_pid_ns.pid_max);
/* Only truncating will shrink pid_max */ if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max) @@ -4896,7 +4896,7 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
if (mask == TRACE_ITER_RECORD_TGID) { if (!tgid_map) { - tgid_map_max = pid_max; + tgid_map_max = init_pid_ns.pid_max; map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map), GFP_KERNEL);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 15a811d34cd8..94f8087a3c22 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -806,8 +806,6 @@ extern unsigned long tracing_thresh;
/* PID filtering */
-extern int pid_max; - bool trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid); bool trace_ignore_this_task(struct trace_pid_list *filtered_pids,