From: Li Zefan <lizefan@huawei.com>
euler inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I8TCPY
-------------------------------------------------
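pid_max is currently a single global variable, so a value adjusted on the host also takes effect inside containers. Move pid_max into struct pid_namespace so that each PID namespace has its own limit: a new namespace inherits the value from its parent at creation time, and the kernel.pid_max sysctl reads and writes the limit of the calling task's active PID namespace.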
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Yi Yang <yiyang13@huawei.com>
---
 include/linux/pid.h           |  3 ---
 include/linux/pid_namespace.h |  1 +
 kernel/pid.c                  | 42 ++++++++++++++++++++++++++++++-----
 kernel/pid_namespace.c        |  6 +++--
 kernel/sysctl.c               |  9 --------
 kernel/trace/pid_list.c       |  2 +-
 kernel/trace/trace.c          |  2 +-
 kernel/trace/trace.h          |  2 --
 8 files changed, 43 insertions(+), 24 deletions(-)
diff --git a/include/linux/pid.h b/include/linux/pid.h index 653a527574c4..7f0f84a368b7 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -113,9 +113,6 @@ extern void transfer_pid(struct task_struct *old, struct task_struct *new, struct pid_namespace; extern struct pid_namespace init_pid_ns;
-extern int pid_max; -extern int pid_max_min, pid_max_max; - /* * look up a PID in the hash table. Must be called with the tasklist_lock * or rcu_read_lock() held. diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index f9f9931e02d6..338012f75f6a 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -36,6 +36,7 @@ struct pid_namespace { #endif struct user_namespace *user_ns; struct ucounts *ucounts; + int pid_max; int reboot; /* group exit code if this pidns was rebooted */ struct ns_common ns; #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) diff --git a/kernel/pid.c b/kernel/pid.c index 383abde0c208..1cf72619d922 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -43,6 +43,7 @@ #include <linux/sched/task.h> #include <linux/idr.h> #include <net/sock.h> +#include <linux/kmemleak.h> #include <uapi/linux/pidfd.h>
struct pid init_struct_pid = { @@ -59,12 +60,10 @@ struct pid init_struct_pid = { }, } };
-int pid_max = PID_MAX_DEFAULT; - #define RESERVED_PIDS 300
-int pid_max_min = RESERVED_PIDS + 1; -int pid_max_max = PID_MAX_LIMIT; +static int pid_max_min = RESERVED_PIDS + 1; +static int pid_max_max = PID_MAX_LIMIT;
/* * PID-map pages start out as NULL, they get allocated upon @@ -80,6 +79,7 @@ struct pid_namespace init_pid_ns = { .child_reaper = &init_task, .user_ns = &init_user_ns, .ns.inum = PROC_PID_INIT_INO, + .pid_max = PID_MAX_DEFAULT, #ifdef CONFIG_PID_NS .ns.ops = &pidns_operations, #endif @@ -194,7 +194,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, tid = set_tid[ns->level - i];
retval = -EINVAL; - if (tid < 1 || tid >= pid_max) + if (tid < 1 || tid >= task_active_pid_ns(current)->pid_max) goto out_free; /* * Also fail if a PID != 1 is requested and @@ -234,7 +234,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, * a partially initialized PID (see below). */ nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min, - pid_max, GFP_ATOMIC); + tmp->pid_max, GFP_ATOMIC); } spin_unlock_irq(&pidmap_lock); idr_preload_end(); @@ -646,8 +646,34 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags) return fd; }
+static int proc_dointvec_pidmax(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table tmp; + + tmp = *table; + tmp.data = &task_active_pid_ns(current)->pid_max; + + return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); +} + +static struct ctl_table pid_ctl_table[] = { + { + .procname = "pid_max", + .data = &init_pid_ns.pid_max, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_pidmax, + .extra1 = &pid_max_min, + .extra2 = &pid_max_max, + }, + {} +}; + void __init pid_idr_init(void) { + int pid_max = init_pid_ns.pid_max; + /* Verify no one has done anything silly: */ BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_ADDING);
@@ -658,6 +684,8 @@ void __init pid_idr_init(void) PIDS_PER_CPU_MIN * num_possible_cpus()); pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min);
+ init_pid_ns.pid_max = pid_max; + idr_init(&init_pid_ns.idr);
init_pid_ns.pid_cachep = kmem_cache_create("pid", @@ -665,6 +693,8 @@ void __init pid_idr_init(void) __alignof__(struct pid), SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT, NULL); + + register_sysctl_init("kernel", pid_ctl_table); }
static struct file *__pidfd_fget(struct task_struct *task, int fd) diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 619972c78774..f66e4f866472 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -110,6 +110,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns ns->user_ns = get_user_ns(user_ns); ns->ucounts = ucounts; ns->pid_allocated = PIDNS_ADDING; + ns->pid_max = parent_pid_ns->pid_max; #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) ns->memfd_noexec_scope = pidns_memfd_noexec_scope(parent_pid_ns); #endif @@ -295,6 +296,8 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write, next = idr_get_cursor(&pid_ns->idr) - 1;
tmp.data = &next; + tmp.extra2 = &pid_ns->pid_max; + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); if (!ret && write) idr_set_cursor(&pid_ns->idr, next + 1); @@ -302,7 +305,6 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write, return ret; }
-extern int pid_max; static struct ctl_table pid_ns_ctl_table[] = { { .procname = "ns_last_pid", @@ -310,7 +312,7 @@ static struct ctl_table pid_ns_ctl_table[] = { .mode = 0666, /* permissions are checked in the handler */ .proc_handler = pid_ns_ctl_handler, .extra1 = SYSCTL_ZERO, - .extra2 = &pid_max, + .extra2 = &init_pid_ns.pid_max, }, { } }; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 354a2d294f52..b000e733281e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1809,15 +1809,6 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif - { - .procname = "pid_max", - .data = &pid_max, - .maxlen = sizeof (int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &pid_max_min, - .extra2 = &pid_max_max, - }, { .procname = "panic_on_oops", .data = &panic_on_oops, diff --git a/kernel/trace/pid_list.c b/kernel/trace/pid_list.c index 95106d02b32d..2ff4259928ae 100644 --- a/kernel/trace/pid_list.c +++ b/kernel/trace/pid_list.c @@ -414,7 +414,7 @@ struct trace_pid_list *trace_pid_list_alloc(void) int i;
/* According to linux/thread.h, pids can be no bigger that 30 bits */ - WARN_ON_ONCE(pid_max > (1 << 30)); + WARN_ON_ONCE(task_active_pid_ns(current)->pid_max > (1 << 30));
pid_list = kzalloc(sizeof(*pid_list), GFP_KERNEL); if (!pid_list) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a40d6baf101f..2fac76f2a865 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5466,7 +5466,7 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
if (mask == TRACE_ITER_RECORD_TGID) { if (!tgid_map) { - tgid_map_max = pid_max; + tgid_map_max = init_pid_ns.pid_max; map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map), GFP_KERNEL);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d608f6128704..6077db8894fd 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -695,8 +695,6 @@ extern unsigned long tracing_thresh;
/* PID filtering */
-extern int pid_max; - bool trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid); bool trace_ignore_this_task(struct trace_pid_list *filtered_pids,
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/3751 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/H...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/3751 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/H...