From: Yutian Yang <nglaive@gmail.com>
mainline inclusion from mainline-v5.15-rc1 commit bb902cb47cf93b33cd92b3b7a4019330a03ef57f bugzilla: 181858 https://gitee.com/openeuler/kernel/issues/I4DDEL
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=bb902cb47cf93b33cd92b3b7a4019330a03ef57f
--------------------------------
This patch adds accounting flags to the fs_context and legacy_fs_context allocation sites so that the kernel can correctly charge these objects.
We have written a PoC to demonstrate the effect of the missing-charging bugs. The PoC consumes around 1,200MB of unaccounted memory, while it is charged for only 362MB of memory usage. We evaluated the PoC on QEMU x86_64 v5.2.90 + Linux kernel v5.10.19 + Debian buster. All limits, including ulimits and sysctl variables, are left at their defaults. Specifically, the hard NOFILE limit and nr_open in sysctl are both 1,048,576.
/*------------------------- POC code ----------------------------*/
} while (0)
/*
 * Thin wrapper that issues the fsopen system call through syscall()
 * using the __NR_fsopen number.
 *
 * fs_name: filesystem type name, forwarded unchanged.
 * flags:   fsopen flags, forwarded unchanged.
 *
 * Returns the value produced by syscall(), converted to int.
 */
static inline int fsopen(const char *fs_name, unsigned int flags)
{
	int fd = syscall(__NR_fsopen, fs_name, flags);

	return fd;
}
/*
 * Backing storage for child stacks: 512 slots of STACK_SIZE bytes each.
 * main() passes the end of a slot (thread_stack[i] + STACK_SIZE) to clone().
 */
static char thread_stack[512][STACK_SIZE];
/*
 * Function handed to clone() by main().
 *
 * Calls fsopen("nfs", FSOPEN_CLOEXEC) 800000 times and never closes the
 * returned descriptors; a return value of -1 is reported via errExit().
 * Afterwards it spins forever, so it does not return on its own.
 *
 * arg: unused.
 */
int thread_fn(void *arg)
{
	int remaining = 800000;

	while (remaining-- > 0) {
		int fsfd = fsopen("nfs", FSOPEN_CLOEXEC);

		if (fsfd == -1) {
			errExit("fsopen");
		}
	}

	/* Spin instead of returning; nothing above closes the descriptors. */
	for (;;)
		;

	return 0;
}
/*
 * PoC entry point: starts one child running thread_fn() via clone() and
 * then spins forever.
 *
 * The child is given the end of its thread_stack slot as its stack pointer
 * and SIGCHLD as the clone flags. A failed clone() (return value -1) is
 * reported through errExit() instead of being silently ignored.
 *
 * argc, argv: unused.
 */
int main(int argc, char *argv[])
{
	int thread_pid;

	for (int i = 0; i < 1; ++i) {
		thread_pid = clone(thread_fn, thread_stack[i] + STACK_SIZE,
				   SIGCHLD, NULL);
		if (thread_pid == -1) {
			errExit("clone");
		}
	}

	/* Never exit: the parent keeps running alongside the child. */
	for (;;)
		;

	return 0;
}
/*-------------------------- end --------------------------------*/
Link: https://lkml.kernel.org/r/1626517201-24086-1-git-send-email-nglaive@gmail.com Signed-off-by: Yutian Yang <nglaive@gmail.com> Reviewed-by: Shakeel Butt <shakeelb@google.com> Cc: Michal Hocko <mhocko@kernel.org> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Vladimir Davydov <vdavydov.dev@gmail.com> Cc: <shenwenbo@zju.edu.cn> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Li Ming <limingming.li@huawei.com> Signed-off-by: Lu Jialin <lujialin4@huawei.com> Reviewed-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Signed-off-by: Chen Jun <chenjun102@huawei.com> --- fs/fs_context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/fs_context.c b/fs/fs_context.c index 2834d1afa6e8..4858645ca620 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c @@ -231,7 +231,7 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type, struct fs_context *fc; int ret = -ENOMEM;
- fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL); + fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL_ACCOUNT); if (!fc) return ERR_PTR(-ENOMEM);
@@ -631,7 +631,7 @@ const struct fs_context_operations legacy_fs_context_ops = { */ static int legacy_init_fs_context(struct fs_context *fc) { - fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL); + fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL_ACCOUNT); if (!fc->fs_private) return -ENOMEM; fc->ops = &legacy_fs_context_ops;