Chen Ridong (4): filescgroup: remove files of dfl_cftypes. filescgroup: move code of filescontrol.c to files-cgroup.c. misc: add a type of resource to control fd. filescgroup: fix null pointer access when files_cgroup is not initialized.
Documentation/admin-guide/cgroup-v2.rst | 7 + fs/Makefile | 2 + fs/file.c | 16 +- fs/files-cgroup.c | 335 +++++++++++++++++++++++ fs/filescontrol.c | 344 +++--------------------- fs/misc-fd.c | 77 ++++++ include/linux/fdtable.h | 3 + include/linux/files-cgroup.h | 32 +++ include/linux/filescontrol.h | 46 ++-- include/linux/misc-fd.h | 21 ++ include/linux/misc_cgroup.h | 4 + kernel/cgroup/misc.c | 4 + 12 files changed, 556 insertions(+), 335 deletions(-) create mode 100644 fs/files-cgroup.c create mode 100644 fs/misc-fd.c create mode 100644 include/linux/files-cgroup.h create mode 100644 include/linux/misc-fd.h
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8YC6O
----------------------------------------------------------------------
Cgroup v2 will not support filescgroup, so remove the files cftypes from dfl_cftypes. The misc controller will be used to control fds in cgroup v2 instead.
Signed-off-by: Chen Ridong chenridong@huawei.com --- fs/filescontrol.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/fs/filescontrol.c b/fs/filescontrol.c index 0947566b9119..18924673b806 100644 --- a/fs/filescontrol.c +++ b/fs/filescontrol.c @@ -306,7 +306,6 @@ struct cgroup_subsys files_cgrp_subsys = { .css_free = files_cgroup_css_free, .can_attach = files_cgroup_can_attach, .legacy_cftypes = files, - .dfl_cftypes = files, };
/*
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8YC6O
----------------------------------------------------------------------
Move the code of filescontrol.c/filescontrol.h to files-cgroup.c/files-cgroup.h. To prepare for replacing filescgroup with misc fd in the future, rename the functions that are used by fs/file.c from files_cgroup_xxx to files_cg_xxx; for now, each files_cg_xxx simply calls the corresponding files_cgroup_xxx.
Signed-off-by: Chen Ridong chenridong@huawei.com --- fs/Makefile | 1 + fs/file.c | 16 +- fs/files-cgroup.c | 335 +++++++++++++++++++++++++++++++++++ fs/filescontrol.c | 330 ++-------------------------------- include/linux/files-cgroup.h | 33 ++++ include/linux/filescontrol.h | 45 ++--- 6 files changed, 414 insertions(+), 346 deletions(-) create mode 100644 fs/files-cgroup.c create mode 100644 include/linux/files-cgroup.h
diff --git a/fs/Makefile b/fs/Makefile index db63c24fae57..c4e433f8c46e 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -48,6 +48,7 @@ obj-$(CONFIG_DIRTY_PAGES) += dirty_pages.o
obj-$(CONFIG_FHANDLE) += fhandle.o obj-$(CONFIG_CGROUP_FILES) += filescontrol.o +obj-$(CONFIG_CGROUP_FILES) += files-cgroup.o obj-y += iomap/
obj-y += quota/ diff --git a/fs/file.c b/fs/file.c index 4419f6975c96..d560e45fde40 100644 --- a/fs/file.c +++ b/fs/file.c @@ -338,7 +338,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int new_fdt->open_fds = newf->open_fds_init; new_fdt->full_fds_bits = newf->full_fds_bits_init; new_fdt->fd = &newf->fd_array[0]; - files_cgroup_assign(newf); + files_cg_assign(newf);
spin_lock(&oldf->file_lock); old_fdt = files_fdtable(oldf); @@ -403,7 +403,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
rcu_assign_pointer(newf->fdt, new_fdt);
- if (files_cgroup_dup_fds(newf)) { + if (files_cg_dup_fds(newf)) { /* could not get enough FD resources. Need to clean up. */ new_fds = new_fdt->fd; for (i = open_files; i != 0; i--) { @@ -419,7 +419,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int } return newf; out_release: - files_cgroup_remove(newf); + files_cg_remove(newf); kmem_cache_free(files_cachep, newf); out: return NULL; @@ -445,7 +445,7 @@ static struct fdtable *close_files(struct files_struct * files) if (set & 1) { struct file * file = xchg(&fdt->fd[i], NULL); if (file) { - files_cgroup_unalloc_fd(files, 1); + files_cg_unalloc_fd(files, 1); filp_close(file, files); cond_resched(); } @@ -454,7 +454,7 @@ static struct fdtable *close_files(struct files_struct * files) set >>= 1; } } - files_cgroup_remove(files); + files_cg_remove(files); return fdt; }
@@ -548,7 +548,7 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags) */ if (error) goto repeat; - if (files_cgroup_alloc_fd(files, 1)) { + if (files_cg_alloc_fd(files, 1)) { error = -EMFILE; goto out; } @@ -590,7 +590,7 @@ static void __put_unused_fd(struct files_struct *files, unsigned int fd) { struct fdtable *fdt = files_fdtable(files);
- files_cgroup_put_fd(files, fd); + files_cg_put_fd(files, fd); __clear_open_fd(fd, fdt); if (fd < files->next_fd) files->next_fd = fd; @@ -1154,7 +1154,7 @@ __releases(&files->file_lock) goto out; }
- if (!tofree && files_cgroup_alloc_fd(files, 1)) { + if (!tofree && files_cg_alloc_fd(files, 1)) { err = -EMFILE; goto out; } diff --git a/fs/files-cgroup.c b/fs/files-cgroup.c new file mode 100644 index 000000000000..dbf7452423a6 --- /dev/null +++ b/fs/files-cgroup.c @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: GPL-2.0 +/* filescontrol.c - Cgroup controller for open file handles. + * + * Copyright 2014 Google Inc. + * Author: Brian Makin merimus@google.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/page_counter.h> +#include <linux/filescontrol.h> +#include <linux/files-cgroup.h> +#include <linux/cgroup.h> +#include <linux/export.h> +#include <linux/printk.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/seq_file.h> +#include <linux/fdtable.h> +#include <linux/sched/signal.h> +#include <linux/module.h> + +#define FILES_MAX D_COUNT_MAX +#define FILES_MAX_STR "max" + +static bool no_acct; +struct cgroup_subsys files_cgrp_subsys __read_mostly; + +module_param(no_acct, bool, 0444); + +struct files_cgroup { + struct cgroup_subsys_state css; + struct page_counter open_handles; +}; + +static inline bool files_cgroup_enabled(void) +{ + return cgroup_subsys_enabled(files_cgrp_subsys); +} + +static inline struct files_cgroup *css_fcg(struct cgroup_subsys_state *css) +{ + return css ? 
container_of(css, struct files_cgroup, css) : NULL; +} + +static inline struct page_counter * +css_res_open_handles(struct cgroup_subsys_state *css) +{ + return &css_fcg(css)->open_handles; +} + +static inline struct files_cgroup * +files_cgroup_from_files(struct files_struct *files) +{ + return files->files_cgroup; +} + + +static struct cgroup_subsys_state * +files_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) +{ + struct files_cgroup *parent_fcg; + struct files_cgroup *fcg; + + parent_fcg = css_fcg(parent_css); + fcg = kzalloc(sizeof(*fcg), GFP_KERNEL); + if (!fcg) + goto out; + + if (!parent_fcg) { + page_counter_init(&fcg->open_handles, NULL); + page_counter_set_max(&fcg->open_handles, FILES_MAX); + } else { + struct page_counter *p_counter = &parent_fcg->open_handles; + + page_counter_init(&fcg->open_handles, p_counter); + page_counter_set_max(&fcg->open_handles, FILES_MAX); + } + return &fcg->css; + +out: + return ERR_PTR(-ENOMEM); +} + +static void files_cgroup_css_free(struct cgroup_subsys_state *css) +{ + kfree(css_fcg(css)); +} + +/* + * If attaching this cgroup would overcommit the resource then deny + * the attach. If not, attach the file resource into new cgroup. 
+ */ +static int files_cgroup_can_attach(struct cgroup_taskset *tset) +{ + u64 num_files; + bool can_attach; + struct cgroup_subsys_state *to_css; + struct cgroup_subsys_state *from_css; + struct page_counter *from_res; + struct page_counter *to_res; + struct page_counter *fail_res; + struct files_struct *files; + struct task_struct *task = cgroup_taskset_first(tset, &to_css); + + to_res = css_res_open_handles(to_css); + + task_lock(task); + files = task->files; + if (!files || files == &init_files) { + task_unlock(task); + return 0; + } + + from_css = &files_cgroup_from_files(files)->css; + from_res = css_res_open_handles(from_css); + + spin_lock(&files->file_lock); + num_files = file_cg_count_fds(files); + page_counter_uncharge(from_res, num_files); + + if (!page_counter_try_charge(to_res, num_files, &fail_res)) { + page_counter_charge(from_res, num_files); + pr_err("Open files limit overcommited\n"); + can_attach = false; + } else { + css_put(from_css); + css_get(to_css); + task->files->files_cgroup = css_fcg(to_css); + can_attach = true; + } + spin_unlock(&files->file_lock); + task_unlock(task); + return can_attach ? 0 : -ENOSPC; +} + +int files_cgroup_alloc_fd(struct files_struct *files, u64 n) +{ + if (!files_cgroup_enabled()) + return 0; + /* + * Kernel threads which are forked by kthreadd inherited the + * const files_struct 'init_files', we didn't wrap it so + * there's no associated files_cgroup. + * + * Kernel threads always stay in root cgroup, and we don't + * have limit for root files cgroup, so it won't hurt if + * we don't charge their fds, only issue is that files.usage + * won't be accurate in root files cgroup. 
+ */ + if (!no_acct && files != &init_files) { + struct page_counter *fail_res; + struct files_cgroup *files_cgroup = + files_cgroup_from_files(files); + if (!page_counter_try_charge(&files_cgroup->open_handles, + n, &fail_res)) + return -ENOMEM; + } + return 0; +} + +void files_cgroup_unalloc_fd(struct files_struct *files, u64 n) +{ + if (!files_cgroup_enabled()) + return; + /* + * It's not charged so no need to uncharge, see comments in + * files_cgroup_alloc_fd. + */ + if (!no_acct && files != &init_files) { + struct files_cgroup *files_cgroup = + files_cgroup_from_files(files); + page_counter_uncharge(&files_cgroup->open_handles, n); + } +} + +void files_cgroup_put_fd(struct files_struct *files, unsigned int fd) +{ + struct fdtable *fdt = files_fdtable(files); + + if (!files_cgroup_enabled()) + return; + + if (test_bit(fd, fdt->open_fds)) + return files_cgroup_unalloc_fd(files, 1); +} + +int files_cgroup_dup_fds(struct files_struct *newf) +{ + int err; + + if (!files_cgroup_enabled()) + return 0; + spin_lock(&newf->file_lock); + err = files_cgroup_alloc_fd(newf, file_cg_count_fds(newf)); + spin_unlock(&newf->file_lock); + return err; +} + +static u64 files_disabled_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + return no_acct; +} + +static int files_disabled_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 val) +{ + if (!val) + return -EINVAL; + no_acct = true; + + return 0; +} + +static int files_limit_read(struct seq_file *sf, void *v) +{ + struct files_cgroup *fcg = css_fcg(seq_css(sf)); + struct page_counter *counter = &fcg->open_handles; + u64 limit = counter->max; + + if (limit >= FILES_MAX) + seq_printf(sf, "%s\n", FILES_MAX_STR); + else + seq_printf(sf, "%llu\n", limit); + + return 0; +} + +static ssize_t files_limit_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + struct files_cgroup *fcg = css_fcg(of_css(of)); + u64 limit; + int err; + + buf = strstrip((char *)buf); + if (!strcmp(buf, 
FILES_MAX_STR)) { + limit = FILES_MAX; + goto set_limit; + } + + err = kstrtoull(buf, 0, &limit); + if (err) + return err; + +set_limit: + /* + * Limit updates don't need to be mutex'd, since it isn't + * critical that any racing fork()s follow the new limit. + */ + page_counter_set_max(&fcg->open_handles, limit); + return nbytes; +} + + +static u64 files_usage_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + struct files_cgroup *fcg = css_fcg(css); + + return page_counter_read(&fcg->open_handles); +} + +static struct cftype files[] = { + { + .name = "limit", + .seq_show = files_limit_read, + .write = files_limit_write, + .flags = CFTYPE_NOT_ON_ROOT, + }, + { + .name = "usage", + .read_u64 = files_usage_read, + }, + { + .name = "no_acct", + .flags = CFTYPE_ONLY_ON_ROOT, + .read_u64 = files_disabled_read, + .write_u64 = files_disabled_write, + }, + { } +}; + +struct cgroup_subsys files_cgrp_subsys = { + .css_alloc = files_cgroup_css_alloc, + .css_free = files_cgroup_css_free, + .can_attach = files_cgroup_can_attach, + .legacy_cftypes = files, +}; + +/* + * It could race against cgroup migration of current task, and + * using task_get_css() to get a valid css. 
+ */ +void files_cgroup_assign(struct files_struct *files) +{ + struct cgroup_subsys_state *css; + + if (!files_cgroup_enabled()) + return; + + if (files == &init_files) + return; + + css = task_get_css(current, files_cgrp_id); + files->files_cgroup = container_of(css, struct files_cgroup, css); +} + +void files_cgroup_remove(struct files_struct *files) +{ + struct task_struct *tsk = current; + struct files_cgroup *fcg; + + if (!files_cgroup_enabled()) + return; + + if (files == &init_files) + return; + + task_lock(tsk); + spin_lock(&files->file_lock); + fcg = files_cgroup_from_files(files); + css_put(&fcg->css); + spin_unlock(&files->file_lock); + task_unlock(tsk); +} diff --git a/fs/filescontrol.c b/fs/filescontrol.c index 18924673b806..1102e9a8ba06 100644 --- a/fs/filescontrol.c +++ b/fs/filescontrol.c @@ -1,100 +1,15 @@ -// SPDX-License-Identifier: GPL-2.0 -/* filescontrol.c - Cgroup controller for open file handles. - * - * Copyright 2014 Google Inc. - * Author: Brian Makin merimus@google.com - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. +/* SPDX-License-Identifier: GPL-2.0-or-later OR BSD-2-Clause */ +/* + * Copyright (c) 2024-2024, Huawei Tech. Co., Ltd. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * Author: Ridong Chen chenridong@huawei.com */
-#include <linux/page_counter.h> -#include <linux/filescontrol.h> -#include <linux/cgroup.h> -#include <linux/export.h> -#include <linux/printk.h> -#include <linux/slab.h> -#include <linux/fs.h> -#include <linux/seq_file.h> #include <linux/fdtable.h> -#include <linux/sched/signal.h> -#include <linux/module.h> - -#define FILES_MAX D_COUNT_MAX -#define FILES_MAX_STR "max" - -static bool no_acct; -struct cgroup_subsys files_cgrp_subsys __read_mostly; - -module_param(no_acct, bool, 0444); - -struct files_cgroup { - struct cgroup_subsys_state css; - struct page_counter open_handles; -}; - -static inline bool files_cgroup_enabled(void) -{ - return cgroup_subsys_enabled(files_cgrp_subsys); -} - -static inline struct files_cgroup *css_fcg(struct cgroup_subsys_state *css) -{ - return css ? container_of(css, struct files_cgroup, css) : NULL; -} - -static inline struct page_counter * -css_res_open_handles(struct cgroup_subsys_state *css) -{ - return &css_fcg(css)->open_handles; -} - -static inline struct files_cgroup * -files_cgroup_from_files(struct files_struct *files) -{ - return files->files_cgroup; -} - - -static struct cgroup_subsys_state * -files_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) -{ - struct files_cgroup *parent_fcg; - struct files_cgroup *fcg; - - parent_fcg = css_fcg(parent_css); - fcg = kzalloc(sizeof(*fcg), GFP_KERNEL); - if (!fcg) - goto out; - - if (!parent_fcg) { - page_counter_init(&fcg->open_handles, NULL); - page_counter_set_max(&fcg->open_handles, FILES_MAX); - } else { - struct page_counter *p_counter = &parent_fcg->open_handles; - - page_counter_init(&fcg->open_handles, p_counter); - page_counter_set_max(&fcg->open_handles, FILES_MAX); - } - return &fcg->css; - -out: - return ERR_PTR(-ENOMEM); -} - -static void files_cgroup_css_free(struct cgroup_subsys_state *css) -{ - kfree(css_fcg(css)); -} +#include <linux/filescontrol.h> +#include <linux/files-cgroup.h>
-u64 files_cgroup_count_fds(struct files_struct *files) +u64 file_cg_count_fds(struct files_struct *files) { int i; struct fdtable *fdt; @@ -106,241 +21,34 @@ u64 files_cgroup_count_fds(struct files_struct *files) return retval; }
-/* - * If attaching this cgroup would overcommit the resource then deny - * the attach. If not, attach the file resource into new cgroup. - */ -static int files_cgroup_can_attach(struct cgroup_taskset *tset) -{ - u64 num_files; - bool can_attach; - struct cgroup_subsys_state *to_css; - struct cgroup_subsys_state *from_css; - struct page_counter *from_res; - struct page_counter *to_res; - struct page_counter *fail_res; - struct files_struct *files; - struct task_struct *task = cgroup_taskset_first(tset, &to_css); - - to_res = css_res_open_handles(to_css); - - task_lock(task); - files = task->files; - if (!files || files == &init_files) { - task_unlock(task); - return 0; - } - - from_css = &files_cgroup_from_files(files)->css; - from_res = css_res_open_handles(from_css); - - spin_lock(&files->file_lock); - num_files = files_cgroup_count_fds(files); - page_counter_uncharge(from_res, num_files); - - if (!page_counter_try_charge(to_res, num_files, &fail_res)) { - page_counter_charge(from_res, num_files); - pr_err("Open files limit overcommited\n"); - can_attach = false; - } else { - css_put(from_css); - css_get(to_css); - task->files->files_cgroup = css_fcg(to_css); - can_attach = true; - } - spin_unlock(&files->file_lock); - task_unlock(task); - return can_attach ? 0 : -ENOSPC; -} - -int files_cgroup_alloc_fd(struct files_struct *files, u64 n) -{ - if (!files_cgroup_enabled()) - return 0; - /* - * Kernel threads which are forked by kthreadd inherited the - * const files_struct 'init_files', we didn't wrap it so - * there's no associated files_cgroup. - * - * Kernel threads always stay in root cgroup, and we don't - * have limit for root files cgroup, so it won't hurt if - * we don't charge their fds, only issue is that files.usage - * won't be accurate in root files cgroup. 
- */ - if (!no_acct && files != &init_files) { - struct page_counter *fail_res; - struct files_cgroup *files_cgroup = - files_cgroup_from_files(files); - if (!page_counter_try_charge(&files_cgroup->open_handles, - n, &fail_res)) - return -ENOMEM; - } - return 0; -} - -void files_cgroup_unalloc_fd(struct files_struct *files, u64 n) -{ - if (!files_cgroup_enabled()) - return; - /* - * It's not charged so no need to uncharge, see comments in - * files_cgroup_alloc_fd. - */ - if (!no_acct && files != &init_files) { - struct files_cgroup *files_cgroup = - files_cgroup_from_files(files); - page_counter_uncharge(&files_cgroup->open_handles, n); - } -} - -void files_cgroup_put_fd(struct files_struct *files, unsigned int fd) -{ - struct fdtable *fdt = files_fdtable(files); - - if (!files_cgroup_enabled()) - return; - - if (test_bit(fd, fdt->open_fds)) - return files_cgroup_unalloc_fd(files, 1); -} - -int files_cgroup_dup_fds(struct files_struct *newf) -{ - int err; - - if (!files_cgroup_enabled()) - return 0; - spin_lock(&newf->file_lock); - err = files_cgroup_alloc_fd(newf, files_cgroup_count_fds(newf)); - spin_unlock(&newf->file_lock); - return err; -} - -static u64 files_disabled_read(struct cgroup_subsys_state *css, - struct cftype *cft) +int files_cg_alloc_fd(struct files_struct *files, u64 n) { - return no_acct; + return files_cgroup_alloc_fd(files, n); }
-static int files_disabled_write(struct cgroup_subsys_state *css, - struct cftype *cft, u64 val) +void files_cg_unalloc_fd(struct files_struct *files, u64 n) { - if (!val) - return -EINVAL; - no_acct = true; - - return 0; + files_cgroup_unalloc_fd(files, n); }
-static int files_limit_read(struct seq_file *sf, void *v) +void files_cg_assign(struct files_struct *files) { - struct files_cgroup *fcg = css_fcg(seq_css(sf)); - struct page_counter *counter = &fcg->open_handles; - u64 limit = counter->max; - - if (limit >= FILES_MAX) - seq_printf(sf, "%s\n", FILES_MAX_STR); - else - seq_printf(sf, "%llu\n", limit); - - return 0; + files_cgroup_assign(files); }
-static ssize_t files_limit_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off) +void files_cg_remove(struct files_struct *files) { - struct files_cgroup *fcg = css_fcg(of_css(of)); - u64 limit; - int err; - - buf = strstrip((char *)buf); - if (!strcmp(buf, FILES_MAX_STR)) { - limit = FILES_MAX; - goto set_limit; - } - - err = kstrtoull(buf, 0, &limit); - if (err) - return err; - -set_limit: - /* - * Limit updates don't need to be mutex'd, since it isn't - * critical that any racing fork()s follow the new limit. - */ - page_counter_set_max(&fcg->open_handles, limit); - return nbytes; + files_cgroup_remove(files); }
- -static u64 files_usage_read(struct cgroup_subsys_state *css, - struct cftype *cft) +int files_cg_dup_fds(struct files_struct *newf) { - struct files_cgroup *fcg = css_fcg(css); - - return page_counter_read(&fcg->open_handles); + return files_cgroup_dup_fds(newf); }
-static struct cftype files[] = { - { - .name = "limit", - .seq_show = files_limit_read, - .write = files_limit_write, - .flags = CFTYPE_NOT_ON_ROOT, - }, - { - .name = "usage", - .read_u64 = files_usage_read, - }, - { - .name = "no_acct", - .flags = CFTYPE_ONLY_ON_ROOT, - .read_u64 = files_disabled_read, - .write_u64 = files_disabled_write, - }, - { } -}; - -struct cgroup_subsys files_cgrp_subsys = { - .css_alloc = files_cgroup_css_alloc, - .css_free = files_cgroup_css_free, - .can_attach = files_cgroup_can_attach, - .legacy_cftypes = files, -}; - -/* - * It could race against cgroup migration of current task, and - * using task_get_css() to get a valid css. - */ -void files_cgroup_assign(struct files_struct *files) +void files_cg_put_fd(struct files_struct *files, unsigned int fd) { - struct cgroup_subsys_state *css; - - if (!files_cgroup_enabled()) - return; - - if (files == &init_files) - return; - - css = task_get_css(current, files_cgrp_id); - files->files_cgroup = container_of(css, struct files_cgroup, css); + files_cgroup_put_fd(files, fd); }
-void files_cgroup_remove(struct files_struct *files) -{ - struct task_struct *tsk = current; - struct files_cgroup *fcg; - - if (!files_cgroup_enabled()) - return;
- if (files == &init_files) - return; - - task_lock(tsk); - spin_lock(&files->file_lock); - fcg = files_cgroup_from_files(files); - css_put(&fcg->css); - spin_unlock(&files->file_lock); - task_unlock(tsk); -} diff --git a/include/linux/files-cgroup.h b/include/linux/files-cgroup.h new file mode 100644 index 000000000000..771bf6d4b08f --- /dev/null +++ b/include/linux/files-cgroup.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* filescontrol.h - Files Controller + * + * Copyright 2014 Google Inc. + * Author: Brian Makin merimus@google.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#ifndef __FILES_CGROUP_H_ +#define __FILES_CGROUP_H_ + +#include <linux/fdtable.h> + +extern int files_cgroup_alloc_fd(struct files_struct *files, u64 n); +extern void files_cgroup_unalloc_fd(struct files_struct *files, u64 n); + +extern struct files_struct init_files; +extern void files_cgroup_assign(struct files_struct *files); +extern void files_cgroup_remove(struct files_struct *files); + +extern int files_cgroup_dup_fds(struct files_struct *newf); +extern void files_cgroup_put_fd(struct files_struct *files, unsigned int fd); + +#endif /* __FILES_CGROUP_H_ */ diff --git a/include/linux/filescontrol.h b/include/linux/filescontrol.h index 0274349bd918..7eeb25f8ecb1 100644 --- a/include/linux/filescontrol.h +++ b/include/linux/filescontrol.h @@ -1,18 +1,8 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* filescontrol.h - Files Controller +/* SPDX-License-Identifier: GPL-2.0-or-later OR BSD-2-Clause */ +/* + * Copyright (c) 2024-2024, Huawei Tech. Co., Ltd. * - * Copyright 2014 Google Inc. - * Author: Brian Makin merimus@google.com - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * Author: Ridong Chen chenridong@huawei.com */
#ifndef _LINUX_FILESCONTROL_H @@ -20,25 +10,26 @@
#include <linux/fdtable.h>
+u64 file_cg_count_fds(struct files_struct *files); + #ifdef CONFIG_CGROUP_FILES -extern int files_cgroup_alloc_fd(struct files_struct *files, u64 n); -extern void files_cgroup_unalloc_fd(struct files_struct *files, u64 n); +extern int files_cg_alloc_fd(struct files_struct *files, u64 n); +extern void files_cg_unalloc_fd(struct files_struct *files, u64 n);
-extern struct files_struct init_files; -extern void files_cgroup_assign(struct files_struct *files); -extern void files_cgroup_remove(struct files_struct *files); +extern void files_cg_assign(struct files_struct *files); +extern void files_cg_remove(struct files_struct *files);
-extern int files_cgroup_dup_fds(struct files_struct *newf); -extern void files_cgroup_put_fd(struct files_struct *files, unsigned int fd); +extern int files_cg_dup_fds(struct files_struct *newf); +extern void files_cg_put_fd(struct files_struct *files, unsigned int fd); #else /* no CONFIG_CGROUP_FILES */ -static inline int files_cgroup_alloc_fd(struct files_struct *files, u64 n) { return 0; }; -static inline void files_cgroup_unalloc_fd(struct files_struct *files, u64 n) {}; +static inline int files_cg_alloc_fd(struct files_struct *files, u64 n) { return 0; }; +static inline void files_cg_unalloc_fd(struct files_struct *files, u64 n) {};
-static inline void files_cgroup_assign(struct files_struct *files) {}; -static inline void files_cgroup_remove(struct files_struct *files) {}; +static inline void files_cg_assign(struct files_struct *files) {}; +static inline void files_cg_remove(struct files_struct *files) {};
-static inline int files_cgroup_dup_fds(struct files_struct *newf) { return 0; }; -static inline void files_cgroup_put_fd(struct files_struct *files, unsigned int fd) {}; +static inline int files_cg_dup_fds(struct files_struct *newf) { return 0; }; +static inline void files_cg_put_fd(struct files_struct *files, unsigned int fd) {}; #endif /* CONFIG_CGROUP_FILES */
#endif /* _LINUX_FILESCONTROL_H */
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8YC6O
----------------------------------------------------------------------
The misc controller provides a resource limiting and tracking mechanism for scalar resources. Using it to control fds is more stable and concise than filescgroup, which controls fds as an independent cgroup controller; filescgroup will be abandoned in the future. The misc interfaces are shown below.
misc.capacity
A read-only flat-keyed file shown only in the root cgroup. It shows miscellaneous scalar resources available on the platform along with their quantities:: $ cat misc.capacity fd 1024
misc.current
A read-only flat-keyed file shown in all cgroups. It shows the current usage of the resources in the cgroup and its children.
$ cat misc.current fd 3
misc.max
A read-write flat-keyed file shown in non-root cgroups. It holds the allowed maximum usage of the resources in the cgroup and its children.
$ cat misc.max fd max
Limit can be set by:: # echo fd 1 > misc.max
Signed-off-by: Chen Ridong chenridong@huawei.com --- Documentation/admin-guide/cgroup-v2.rst | 7 +++ fs/Makefile | 1 + fs/filescontrol.c | 37 ++++++++++++ fs/misc-fd.c | 77 +++++++++++++++++++++++++ include/linux/fdtable.h | 3 + include/linux/files-cgroup.h | 1 - include/linux/filescontrol.h | 1 + include/linux/misc-fd.h | 21 +++++++ include/linux/misc_cgroup.h | 4 ++ kernel/cgroup/misc.c | 4 ++ 10 files changed, 155 insertions(+), 1 deletion(-) create mode 100644 fs/misc-fd.c create mode 100644 include/linux/misc-fd.h
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index e998aa071ced..dd92ccba20c2 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -2505,6 +2505,13 @@ first, and stays charged to that cgroup until that resource is freed. Migrating a process to a different cgroup does not move the charge to the destination cgroup where the process has moved.
+Misc type explanation +~~~~~~~~~~~~~~~~~~~~~~~ + + MISC_CG_RES_FD + Resource for controlling fds. An fd is charged when a new fd is allocated or when files are copied, + and uncharged when the file is closed or an unused fd is put. + Others ------
diff --git a/fs/Makefile b/fs/Makefile index c4e433f8c46e..a66fa139c207 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -49,6 +49,7 @@ obj-$(CONFIG_DIRTY_PAGES) += dirty_pages.o obj-$(CONFIG_FHANDLE) += fhandle.o obj-$(CONFIG_CGROUP_FILES) += filescontrol.o obj-$(CONFIG_CGROUP_FILES) += files-cgroup.o +obj-$(CONFIG_CGROUP_FILES) += misc-fd.o obj-y += iomap/
obj-y += quota/ diff --git a/fs/filescontrol.c b/fs/filescontrol.c index 1102e9a8ba06..17a155f45b43 100644 --- a/fs/filescontrol.c +++ b/fs/filescontrol.c @@ -8,6 +8,14 @@ #include <linux/fdtable.h> #include <linux/filescontrol.h> #include <linux/files-cgroup.h> +#include <linux/misc-fd.h> + +struct static_key_false misc_fd_enable_key; + +static inline bool file_cg_misc_enabled(void) +{ + return static_branch_likely(&misc_fd_enable_key); +}
u64 file_cg_count_fds(struct files_struct *files) { @@ -23,32 +31,61 @@ u64 file_cg_count_fds(struct files_struct *files)
int files_cg_alloc_fd(struct files_struct *files, u64 n) { + if (file_cg_misc_enabled()) + return misc_fd_alloc_fd(files, n); + return files_cgroup_alloc_fd(files, n); }
void files_cg_unalloc_fd(struct files_struct *files, u64 n) { + if (file_cg_misc_enabled()) + return misc_fd_unalloc_fd(files, n); + files_cgroup_unalloc_fd(files, n); }
void files_cg_assign(struct files_struct *files) { + if (file_cg_misc_enabled()) + return misc_fd_assign(files); + files_cgroup_assign(files); }
void files_cg_remove(struct files_struct *files) { + if (file_cg_misc_enabled()) + return misc_fd_remove(files); + files_cgroup_remove(files); }
int files_cg_dup_fds(struct files_struct *newf) { + if (file_cg_misc_enabled()) + return misc_fd_dup_fds(newf); + return files_cgroup_dup_fds(newf); }
void files_cg_put_fd(struct files_struct *files, unsigned int fd) { + if (file_cg_misc_enabled()) + return misc_fd_put_fd(files, fd); + files_cgroup_put_fd(files, fd); }
+#ifdef CONFIG_CGROUP_MISC +#include <linux/misc_cgroup.h>
+static int __init enable_misc_fd(char *s) +{ + static_branch_enable(&misc_fd_enable_key); + pr_info("file_cg enable misc to control fd\n"); + misc_cg_set_capacity(MISC_CG_RES_FD, U64_MAX); + return 1; +} +__setup("file_cg=misc", enable_misc_fd); +#endif diff --git a/fs/misc-fd.c b/fs/misc-fd.c new file mode 100644 index 000000000000..023c66e686a5 --- /dev/null +++ b/fs/misc-fd.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0-or-later OR BSD-2-Clause +/* + * Copyright (c) 2024-2024, Huawei Tech. Co., Ltd. + * + * Author: Ridong Chen chenridong@huawei.com + */ + +#include <linux/misc_cgroup.h> +#include <linux/fdtable.h> +#include <linux/filescontrol.h> +#include <linux/misc-fd.h> + +/* + * If first time to alloc,it has to init capacity + */ +int misc_fd_alloc_fd(struct files_struct *files, u64 n) +{ + if (files != &init_files) + return misc_cg_try_charge(MISC_CG_RES_FD, files->misc_cg, n); + return 0; +} + +void misc_fd_unalloc_fd(struct files_struct *files, u64 n) +{ + if (files != &init_files) + return misc_cg_uncharge(MISC_CG_RES_FD, files->misc_cg, n); +} + +void misc_fd_assign(struct files_struct *files) +{ + struct cgroup_subsys_state *css; + + if (files == NULL || files == &init_files) + return; + + css = task_get_css(current, misc_cgrp_id); + files->misc_cg = (css ? 
container_of(css, struct misc_cg, css) : NULL); +} + +void misc_fd_remove(struct files_struct *files) +{ + struct task_struct *tsk = current; + + if (files == &init_files) + return; + + task_lock(tsk); + spin_lock(&files->file_lock); + if (files->misc_cg != NULL) + css_put(&files->misc_cg->css); + spin_unlock(&files->file_lock); + task_unlock(tsk); +} + +int misc_fd_dup_fds(struct files_struct *newf) +{ + int err; + + if (newf == &init_files) + return 0; + + spin_lock(&newf->file_lock); + err = misc_fd_alloc_fd(newf, file_cg_count_fds(newf)); + spin_unlock(&newf->file_lock); + return err; +} + +void misc_fd_put_fd(struct files_struct *files, unsigned int fd) +{ + struct fdtable *fdt = files_fdtable(files); + + if (files == &init_files) + return; + + if (test_bit(fd, fdt->open_fds)) + return misc_fd_unalloc_fd(files, 1); +} diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 22b8b03fef6d..e43c3cebcc95 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -66,6 +66,9 @@ struct files_struct { unsigned long full_fds_bits_init[1]; struct file __rcu * fd_array[NR_OPEN_DEFAULT]; struct files_cgroup *files_cgroup; +#if (defined CONFIG_CGROUP_MISC && defined CONFIG_CGROUP_MISC) + struct misc_cg *misc_cg; +#endif };
struct file_operations; diff --git a/include/linux/files-cgroup.h b/include/linux/files-cgroup.h index 771bf6d4b08f..0f2c28045573 100644 --- a/include/linux/files-cgroup.h +++ b/include/linux/files-cgroup.h @@ -23,7 +23,6 @@ extern int files_cgroup_alloc_fd(struct files_struct *files, u64 n); extern void files_cgroup_unalloc_fd(struct files_struct *files, u64 n);
-extern struct files_struct init_files; extern void files_cgroup_assign(struct files_struct *files); extern void files_cgroup_remove(struct files_struct *files);
diff --git a/include/linux/filescontrol.h b/include/linux/filescontrol.h index 7eeb25f8ecb1..e1245bb8b906 100644 --- a/include/linux/filescontrol.h +++ b/include/linux/filescontrol.h @@ -15,6 +15,7 @@ u64 file_cg_count_fds(struct files_struct *files); #ifdef CONFIG_CGROUP_FILES extern int files_cg_alloc_fd(struct files_struct *files, u64 n); extern void files_cg_unalloc_fd(struct files_struct *files, u64 n); +extern struct files_struct init_files;
extern void files_cg_assign(struct files_struct *files); extern void files_cg_remove(struct files_struct *files); diff --git a/include/linux/misc-fd.h b/include/linux/misc-fd.h new file mode 100644 index 000000000000..3192d580987c --- /dev/null +++ b/include/linux/misc-fd.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later OR BSD-2-Clause */ +/* + * Copyright (c) 2024-2024, Huawei Tech. Co., Ltd. + * + * Author: Ridong Chen chenridong@huawei.com + */ + +#ifndef __FD_MISC_H_ +#define __FD_MISC_H_ + +#include <linux/fdtable.h> +int misc_fd_alloc_fd(struct files_struct *files, u64 n); +void misc_fd_unalloc_fd(struct files_struct *files, u64 n); + +void misc_fd_assign(struct files_struct *files); +void misc_fd_remove(struct files_struct *files); + +int misc_fd_dup_fds(struct files_struct *newf); +void misc_fd_put_fd(struct files_struct *files, unsigned int fd); + +#endif diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h index e799b1f8d05b..5c320679040f 100644 --- a/include/linux/misc_cgroup.h +++ b/include/linux/misc_cgroup.h @@ -17,6 +17,10 @@ enum misc_res_type { MISC_CG_RES_SEV, /* AMD SEV-ES ASIDs resource */ MISC_CG_RES_SEV_ES, +#endif +#ifdef CONFIG_CGROUP_FILES + /* fd for file control */ + MISC_CG_RES_FD, #endif MISC_CG_RES_TYPES }; diff --git a/kernel/cgroup/misc.c b/kernel/cgroup/misc.c index 79a3717a5803..ccd2b04edab3 100644 --- a/kernel/cgroup/misc.c +++ b/kernel/cgroup/misc.c @@ -24,6 +24,10 @@ static const char *const misc_res_name[] = { /* AMD SEV-ES ASIDs resource */ "sev_es", #endif +#ifdef CONFIG_CGROUP_FILES + /* fd for file control */ + "fd", +#endif };
/* Root misc cgroup */
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8OIQR
----------------------------------------------------------------------
If the files_struct of a task has no files_cgroup assigned (files->files_cgroup is NULL) and the task is migrated to another cgroup, files_cgroup_can_attach() ends up dereferencing the NULL pointer in page_counter_uncharge():

PGD 0 P4D 0
Oops: 0002 [#1] PREEMPT SMP NOPTI
CPU: 5 PID: 453 Comm: bash Not tainted 6.6.0-02775-g7381ac6593bc-di8
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14
RIP: 0010:page_counter_uncharge+0x1c/0x80
Code: 90 90 90 90 90 90 90 90 90 90 90 90 90 90 66 0f 1f 00 41 54 4d
RSP: 0018:ffffc9000081bc88 EFLAGS: 00000206
RAX: 0000000000000005 RBX: 0000000000000100 RCX: 0000000000000005
RDX: ffff888104b8d620 RSI: fffffffffffffffb RDI: 0000000000000100
RBP: 0000000000000005 R08: ffffc9000081bd70 R09: ffffc9000081bcb0
R10: ffff88810736f740 R11: 0000000000000001 R12: fffffffffffffffb
R13: ffff888100bdc218 R14: 0000000000000100 R15: ffff888107481c00
FS: 00007fb78e5b9740(0000) GS:ffff888237b40000(0000) knlGS:00000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000100 CR3: 00000001012c2000 CR4: 00000000000006e0
Call Trace:
 <TASK>
 ? __die+0x1f/0x70
 ? page_fault_oops+0x156/0x420
 ? search_exception_tables+0x37/0x50
 ? fixup_exception+0x21/0x310
 ? exc_page_fault+0x69/0x150
 ? asm_exc_page_fault+0x26/0x30
 ? page_counter_uncharge+0x1c/0x80
 files_cgroup_can_attach+0x98/0x1c0
 cgroup_migrate_execute+0x79/0x470
 ? cgroup_migrate_prepare_dst+0x43/0x220
 cgroup_attach_task+0x141/0x1f0
 __cgroup1_procs_write.constprop.0+0x10a/0x150
 kernfs_fop_write_iter+0x117/0x1b0
 vfs_write+0x2bd/0x3e0
 ksys_write+0x5e/0xe0
 do_syscall_64+0x3f/0x90
 entry_SYSCALL_64_after_hwframe+0x6e/0xd8
Signed-off-by: Chen Ridong chenridong@huawei.com --- fs/files-cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/files-cgroup.c b/fs/files-cgroup.c index dbf7452423a6..26c54ae6defe 100644 --- a/fs/files-cgroup.c +++ b/fs/files-cgroup.c @@ -115,7 +115,7 @@ static int files_cgroup_can_attach(struct cgroup_taskset *tset)
task_lock(task); files = task->files; - if (!files || files == &init_files) { + if (!files || !files->files_cgroup || files == &init_files) { task_unlock(task); return 0; }