V2: Add a new misc resource for controlling fd.
Chen Ridong (4): filescgroup: remove files of dfl_cftypes. filecgroup: move code of filescontrol.c to files-cgroup.c misc: add MISC_CG_RES_FD resource to control fd. Documentation: add misc resource statement.
Documentation/admin-guide/cgroup-v2.rst | 7 + fs/Makefile | 2 + fs/file.c | 16 +- fs/files-cgroup.c | 335 +++++++++++++++++++++++ fs/filescontrol.c | 339 +++--------------------- fs/misc-fd.c | 92 +++++++ include/linux/fdtable.h | 1 + include/linux/files-cgroup.h | 33 +++ include/linux/filescontrol.h | 45 ++-- include/linux/misc-fd.h | 23 ++ include/linux/misc_cgroup.h | 2 + kernel/cgroup/misc.c | 2 + 12 files changed, 563 insertions(+), 334 deletions(-) create mode 100644 fs/files-cgroup.c create mode 100644 fs/misc-fd.c create mode 100644 include/linux/files-cgroup.h create mode 100644 include/linux/misc-fd.h
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8OIQR
----------------------------------------------------------------------
Cgroup v2 will not support filescgroup, so remove the dfl_cftypes entry (the "files" cftype array) from files_cgrp_subsys. As a replacement, misc fd will be used to control fd in the same way.
Signed-off-by: Chen Ridong chenridong@huawei.com --- fs/filescontrol.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/fs/filescontrol.c b/fs/filescontrol.c index 0947566b9119..18924673b806 100644 --- a/fs/filescontrol.c +++ b/fs/filescontrol.c @@ -306,7 +306,6 @@ struct cgroup_subsys files_cgrp_subsys = { .css_free = files_cgroup_css_free, .can_attach = files_cgroup_can_attach, .legacy_cftypes = files, - .dfl_cftypes = files, };
/*
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8OIQR
----------------------------------------------------------------------
Move the code of filescontrol.c/filescontrol.h to files-cgroup.c/files-cgroup.h. In preparation for replacing filescgroup with misc fd in the future, rename the functions that are used by file.c from files_cgroup_xxx to files_cg_xxx; for now, files_cg_xxx just calls files_cgroup_xxx.
Signed-off-by: Chen Ridong chenridong@huawei.com --- fs/Makefile | 1 + fs/file.c | 16 +- fs/files-cgroup.c | 335 +++++++++++++++++++++++++++++++++++ fs/filescontrol.c | 330 ++-------------------------------- include/linux/files-cgroup.h | 33 ++++ include/linux/filescontrol.h | 45 ++--- 6 files changed, 414 insertions(+), 346 deletions(-) create mode 100644 fs/files-cgroup.c create mode 100644 include/linux/files-cgroup.h
diff --git a/fs/Makefile b/fs/Makefile index db63c24fae57..c4e433f8c46e 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -48,6 +48,7 @@ obj-$(CONFIG_DIRTY_PAGES) += dirty_pages.o
obj-$(CONFIG_FHANDLE) += fhandle.o obj-$(CONFIG_CGROUP_FILES) += filescontrol.o +obj-$(CONFIG_CGROUP_FILES) += files-cgroup.o obj-y += iomap/
obj-y += quota/ diff --git a/fs/file.c b/fs/file.c index 4419f6975c96..d560e45fde40 100644 --- a/fs/file.c +++ b/fs/file.c @@ -338,7 +338,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int new_fdt->open_fds = newf->open_fds_init; new_fdt->full_fds_bits = newf->full_fds_bits_init; new_fdt->fd = &newf->fd_array[0]; - files_cgroup_assign(newf); + files_cg_assign(newf);
spin_lock(&oldf->file_lock); old_fdt = files_fdtable(oldf); @@ -403,7 +403,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
rcu_assign_pointer(newf->fdt, new_fdt);
- if (files_cgroup_dup_fds(newf)) { + if (files_cg_dup_fds(newf)) { /* could not get enough FD resources. Need to clean up. */ new_fds = new_fdt->fd; for (i = open_files; i != 0; i--) { @@ -419,7 +419,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int } return newf; out_release: - files_cgroup_remove(newf); + files_cg_remove(newf); kmem_cache_free(files_cachep, newf); out: return NULL; @@ -445,7 +445,7 @@ static struct fdtable *close_files(struct files_struct * files) if (set & 1) { struct file * file = xchg(&fdt->fd[i], NULL); if (file) { - files_cgroup_unalloc_fd(files, 1); + files_cg_unalloc_fd(files, 1); filp_close(file, files); cond_resched(); } @@ -454,7 +454,7 @@ static struct fdtable *close_files(struct files_struct * files) set >>= 1; } } - files_cgroup_remove(files); + files_cg_remove(files); return fdt; }
@@ -548,7 +548,7 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags) */ if (error) goto repeat; - if (files_cgroup_alloc_fd(files, 1)) { + if (files_cg_alloc_fd(files, 1)) { error = -EMFILE; goto out; } @@ -590,7 +590,7 @@ static void __put_unused_fd(struct files_struct *files, unsigned int fd) { struct fdtable *fdt = files_fdtable(files);
- files_cgroup_put_fd(files, fd); + files_cg_put_fd(files, fd); __clear_open_fd(fd, fdt); if (fd < files->next_fd) files->next_fd = fd; @@ -1154,7 +1154,7 @@ __releases(&files->file_lock) goto out; }
- if (!tofree && files_cgroup_alloc_fd(files, 1)) { + if (!tofree && files_cg_alloc_fd(files, 1)) { err = -EMFILE; goto out; } diff --git a/fs/files-cgroup.c b/fs/files-cgroup.c new file mode 100644 index 000000000000..dbf7452423a6 --- /dev/null +++ b/fs/files-cgroup.c @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: GPL-2.0 +/* filescontrol.c - Cgroup controller for open file handles. + * + * Copyright 2014 Google Inc. + * Author: Brian Makin merimus@google.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/page_counter.h> +#include <linux/filescontrol.h> +#include <linux/files-cgroup.h> +#include <linux/cgroup.h> +#include <linux/export.h> +#include <linux/printk.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/seq_file.h> +#include <linux/fdtable.h> +#include <linux/sched/signal.h> +#include <linux/module.h> + +#define FILES_MAX D_COUNT_MAX +#define FILES_MAX_STR "max" + +static bool no_acct; +struct cgroup_subsys files_cgrp_subsys __read_mostly; + +module_param(no_acct, bool, 0444); + +struct files_cgroup { + struct cgroup_subsys_state css; + struct page_counter open_handles; +}; + +static inline bool files_cgroup_enabled(void) +{ + return cgroup_subsys_enabled(files_cgrp_subsys); +} + +static inline struct files_cgroup *css_fcg(struct cgroup_subsys_state *css) +{ + return css ? 
container_of(css, struct files_cgroup, css) : NULL; +} + +static inline struct page_counter * +css_res_open_handles(struct cgroup_subsys_state *css) +{ + return &css_fcg(css)->open_handles; +} + +static inline struct files_cgroup * +files_cgroup_from_files(struct files_struct *files) +{ + return files->files_cgroup; +} + + +static struct cgroup_subsys_state * +files_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) +{ + struct files_cgroup *parent_fcg; + struct files_cgroup *fcg; + + parent_fcg = css_fcg(parent_css); + fcg = kzalloc(sizeof(*fcg), GFP_KERNEL); + if (!fcg) + goto out; + + if (!parent_fcg) { + page_counter_init(&fcg->open_handles, NULL); + page_counter_set_max(&fcg->open_handles, FILES_MAX); + } else { + struct page_counter *p_counter = &parent_fcg->open_handles; + + page_counter_init(&fcg->open_handles, p_counter); + page_counter_set_max(&fcg->open_handles, FILES_MAX); + } + return &fcg->css; + +out: + return ERR_PTR(-ENOMEM); +} + +static void files_cgroup_css_free(struct cgroup_subsys_state *css) +{ + kfree(css_fcg(css)); +} + +/* + * If attaching this cgroup would overcommit the resource then deny + * the attach. If not, attach the file resource into new cgroup. 
+ */ +static int files_cgroup_can_attach(struct cgroup_taskset *tset) +{ + u64 num_files; + bool can_attach; + struct cgroup_subsys_state *to_css; + struct cgroup_subsys_state *from_css; + struct page_counter *from_res; + struct page_counter *to_res; + struct page_counter *fail_res; + struct files_struct *files; + struct task_struct *task = cgroup_taskset_first(tset, &to_css); + + to_res = css_res_open_handles(to_css); + + task_lock(task); + files = task->files; + if (!files || files == &init_files) { + task_unlock(task); + return 0; + } + + from_css = &files_cgroup_from_files(files)->css; + from_res = css_res_open_handles(from_css); + + spin_lock(&files->file_lock); + num_files = file_cg_count_fds(files); + page_counter_uncharge(from_res, num_files); + + if (!page_counter_try_charge(to_res, num_files, &fail_res)) { + page_counter_charge(from_res, num_files); + pr_err("Open files limit overcommited\n"); + can_attach = false; + } else { + css_put(from_css); + css_get(to_css); + task->files->files_cgroup = css_fcg(to_css); + can_attach = true; + } + spin_unlock(&files->file_lock); + task_unlock(task); + return can_attach ? 0 : -ENOSPC; +} + +int files_cgroup_alloc_fd(struct files_struct *files, u64 n) +{ + if (!files_cgroup_enabled()) + return 0; + /* + * Kernel threads which are forked by kthreadd inherited the + * const files_struct 'init_files', we didn't wrap it so + * there's no associated files_cgroup. + * + * Kernel threads always stay in root cgroup, and we don't + * have limit for root files cgroup, so it won't hurt if + * we don't charge their fds, only issue is that files.usage + * won't be accurate in root files cgroup. 
+ */ + if (!no_acct && files != &init_files) { + struct page_counter *fail_res; + struct files_cgroup *files_cgroup = + files_cgroup_from_files(files); + if (!page_counter_try_charge(&files_cgroup->open_handles, + n, &fail_res)) + return -ENOMEM; + } + return 0; +} + +void files_cgroup_unalloc_fd(struct files_struct *files, u64 n) +{ + if (!files_cgroup_enabled()) + return; + /* + * It's not charged so no need to uncharge, see comments in + * files_cgroup_alloc_fd. + */ + if (!no_acct && files != &init_files) { + struct files_cgroup *files_cgroup = + files_cgroup_from_files(files); + page_counter_uncharge(&files_cgroup->open_handles, n); + } +} + +void files_cgroup_put_fd(struct files_struct *files, unsigned int fd) +{ + struct fdtable *fdt = files_fdtable(files); + + if (!files_cgroup_enabled()) + return; + + if (test_bit(fd, fdt->open_fds)) + return files_cgroup_unalloc_fd(files, 1); +} + +int files_cgroup_dup_fds(struct files_struct *newf) +{ + int err; + + if (!files_cgroup_enabled()) + return 0; + spin_lock(&newf->file_lock); + err = files_cgroup_alloc_fd(newf, file_cg_count_fds(newf)); + spin_unlock(&newf->file_lock); + return err; +} + +static u64 files_disabled_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + return no_acct; +} + +static int files_disabled_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 val) +{ + if (!val) + return -EINVAL; + no_acct = true; + + return 0; +} + +static int files_limit_read(struct seq_file *sf, void *v) +{ + struct files_cgroup *fcg = css_fcg(seq_css(sf)); + struct page_counter *counter = &fcg->open_handles; + u64 limit = counter->max; + + if (limit >= FILES_MAX) + seq_printf(sf, "%s\n", FILES_MAX_STR); + else + seq_printf(sf, "%llu\n", limit); + + return 0; +} + +static ssize_t files_limit_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + struct files_cgroup *fcg = css_fcg(of_css(of)); + u64 limit; + int err; + + buf = strstrip((char *)buf); + if (!strcmp(buf, 
FILES_MAX_STR)) { + limit = FILES_MAX; + goto set_limit; + } + + err = kstrtoull(buf, 0, &limit); + if (err) + return err; + +set_limit: + /* + * Limit updates don't need to be mutex'd, since it isn't + * critical that any racing fork()s follow the new limit. + */ + page_counter_set_max(&fcg->open_handles, limit); + return nbytes; +} + + +static u64 files_usage_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + struct files_cgroup *fcg = css_fcg(css); + + return page_counter_read(&fcg->open_handles); +} + +static struct cftype files[] = { + { + .name = "limit", + .seq_show = files_limit_read, + .write = files_limit_write, + .flags = CFTYPE_NOT_ON_ROOT, + }, + { + .name = "usage", + .read_u64 = files_usage_read, + }, + { + .name = "no_acct", + .flags = CFTYPE_ONLY_ON_ROOT, + .read_u64 = files_disabled_read, + .write_u64 = files_disabled_write, + }, + { } +}; + +struct cgroup_subsys files_cgrp_subsys = { + .css_alloc = files_cgroup_css_alloc, + .css_free = files_cgroup_css_free, + .can_attach = files_cgroup_can_attach, + .legacy_cftypes = files, +}; + +/* + * It could race against cgroup migration of current task, and + * using task_get_css() to get a valid css. 
+ */ +void files_cgroup_assign(struct files_struct *files) +{ + struct cgroup_subsys_state *css; + + if (!files_cgroup_enabled()) + return; + + if (files == &init_files) + return; + + css = task_get_css(current, files_cgrp_id); + files->files_cgroup = container_of(css, struct files_cgroup, css); +} + +void files_cgroup_remove(struct files_struct *files) +{ + struct task_struct *tsk = current; + struct files_cgroup *fcg; + + if (!files_cgroup_enabled()) + return; + + if (files == &init_files) + return; + + task_lock(tsk); + spin_lock(&files->file_lock); + fcg = files_cgroup_from_files(files); + css_put(&fcg->css); + spin_unlock(&files->file_lock); + task_unlock(tsk); +} diff --git a/fs/filescontrol.c b/fs/filescontrol.c index 18924673b806..35fb2fe9c2d0 100644 --- a/fs/filescontrol.c +++ b/fs/filescontrol.c @@ -1,100 +1,15 @@ -// SPDX-License-Identifier: GPL-2.0 -/* filescontrol.c - Cgroup controller for open file handles. - * - * Copyright 2014 Google Inc. - * Author: Brian Makin merimus@google.com - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. +/* SPDX-License-Identifier: GPL-2.0-or-later OR BSD-2-Clause */ +/* + * Copyright (c) 2024-2024, Huawei Tech. Co., Ltd. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * Author: Ridong Chen chenridong@huawei.com */
-#include <linux/page_counter.h> -#include <linux/filescontrol.h> -#include <linux/cgroup.h> -#include <linux/export.h> -#include <linux/printk.h> -#include <linux/slab.h> -#include <linux/fs.h> -#include <linux/seq_file.h> #include <linux/fdtable.h> -#include <linux/sched/signal.h> -#include <linux/module.h> - -#define FILES_MAX D_COUNT_MAX -#define FILES_MAX_STR "max" - -static bool no_acct; -struct cgroup_subsys files_cgrp_subsys __read_mostly; - -module_param(no_acct, bool, 0444); - -struct files_cgroup { - struct cgroup_subsys_state css; - struct page_counter open_handles; -}; - -static inline bool files_cgroup_enabled(void) -{ - return cgroup_subsys_enabled(files_cgrp_subsys); -} - -static inline struct files_cgroup *css_fcg(struct cgroup_subsys_state *css) -{ - return css ? container_of(css, struct files_cgroup, css) : NULL; -} - -static inline struct page_counter * -css_res_open_handles(struct cgroup_subsys_state *css) -{ - return &css_fcg(css)->open_handles; -} - -static inline struct files_cgroup * -files_cgroup_from_files(struct files_struct *files) -{ - return files->files_cgroup; -} - - -static struct cgroup_subsys_state * -files_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) -{ - struct files_cgroup *parent_fcg; - struct files_cgroup *fcg; - - parent_fcg = css_fcg(parent_css); - fcg = kzalloc(sizeof(*fcg), GFP_KERNEL); - if (!fcg) - goto out; - - if (!parent_fcg) { - page_counter_init(&fcg->open_handles, NULL); - page_counter_set_max(&fcg->open_handles, FILES_MAX); - } else { - struct page_counter *p_counter = &parent_fcg->open_handles; - - page_counter_init(&fcg->open_handles, p_counter); - page_counter_set_max(&fcg->open_handles, FILES_MAX); - } - return &fcg->css; - -out: - return ERR_PTR(-ENOMEM); -} - -static void files_cgroup_css_free(struct cgroup_subsys_state *css) -{ - kfree(css_fcg(css)); -} +#include <linux/filescontrol.h> +#include <linux/files-cgroup.h>
-u64 files_cgroup_count_fds(struct files_struct *files) +u64 file_cg_count_fds(struct files_struct *files) { int i; struct fdtable *fdt; @@ -106,241 +21,34 @@ u64 files_cgroup_count_fds(struct files_struct *files) return retval; }
-/* - * If attaching this cgroup would overcommit the resource then deny - * the attach. If not, attach the file resource into new cgroup. - */ -static int files_cgroup_can_attach(struct cgroup_taskset *tset) -{ - u64 num_files; - bool can_attach; - struct cgroup_subsys_state *to_css; - struct cgroup_subsys_state *from_css; - struct page_counter *from_res; - struct page_counter *to_res; - struct page_counter *fail_res; - struct files_struct *files; - struct task_struct *task = cgroup_taskset_first(tset, &to_css); - - to_res = css_res_open_handles(to_css); - - task_lock(task); - files = task->files; - if (!files || files == &init_files) { - task_unlock(task); - return 0; - } - - from_css = &files_cgroup_from_files(files)->css; - from_res = css_res_open_handles(from_css); - - spin_lock(&files->file_lock); - num_files = files_cgroup_count_fds(files); - page_counter_uncharge(from_res, num_files); - - if (!page_counter_try_charge(to_res, num_files, &fail_res)) { - page_counter_charge(from_res, num_files); - pr_err("Open files limit overcommited\n"); - can_attach = false; - } else { - css_put(from_css); - css_get(to_css); - task->files->files_cgroup = css_fcg(to_css); - can_attach = true; - } - spin_unlock(&files->file_lock); - task_unlock(task); - return can_attach ? 0 : -ENOSPC; -} - -int files_cgroup_alloc_fd(struct files_struct *files, u64 n) -{ - if (!files_cgroup_enabled()) - return 0; - /* - * Kernel threads which are forked by kthreadd inherited the - * const files_struct 'init_files', we didn't wrap it so - * there's no associated files_cgroup. - * - * Kernel threads always stay in root cgroup, and we don't - * have limit for root files cgroup, so it won't hurt if - * we don't charge their fds, only issue is that files.usage - * won't be accurate in root files cgroup. 
- */ - if (!no_acct && files != &init_files) { - struct page_counter *fail_res; - struct files_cgroup *files_cgroup = - files_cgroup_from_files(files); - if (!page_counter_try_charge(&files_cgroup->open_handles, - n, &fail_res)) - return -ENOMEM; - } - return 0; -} - -void files_cgroup_unalloc_fd(struct files_struct *files, u64 n) -{ - if (!files_cgroup_enabled()) - return; - /* - * It's not charged so no need to uncharge, see comments in - * files_cgroup_alloc_fd. - */ - if (!no_acct && files != &init_files) { - struct files_cgroup *files_cgroup = - files_cgroup_from_files(files); - page_counter_uncharge(&files_cgroup->open_handles, n); - } -} - -void files_cgroup_put_fd(struct files_struct *files, unsigned int fd) -{ - struct fdtable *fdt = files_fdtable(files); - - if (!files_cgroup_enabled()) - return; - - if (test_bit(fd, fdt->open_fds)) - return files_cgroup_unalloc_fd(files, 1); -} - -int files_cgroup_dup_fds(struct files_struct *newf) -{ - int err; - - if (!files_cgroup_enabled()) - return 0; - spin_lock(&newf->file_lock); - err = files_cgroup_alloc_fd(newf, files_cgroup_count_fds(newf)); - spin_unlock(&newf->file_lock); - return err; -} - -static u64 files_disabled_read(struct cgroup_subsys_state *css, - struct cftype *cft) +int files_cg_alloc_fd(struct files_struct *files, u64 n) { - return no_acct; + return files_cgroup_alloc_fd(files, n); }
-static int files_disabled_write(struct cgroup_subsys_state *css, - struct cftype *cft, u64 val) +void files_cg_unalloc_fd(struct files_struct *files, u64 n) { - if (!val) - return -EINVAL; - no_acct = true; - - return 0; + return files_cgroup_unalloc_fd(files, n); }
-static int files_limit_read(struct seq_file *sf, void *v) +void files_cg_assign(struct files_struct *files) { - struct files_cgroup *fcg = css_fcg(seq_css(sf)); - struct page_counter *counter = &fcg->open_handles; - u64 limit = counter->max; - - if (limit >= FILES_MAX) - seq_printf(sf, "%s\n", FILES_MAX_STR); - else - seq_printf(sf, "%llu\n", limit); - - return 0; + return files_cgroup_assign(files); }
-static ssize_t files_limit_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off) +void files_cg_remove(struct files_struct *files) { - struct files_cgroup *fcg = css_fcg(of_css(of)); - u64 limit; - int err; - - buf = strstrip((char *)buf); - if (!strcmp(buf, FILES_MAX_STR)) { - limit = FILES_MAX; - goto set_limit; - } - - err = kstrtoull(buf, 0, &limit); - if (err) - return err; - -set_limit: - /* - * Limit updates don't need to be mutex'd, since it isn't - * critical that any racing fork()s follow the new limit. - */ - page_counter_set_max(&fcg->open_handles, limit); - return nbytes; + return files_cgroup_remove(files); }
- -static u64 files_usage_read(struct cgroup_subsys_state *css, - struct cftype *cft) +int files_cg_dup_fds(struct files_struct *newf) { - struct files_cgroup *fcg = css_fcg(css); - - return page_counter_read(&fcg->open_handles); + return files_cgroup_dup_fds(newf); }
-static struct cftype files[] = { - { - .name = "limit", - .seq_show = files_limit_read, - .write = files_limit_write, - .flags = CFTYPE_NOT_ON_ROOT, - }, - { - .name = "usage", - .read_u64 = files_usage_read, - }, - { - .name = "no_acct", - .flags = CFTYPE_ONLY_ON_ROOT, - .read_u64 = files_disabled_read, - .write_u64 = files_disabled_write, - }, - { } -}; - -struct cgroup_subsys files_cgrp_subsys = { - .css_alloc = files_cgroup_css_alloc, - .css_free = files_cgroup_css_free, - .can_attach = files_cgroup_can_attach, - .legacy_cftypes = files, -}; - -/* - * It could race against cgroup migration of current task, and - * using task_get_css() to get a valid css. - */ -void files_cgroup_assign(struct files_struct *files) +void files_cg_put_fd(struct files_struct *files, unsigned int fd) { - struct cgroup_subsys_state *css; - - if (!files_cgroup_enabled()) - return; - - if (files == &init_files) - return; - - css = task_get_css(current, files_cgrp_id); - files->files_cgroup = container_of(css, struct files_cgroup, css); + return files_cgroup_put_fd(files, fd); }
-void files_cgroup_remove(struct files_struct *files) -{ - struct task_struct *tsk = current; - struct files_cgroup *fcg; - - if (!files_cgroup_enabled()) - return;
- if (files == &init_files) - return; - - task_lock(tsk); - spin_lock(&files->file_lock); - fcg = files_cgroup_from_files(files); - css_put(&fcg->css); - spin_unlock(&files->file_lock); - task_unlock(tsk); -} diff --git a/include/linux/files-cgroup.h b/include/linux/files-cgroup.h new file mode 100644 index 000000000000..771bf6d4b08f --- /dev/null +++ b/include/linux/files-cgroup.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* filescontrol.h - Files Controller + * + * Copyright 2014 Google Inc. + * Author: Brian Makin merimus@google.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#ifndef __FILES_CGROUP_H_ +#define __FILES_CGROUP_H_ + +#include <linux/fdtable.h> + +extern int files_cgroup_alloc_fd(struct files_struct *files, u64 n); +extern void files_cgroup_unalloc_fd(struct files_struct *files, u64 n); + +extern struct files_struct init_files; +extern void files_cgroup_assign(struct files_struct *files); +extern void files_cgroup_remove(struct files_struct *files); + +extern int files_cgroup_dup_fds(struct files_struct *newf); +extern void files_cgroup_put_fd(struct files_struct *files, unsigned int fd); + +#endif /* __FILES_CGROUP_H_ */ diff --git a/include/linux/filescontrol.h b/include/linux/filescontrol.h index 0274349bd918..7eeb25f8ecb1 100644 --- a/include/linux/filescontrol.h +++ b/include/linux/filescontrol.h @@ -1,18 +1,8 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* filescontrol.h - Files Controller +/* SPDX-License-Identifier: GPL-2.0-or-later OR BSD-2-Clause */ +/* + * Copyright (c) 2024-2024, Huawei Tech. Co., Ltd. * - * Copyright 2014 Google Inc. - * Author: Brian Makin merimus@google.com - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * Author: Ridong Chen chenridong@huawei.com */
#ifndef _LINUX_FILESCONTROL_H @@ -20,25 +10,26 @@
#include <linux/fdtable.h>
+u64 file_cg_count_fds(struct files_struct *files); + #ifdef CONFIG_CGROUP_FILES -extern int files_cgroup_alloc_fd(struct files_struct *files, u64 n); -extern void files_cgroup_unalloc_fd(struct files_struct *files, u64 n); +extern int files_cg_alloc_fd(struct files_struct *files, u64 n); +extern void files_cg_unalloc_fd(struct files_struct *files, u64 n);
-extern struct files_struct init_files; -extern void files_cgroup_assign(struct files_struct *files); -extern void files_cgroup_remove(struct files_struct *files); +extern void files_cg_assign(struct files_struct *files); +extern void files_cg_remove(struct files_struct *files);
-extern int files_cgroup_dup_fds(struct files_struct *newf); -extern void files_cgroup_put_fd(struct files_struct *files, unsigned int fd); +extern int files_cg_dup_fds(struct files_struct *newf); +extern void files_cg_put_fd(struct files_struct *files, unsigned int fd); #else /* no CONFIG_CGROUP_FILES */ -static inline int files_cgroup_alloc_fd(struct files_struct *files, u64 n) { return 0; }; -static inline void files_cgroup_unalloc_fd(struct files_struct *files, u64 n) {}; +static inline int files_cg_alloc_fd(struct files_struct *files, u64 n) { return 0; }; +static inline void files_cg_unalloc_fd(struct files_struct *files, u64 n) {};
-static inline void files_cgroup_assign(struct files_struct *files) {}; -static inline void files_cgroup_remove(struct files_struct *files) {}; +static inline void files_cg_assign(struct files_struct *files) {}; +static inline void files_cg_remove(struct files_struct *files) {};
-static inline int files_cgroup_dup_fds(struct files_struct *newf) { return 0; }; -static inline void files_cgroup_put_fd(struct files_struct *files, unsigned int fd) {}; +static inline int files_cg_dup_fds(struct files_struct *newf) { return 0; }; +static inline void files_cg_put_fd(struct files_struct *files, unsigned int fd) {}; #endif /* CONFIG_CGROUP_FILES */
#endif /* _LINUX_FILESCONTROL_H */
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8OIQR
----------------------------------------------------------------------
Filescgroup is used to control fd in cgroup v1 as a cgroup controller, which will not be merged into mainline. It is better to control fd with the misc cgroup, which provides a resource limiting and tracking mechanism for scalar resources and has already been merged, so add the MISC_CG_RES_FD resource to control fd. Filescgroup will be abandoned in the future.
Signed-off-by: Chen Ridong chenridong@huawei.com --- fs/Makefile | 1 + fs/filescontrol.c | 34 ++++++++++++++ fs/misc-fd.c | 92 +++++++++++++++++++++++++++++++++++++ include/linux/fdtable.h | 1 + include/linux/misc-fd.h | 23 ++++++++++ include/linux/misc_cgroup.h | 2 + kernel/cgroup/misc.c | 2 + 7 files changed, 155 insertions(+) create mode 100644 fs/misc-fd.c create mode 100644 include/linux/misc-fd.h
diff --git a/fs/Makefile b/fs/Makefile index c4e433f8c46e..a66fa139c207 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -49,6 +49,7 @@ obj-$(CONFIG_DIRTY_PAGES) += dirty_pages.o obj-$(CONFIG_FHANDLE) += fhandle.o obj-$(CONFIG_CGROUP_FILES) += filescontrol.o obj-$(CONFIG_CGROUP_FILES) += files-cgroup.o +obj-$(CONFIG_CGROUP_FILES) += misc-fd.o obj-y += iomap/
obj-y += quota/ diff --git a/fs/filescontrol.c b/fs/filescontrol.c index 35fb2fe9c2d0..eb9f67fca68b 100644 --- a/fs/filescontrol.c +++ b/fs/filescontrol.c @@ -8,6 +8,14 @@ #include <linux/fdtable.h> #include <linux/filescontrol.h> #include <linux/files-cgroup.h> +#include <linux/misc-fd.h> + +struct static_key_false misc_fd_enable_key; + +static inline bool file_cg_misc_enabled(void) +{ + return static_branch_likely(&misc_fd_enable_key); +}
u64 file_cg_count_fds(struct files_struct *files) { @@ -23,32 +31,58 @@ u64 file_cg_count_fds(struct files_struct *files)
int files_cg_alloc_fd(struct files_struct *files, u64 n) { + if (file_cg_misc_enabled()) + return misc_fd_alloc_fd(files, n); + return files_cgroup_alloc_fd(files, n); }
void files_cg_unalloc_fd(struct files_struct *files, u64 n) { + if (file_cg_misc_enabled()) + return misc_fd_unalloc_fd(files, n); + return files_cgroup_unalloc_fd(files, n); }
void files_cg_assign(struct files_struct *files) { + if (file_cg_misc_enabled()) + return misc_fd_assign(files); + return files_cgroup_assign(files); }
void files_cg_remove(struct files_struct *files) { + if (file_cg_misc_enabled()) + return misc_fd_remove(files); + return files_cgroup_remove(files); }
int files_cg_dup_fds(struct files_struct *newf) { + if (file_cg_misc_enabled()) + return misc_fd_dup_fds(newf); + return files_cgroup_dup_fds(newf); }
void files_cg_put_fd(struct files_struct *files, unsigned int fd) { + if (file_cg_misc_enabled()) + return misc_fd_put_fd(files, fd); + return files_cgroup_put_fd(files, fd); }
+static int __init enable_misc_fd(char *s) +{ + static_branch_enable(&misc_fd_enable_key); + pr_info("file_cg enable misc to control fd\n"); + + return 1; +} +__setup("file_cg=misc", enable_misc_fd);
diff --git a/fs/misc-fd.c b/fs/misc-fd.c new file mode 100644 index 000000000000..b72dcd4852db --- /dev/null +++ b/fs/misc-fd.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0-or-later OR BSD-2-Clause +/* + * Copyright (c) 2024-2024, Huawei Tech. Co., Ltd. + * + * Author: Ridong Chen chenridong@huawei.com + */ + +#include <linux/misc_cgroup.h> +#include <linux/fdtable.h> +#include <linux/filescontrol.h> +#include <linux/misc-fd.h> + +static bool is_init; + +bool is_misc_fd_init(void) +{ + return is_init; +} + +void misc_fd_init(void) +{ + misc_cg_set_capacity(MISC_CG_RES_FD, U64_MAX); + is_init = true; +} + +/* + * If first time to alloc,it has to init capacity + */ +int misc_fd_alloc_fd(struct files_struct *files, u64 n) +{ + if (!is_misc_fd_init()) + misc_fd_init(); + if (files != &init_files) + return misc_cg_try_charge(MISC_CG_RES_FD, files->misc_cg, n); + return 0; +} + +void misc_fd_unalloc_fd(struct files_struct *files, u64 n) +{ + if (files != &init_files) + return misc_cg_uncharge(MISC_CG_RES_FD, files->misc_cg, n); +} + +void misc_fd_assign(struct files_struct *files) +{ + struct cgroup_subsys_state *css; + + if (files == NULL || files == &init_files) + return; + + css = task_get_css(current, misc_cgrp_id); + files->misc_cg = (css ? 
container_of(css, struct misc_cg, css) : NULL); +} + +void misc_fd_remove(struct files_struct *files) +{ + struct task_struct *tsk = current; + + if (files == &init_files) + return; + + task_lock(tsk); + spin_lock(&files->file_lock); + if (files->misc_cg != NULL) + css_put(&files->misc_cg->css); + spin_unlock(&files->file_lock); + task_unlock(tsk); +} + +int misc_fd_dup_fds(struct files_struct *newf) +{ + int err; + + if (newf == &init_files) + return 0; + + spin_lock(&newf->file_lock); + err = misc_fd_alloc_fd(newf, file_cg_count_fds(newf)); + spin_unlock(&newf->file_lock); + return err; +} + +void misc_fd_put_fd(struct files_struct *files, unsigned int fd) +{ + struct fdtable *fdt = files_fdtable(files); + + if (files == &init_files) + return; + + if (test_bit(fd, fdt->open_fds)) + return misc_fd_unalloc_fd(files, 1); +} diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 22b8b03fef6d..07f4940206b1 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -66,6 +66,7 @@ struct files_struct { unsigned long full_fds_bits_init[1]; struct file __rcu * fd_array[NR_OPEN_DEFAULT]; struct files_cgroup *files_cgroup; + struct misc_cg *misc_cg; };
struct file_operations; diff --git a/include/linux/misc-fd.h b/include/linux/misc-fd.h new file mode 100644 index 000000000000..92c238fe8d3b --- /dev/null +++ b/include/linux/misc-fd.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later OR BSD-2-Clause */ +/* + * Copyright (c) 2024-2024, Huawei Tech. Co., Ltd. + * + * Author: Ridong Chen chenridong@huawei.com + */ + +#ifndef __FD_MISC_H_ +#define __FD_MISC_H_ + +#include <linux/fdtable.h> +extern struct files_struct init_files; + +extern int misc_fd_alloc_fd(struct files_struct *files, u64 n); +extern void misc_fd_unalloc_fd(struct files_struct *files, u64 n); + +extern void misc_fd_assign(struct files_struct *files); +extern void misc_fd_remove(struct files_struct *files); + +extern int misc_fd_dup_fds(struct files_struct *newf); +extern void misc_fd_put_fd(struct files_struct *files, unsigned int fd); + +#endif diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h index e799b1f8d05b..c859a1ed583f 100644 --- a/include/linux/misc_cgroup.h +++ b/include/linux/misc_cgroup.h @@ -18,6 +18,8 @@ enum misc_res_type { /* AMD SEV-ES ASIDs resource */ MISC_CG_RES_SEV_ES, #endif + /* fd for file control */ + MISC_CG_RES_FD, MISC_CG_RES_TYPES };
diff --git a/kernel/cgroup/misc.c b/kernel/cgroup/misc.c index 79a3717a5803..366a12b85f03 100644 --- a/kernel/cgroup/misc.c +++ b/kernel/cgroup/misc.c @@ -24,6 +24,8 @@ static const char *const misc_res_name[] = { /* AMD SEV-ES ASIDs resource */ "sev_es", #endif + /* fd for file control */ + "fd", };
/* Root misc cgroup */
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8OIQR
----------------------------------------------------------------------
Add misc fd resource statement, and explain when to charge and uncharge.
Signed-off-by: Chen Ridong chenridong@huawei.com --- Documentation/admin-guide/cgroup-v2.rst | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index e998aa071ced..dd92ccba20c2 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -2505,6 +2505,13 @@ first, and stays charged to that cgroup until that resource is freed. Migrating a process to a different cgroup does not move the charge to the destination cgroup where the process has moved.
+Misc type explanation +~~~~~~~~~~~~~~~~~~~~~~~ + + MISC_CG_RES_FD + Resource for controlling fd. An fd is charged when a new fd is allocated or files are copied, + and uncharged when a file is closed or an unused fd is put. + Others ------