hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8YC6O
----------------------------------------------------------------------
The files cgroup is used to control fds in cgroup v1, while in cgroup v2 it is better to control fds with the misc controller, which provides a resource limiting and tracking mechanism for scalar resources. filescontrol.c is renamed to files-cgroup.c (and moved to kernel/cgroup/); filescontrol.h now holds the interface exposed to the rest of the kernel and selects, based on the kernel config, whether the files cgroup or the misc controller is used.
Signed-off-by: Chen Ridong chenridong@huawei.com --- fs/Makefile | 1 - fs/file.c | 16 +-- include/linux/fdtable.h | 1 + include/linux/files-cgroup.h | 33 +++++++ include/linux/filescontrol.h | 99 ++++++++++++++++--- include/linux/misc-fd.h | 22 +++++ include/linux/misc_cgroup.h | 4 + init/Kconfig | 10 ++ kernel/cgroup/Makefile | 2 + .../cgroup/files-cgroup.c | 18 +--- kernel/cgroup/misc-fd.c | 91 +++++++++++++++++ kernel/cgroup/misc.c | 4 + 12 files changed, 263 insertions(+), 38 deletions(-) create mode 100644 include/linux/files-cgroup.h create mode 100644 include/linux/misc-fd.h rename fs/filescontrol.c => kernel/cgroup/files-cgroup.c (95%) create mode 100644 kernel/cgroup/misc-fd.c
diff --git a/fs/Makefile b/fs/Makefile index cc4735c23c32..6246e173e1e4 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -47,7 +47,6 @@ obj-$(CONFIG_SYSCTL) += drop_caches.o sysctls.o obj-$(CONFIG_DIRTY_PAGES) += dirty_pages.o
obj-$(CONFIG_FHANDLE) += fhandle.o -obj-$(CONFIG_CGROUP_FILES) += filescontrol.o obj-y += iomap/
obj-y += quota/ diff --git a/fs/file.c b/fs/file.c index 4419f6975c96..2ad93f34120e 100644 --- a/fs/file.c +++ b/fs/file.c @@ -338,7 +338,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int new_fdt->open_fds = newf->open_fds_init; new_fdt->full_fds_bits = newf->full_fds_bits_init; new_fdt->fd = &newf->fd_array[0]; - files_cgroup_assign(newf); + fc_assign(newf);
spin_lock(&oldf->file_lock); old_fdt = files_fdtable(oldf); @@ -403,7 +403,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
rcu_assign_pointer(newf->fdt, new_fdt);
- if (files_cgroup_dup_fds(newf)) { + if (fc_dup_fds(newf)) { /* could not get enough FD resources. Need to clean up. */ new_fds = new_fdt->fd; for (i = open_files; i != 0; i--) { @@ -419,7 +419,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int } return newf; out_release: - files_cgroup_remove(newf); + fc_remove(newf); kmem_cache_free(files_cachep, newf); out: return NULL; @@ -445,7 +445,7 @@ static struct fdtable *close_files(struct files_struct * files) if (set & 1) { struct file * file = xchg(&fdt->fd[i], NULL); if (file) { - files_cgroup_unalloc_fd(files, 1); + fc_unalloc_fd(files, 1); filp_close(file, files); cond_resched(); } @@ -454,7 +454,7 @@ static struct fdtable *close_files(struct files_struct * files) set >>= 1; } } - files_cgroup_remove(files); + fc_remove(files); return fdt; }
@@ -548,7 +548,7 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags) */ if (error) goto repeat; - if (files_cgroup_alloc_fd(files, 1)) { + if (fc_alloc_fd(files, 1)) { error = -EMFILE; goto out; } @@ -590,7 +590,7 @@ static void __put_unused_fd(struct files_struct *files, unsigned int fd) { struct fdtable *fdt = files_fdtable(files);
- files_cgroup_put_fd(files, fd); + fc_put_fd(files, fd); __clear_open_fd(fd, fdt); if (fd < files->next_fd) files->next_fd = fd; @@ -1154,7 +1154,7 @@ __releases(&files->file_lock) goto out; }
- if (!tofree && files_cgroup_alloc_fd(files, 1)) { + if (!tofree && fc_alloc_fd(files, 1)) { err = -EMFILE; goto out; } diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 22b8b03fef6d..07f4940206b1 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -66,6 +66,7 @@ struct files_struct { unsigned long full_fds_bits_init[1]; struct file __rcu * fd_array[NR_OPEN_DEFAULT]; struct files_cgroup *files_cgroup; + struct misc_cg *misc_cg; };
struct file_operations; diff --git a/include/linux/files-cgroup.h b/include/linux/files-cgroup.h new file mode 100644 index 000000000000..771bf6d4b08f --- /dev/null +++ b/include/linux/files-cgroup.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* filescontrol.h - Files Controller + * + * Copyright 2014 Google Inc. + * Author: Brian Makin merimus@google.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __FILES_CGROUP_H_ +#define __FILES_CGROUP_H_ + +#include <linux/fdtable.h> + +extern int files_cgroup_alloc_fd(struct files_struct *files, u64 n); +extern void files_cgroup_unalloc_fd(struct files_struct *files, u64 n); + +extern struct files_struct init_files; +extern void files_cgroup_assign(struct files_struct *files); +extern void files_cgroup_remove(struct files_struct *files); + +extern int files_cgroup_dup_fds(struct files_struct *newf); +extern void files_cgroup_put_fd(struct files_struct *files, unsigned int fd); + +#endif /* __FILES_CGROUP_H_ */ diff --git a/include/linux/filescontrol.h b/include/linux/filescontrol.h index 0274349bd918..9e4caeacbfbd 100644 --- a/include/linux/filescontrol.h +++ b/include/linux/filescontrol.h @@ -19,26 +19,97 @@ #define _LINUX_FILESCONTROL_H
#include <linux/fdtable.h> +#include <linux/files-cgroup.h> +#include <linux/misc-fd.h> + +static inline u64 fc_count_fds(struct files_struct *files) +{ + int i; + struct fdtable *fdt; + int retval = 0; + + fdt = files_fdtable(files); + for (i = 0; i < DIV_ROUND_UP(fdt->max_fds, BITS_PER_LONG); i++) + retval += hweight64((__u64)fdt->open_fds[i]); + return retval; +}
#ifdef CONFIG_CGROUP_FILES -extern int files_cgroup_alloc_fd(struct files_struct *files, u64 n); -extern void files_cgroup_unalloc_fd(struct files_struct *files, u64 n); +#ifdef CONFIG_CGROUP_MISC_FD +static inline int fc_alloc_fd(struct files_struct *files, u64 n) +{ + return misc_fd_alloc_fd(files, n); +} +static inline void fc_unalloc_fd(struct files_struct *files, u64 n) +{ + return misc_fd_unalloc_fd(files, n); +} + +static inline void fc_assign(struct files_struct *files) +{ + return misc_fd_assign(files); +} + +static inline void fc_remove(struct files_struct *files) +{ + return misc_fd_remove(files); /* releases what fc_assign() acquired */ +} + +static inline int fc_dup_fds(struct files_struct *newf) +{ + return misc_fd_dup_fds(newf); +} + +static inline void fc_put_fd(struct files_struct *files, unsigned int fd) +{ + return misc_fd_put_fd(files, fd); +} +#else /* no CONFIG_CGROUP_MISC_FD */ +static inline int fc_alloc_fd(struct files_struct *files, u64 n) +{ + return files_cgroup_alloc_fd(files, n); +}
-extern struct files_struct init_files; -extern void files_cgroup_assign(struct files_struct *files); -extern void files_cgroup_remove(struct files_struct *files); +static inline void fc_unalloc_fd(struct files_struct *files, u64 n) +{ + return files_cgroup_unalloc_fd(files, n); +}
-extern int files_cgroup_dup_fds(struct files_struct *newf); -extern void files_cgroup_put_fd(struct files_struct *files, unsigned int fd); +static inline void fc_assign(struct files_struct *files) +{ + return files_cgroup_assign(files); +} + +static inline void fc_remove(struct files_struct *files) +{ + return files_cgroup_remove(files); /* counterpart of fc_assign() */ +} + +static inline int fc_dup_fds(struct files_struct *newf) +{ + return files_cgroup_dup_fds(newf); +} + +static inline void fc_put_fd(struct files_struct *files, unsigned int fd) +{ + return files_cgroup_put_fd(files, fd); +} + +#endif /* CONFIG_CGROUP_MISC_FD */ #else /* no CONFIG_CGROUP_FILES */ -static inline int files_cgroup_alloc_fd(struct files_struct *files, u64 n) { return 0; }; -static inline void files_cgroup_unalloc_fd(struct files_struct *files, u64 n) {}; +static inline int fc_alloc_fd(struct files_struct *files, u64 n) +{ + return 0; +}; +static inline void fc_unalloc_fd(struct files_struct *files, u64 n) {};
-static inline void files_cgroup_assign(struct files_struct *files) {}; -static inline void files_cgroup_remove(struct files_struct *files) {}; +static inline void fc_assign(struct files_struct *files) {}; +static inline void fc_remove(struct files_struct *files) {};
-static inline int files_cgroup_dup_fds(struct files_struct *newf) { return 0; }; -static inline void files_cgroup_put_fd(struct files_struct *files, unsigned int fd) {}; +static inline int fc_dup_fds(struct files_struct *newf) +{ + return 0; +}; +static inline void fc_put_fd(struct files_struct *files, unsigned int fd) {}; #endif /* CONFIG_CGROUP_FILES */ - #endif /* _LINUX_FILESCONTROL_H */ diff --git a/include/linux/misc-fd.h b/include/linux/misc-fd.h new file mode 100644 index 000000000000..84f2dd8197ee --- /dev/null +++ b/include/linux/misc-fd.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later OR BSD-2-Clause */ +/* + * Copyright (c) 2024-2024, Huawei Tech. Co., Ltd. + * + * Author: Ridong Chen chenridong@huawei.com + */ + +#ifndef __FD_MISC_H_ +#define __FD_MISC_H_ + +#include <linux/fdtable.h> + +extern int misc_fd_alloc_fd(struct files_struct *files, u64 n); +extern void misc_fd_unalloc_fd(struct files_struct *files, u64 n); + +extern void misc_fd_assign(struct files_struct *files); +extern void misc_fd_remove(struct files_struct *files); + +extern int misc_fd_dup_fds(struct files_struct *newf); +extern void misc_fd_put_fd(struct files_struct *files, unsigned int fd); + +#endif diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h index e799b1f8d05b..ff8d54f7dd1b 100644 --- a/include/linux/misc_cgroup.h +++ b/include/linux/misc_cgroup.h @@ -17,6 +17,10 @@ enum misc_res_type { MISC_CG_RES_SEV, /* AMD SEV-ES ASIDs resource */ MISC_CG_RES_SEV_ES, +#endif +#ifdef CONFIG_CGROUP_MISC_FD + /* fd for file control */ + MISC_FD, #endif MISC_CG_RES_TYPES }; diff --git a/init/Kconfig b/init/Kconfig index f6a08293f75b..3d8ed563d4ae 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1271,6 +1271,16 @@ config CGROUP_MISC For more information, please check misc cgroup section in /Documentation/admin-guide/cgroup-v2.rst.
+config CGROUP_MISC_FD + bool "Misc for controlling fd" + depends on CGROUP_MISC + default n + help + Provides a way to control fd. + + The misc controller handles scalar resources, so it is a good fit for + controlling fd. + config CGROUP_DEBUG bool "Debug controller" default n diff --git a/kernel/cgroup/Makefile b/kernel/cgroup/Makefile index 12f8457ad1f9..3962d854d21a 100644 --- a/kernel/cgroup/Makefile +++ b/kernel/cgroup/Makefile @@ -6,4 +6,6 @@ obj-$(CONFIG_CGROUP_PIDS) += pids.o obj-$(CONFIG_CGROUP_RDMA) += rdma.o obj-$(CONFIG_CPUSETS) += cpuset.o obj-$(CONFIG_CGROUP_MISC) += misc.o +obj-$(CONFIG_CGROUP_MISC_FD) += misc-fd.o obj-$(CONFIG_CGROUP_DEBUG) += debug.o +obj-$(CONFIG_CGROUP_FILES) += files-cgroup.o diff --git a/fs/filescontrol.c b/kernel/cgroup/files-cgroup.c similarity index 95% rename from fs/filescontrol.c rename to kernel/cgroup/files-cgroup.c index 0947566b9119..532c3d8f38e9 100644 --- a/fs/filescontrol.c +++ b/kernel/cgroup/files-cgroup.c @@ -17,6 +17,7 @@
#include <linux/page_counter.h> #include <linux/filescontrol.h> +#include <linux/files-cgroup.h> #include <linux/cgroup.h> #include <linux/export.h> #include <linux/printk.h> @@ -94,18 +95,6 @@ static void files_cgroup_css_free(struct cgroup_subsys_state *css) kfree(css_fcg(css)); }
-u64 files_cgroup_count_fds(struct files_struct *files) -{ - int i; - struct fdtable *fdt; - int retval = 0; - - fdt = files_fdtable(files); - for (i = 0; i < DIV_ROUND_UP(fdt->max_fds, BITS_PER_LONG); i++) - retval += hweight64((__u64)fdt->open_fds[i]); - return retval; -} - /* * If attaching this cgroup would overcommit the resource then deny * the attach. If not, attach the file resource into new cgroup. @@ -135,7 +124,7 @@ static int files_cgroup_can_attach(struct cgroup_taskset *tset) from_res = css_res_open_handles(from_css);
spin_lock(&files->file_lock); - num_files = files_cgroup_count_fds(files); + num_files = fc_count_fds(files); page_counter_uncharge(from_res, num_files);
if (!page_counter_try_charge(to_res, num_files, &fail_res)) { @@ -211,7 +200,7 @@ int files_cgroup_dup_fds(struct files_struct *newf) if (!files_cgroup_enabled()) return 0; spin_lock(&newf->file_lock); - err = files_cgroup_alloc_fd(newf, files_cgroup_count_fds(newf)); + err = files_cgroup_alloc_fd(newf, fc_count_fds(newf)); spin_unlock(&newf->file_lock); return err; } @@ -306,7 +295,6 @@ struct cgroup_subsys files_cgrp_subsys = { .css_free = files_cgroup_css_free, .can_attach = files_cgroup_can_attach, .legacy_cftypes = files, - .dfl_cftypes = files, };
/* diff --git a/kernel/cgroup/misc-fd.c b/kernel/cgroup/misc-fd.c new file mode 100644 index 000000000000..37c8168b6636 --- /dev/null +++ b/kernel/cgroup/misc-fd.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0-or-later OR BSD-2-Clause +/* + * Copyright (c) 2024-2024, Huawei Tech. Co., Ltd. + * + * Author: Ridong Chen chenridong@huawei.com + */ + +#include <linux/misc_cgroup.h> +#include <linux/fdtable.h> +#include <linux/filescontrol.h> + +static bool is_init; + +bool is_misc_fd_init(void) +{ + return is_init; +} + +void misc_fd_init(void) +{ + misc_cg_set_capacity(MISC_FD, U64_MAX); + is_init = true; +} + +/* + * If first time to alloc,it has to init capacity + */ +int misc_fd_alloc_fd(struct files_struct *files, u64 n) +{ + if (!is_misc_fd_init()) + misc_fd_init(); + if (files != &init_files) + return misc_cg_try_charge(MISC_FD, files->misc_cg, n); + return 0; +} + +void misc_fd_unalloc_fd(struct files_struct *files, u64 n) +{ + if (files != &init_files) + return misc_cg_uncharge(MISC_FD, files->misc_cg, n); +} + +void misc_fd_assign(struct files_struct *files) +{ + struct cgroup_subsys_state *css; + + if (files == NULL || files == &init_files) + return; + + css = task_get_css(current, misc_cgrp_id); + files->misc_cg = (css ? 
container_of(css, struct misc_cg, css) : NULL); +} + +void misc_fd_remove(struct files_struct *files) +{ + struct task_struct *tsk = current; + + if (files == &init_files) + return; + + task_lock(tsk); + spin_lock(&files->file_lock); + if (files->misc_cg != NULL) + css_put(&files->misc_cg->css); + spin_unlock(&files->file_lock); + task_unlock(tsk); +} + +int misc_fd_dup_fds(struct files_struct *newf) +{ + int err; + + if (newf == &init_files) + return 0; + + spin_lock(&newf->file_lock); + err = misc_fd_alloc_fd(newf, fc_count_fds(newf)); + spin_unlock(&newf->file_lock); + return err; +} + +void misc_fd_put_fd(struct files_struct *files, unsigned int fd) +{ + struct fdtable *fdt = files_fdtable(files); + + if (files == &init_files) + return; + + if (test_bit(fd, fdt->open_fds)) + return misc_fd_unalloc_fd(files, 1); +} diff --git a/kernel/cgroup/misc.c b/kernel/cgroup/misc.c index 79a3717a5803..95c2e43e7f48 100644 --- a/kernel/cgroup/misc.c +++ b/kernel/cgroup/misc.c @@ -24,6 +24,10 @@ static const char *const misc_res_name[] = { /* AMD SEV-ES ASIDs resource */ "sev_es", #endif +#ifdef CONFIG_CGROUP_MISC_FD + /* fd for file control */ + "fd", +#endif };
/* Root misc cgroup */