From: Hou Tao houtao1@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8ND8I
--------------------------------
this is bugs fix as mentioned, you can see issues for details
Merge OLK-5.10 fix: 26ba3a842868 Merge OLK-5.10 fix: 7e485fecbd7f Merge OLK-5.10 fix: a466f5dda467 Merge OLK-5.10 fix: 338fc539bb48 Merge OLK-5.10 fix: 1951d5627095 Merge OLK-5.10 fix: 69cbcd5b9a67 Merge OLK-5.10 fix: 0b7329771d3d Merge OLK-5.10 fix: e2b24a5adac6
Signed-off-by: chenridong chenridong@huawei.com --- .../admin-guide/kernel-parameters.txt | 7 +- fs/file.c | 31 +++-- fs/filescontrol.c | 115 ++++++++---------- include/linux/cgroup.h | 6 - include/linux/filescontrol.h | 6 + 5 files changed, 86 insertions(+), 79 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 41644336e358..fa4775c9d6fc 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -565,9 +565,10 @@ - if foo is an optional feature then the feature is disabled and corresponding cgroup files are not created - {Currently only "memory" controller deal with this and - cut the overhead, others just disable the usage. So - only cgroup_disable=memory is actually worthy} + {Currently only "memory" and and "files" controller + deal with this and cut the overhead, others just + disable the usage. So only cgroup_disable=memory and + cgroup_disable=files are actually worthy} Specifying "pressure" disables per-cgroup pressure stall information accounting feature
diff --git a/fs/file.c b/fs/file.c index c6565d998f8c..a9558b693dba 100644 --- a/fs/file.c +++ b/fs/file.c @@ -339,7 +339,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int new_fdt->full_fds_bits = newf->full_fds_bits_init; new_fdt->fd = &newf->fd_array[0]; #ifdef CONFIG_CGROUP_FILES - files_cgroup_assign(newf); + if (files_cgroup_enabled()) + files_cgroup_assign(newf); #endif
spin_lock(&oldf->file_lock); @@ -405,10 +406,17 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
rcu_assign_pointer(newf->fdt, new_fdt); #ifdef CONFIG_CGROUP_FILES - if (!files_cgroup_alloc_fd(newf, files_cgroup_count_fds(newf))) + if (!files_cgroup_enabled()) return newf; + spin_lock(&newf->file_lock); + if (!files_cgroup_alloc_fd(newf, files_cgroup_count_fds(newf))) { + spin_unlock(&newf->file_lock); + return newf; + } + spin_unlock(&newf->file_lock); +
-/* could not get enough FD resources. Need to clean up. */ + /* could not get enough FD resources. Need to clean up. */ new_fds = new_fdt->fd; for (i = open_files; i != 0; i--) { struct file *f = *new_fds++; @@ -425,7 +433,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
out_release: #ifdef CONFIG_CGROUP_FILES - files_cgroup_remove(newf); + if (files_cgroup_enabled()) + files_cgroup_remove(newf); #endif kmem_cache_free(files_cachep, newf); out: @@ -453,7 +462,8 @@ static struct fdtable *close_files(struct files_struct * files) struct file * file = xchg(&fdt->fd[i], NULL); if (file) { #ifdef CONFIG_CGROUP_FILES - files_cgroup_unalloc_fd(files, 1); + if (files_cgroup_enabled()) + files_cgroup_unalloc_fd(files, 1); #endif filp_close(file, files); cond_resched(); @@ -463,6 +473,10 @@ static struct fdtable *close_files(struct files_struct * files) set >>= 1; } } +#ifdef CONFIG_CGROUP_FILES + if (files_cgroup_enabled()) + files_cgroup_remove(files); +#endif
return fdt; } @@ -558,7 +572,7 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags) if (error) goto repeat; #ifdef CONFIG_CGROUP_FILES - if (files_cgroup_alloc_fd(files, 1)) { + if (files_cgroup_enabled() && files_cgroup_alloc_fd(files, 1)) { error = -EMFILE; goto out; } @@ -601,7 +615,7 @@ static void __put_unused_fd(struct files_struct *files, unsigned int fd) { struct fdtable *fdt = files_fdtable(files); #ifdef CONFIG_CGROUP_FILES - if (test_bit(fd, fdt->open_fds)) + if (files_cgroup_enabled() && test_bit(fd, fdt->open_fds)) files_cgroup_unalloc_fd(files, 1); #endif __clear_open_fd(fd, fdt); @@ -1167,7 +1181,8 @@ __releases(&files->file_lock) goto out; } #ifdef CONFIG_CGROUP_FILES - if (!tofree && files_cgroup_alloc_fd(files, 1)) { + if (files_cgroup_enabled() && + !tofree && files_cgroup_alloc_fd(files, 1)) { err = -EMFILE; goto out; } diff --git a/fs/filescontrol.c b/fs/filescontrol.c index 44ad9ef44e20..4ad500f40025 100644 --- a/fs/filescontrol.c +++ b/fs/filescontrol.c @@ -25,14 +25,17 @@ #include <linux/seq_file.h> #include <linux/fdtable.h> #include <linux/sched/signal.h> +#include <linux/module.h>
-#define FILES_MAX ULLONG_MAX +#define FILES_MAX ULONG_MAX #define FILES_MAX_STR "max"
- +static bool no_acct; struct cgroup_subsys files_cgrp_subsys __read_mostly; EXPORT_SYMBOL(files_cgrp_subsys);
+module_param(no_acct, bool, 0444); + struct files_cgroup { struct cgroup_subsys_state css; struct page_counter open_handles; @@ -99,56 +102,14 @@ u64 files_cgroup_count_fds(struct files_struct *files) return retval; }
-static u64 files_in_taskset(struct cgroup_taskset *tset) -{ - struct task_struct *task; - u64 files = 0; - struct cgroup_subsys_state *css; - - cgroup_taskset_for_each(task, css, tset) { - if (!thread_group_leader(task)) - continue; - - task_lock(task); - files += files_cgroup_count_fds(task->files); - task_unlock(task); - } - return files; -} - /* * If attaching this cgroup would overcommit the resource then deny - * the attach. + * the attach. If not, attach the file resource into new cgroup. */ static int files_cgroup_can_attach(struct cgroup_taskset *tset) -{ - struct cgroup_subsys_state *css; - unsigned long margin; - struct page_counter *cnt; - unsigned long counter; - u64 files = files_in_taskset(tset); - - cgroup_taskset_first(tset, &css); - cnt = css_res_open_handles(css); - - counter = (unsigned long)atomic_long_read(&cnt->usage); - if (cnt->max > counter) - margin = cnt->max - counter; - else - margin = 0; - if (margin < files) - return -ENOMEM; - return 0; -} - -/* - * If resource counts have gone up between can_attach and attach then - * this may overcommit resources. In that case just deny further allocation - * until the resource usage drops. - */ -static void files_cgroup_attach(struct cgroup_taskset *tset) { u64 num_files; + bool can_attach; struct cgroup_subsys_state *to_css; struct cgroup_subsys_state *from_css; struct page_counter *from_res; @@ -161,9 +122,9 @@ static void files_cgroup_attach(struct cgroup_taskset *tset)
task_lock(task); files = task->files; - if (!files) { + if (!files || files == &init_files) { task_unlock(task); - return; + return 0; }
from_css = &files_cgroup_from_files(files)->css; @@ -172,14 +133,20 @@ static void files_cgroup_attach(struct cgroup_taskset *tset) spin_lock(&files->file_lock); num_files = files_cgroup_count_fds(files); page_counter_uncharge(from_res, num_files); - css_put(from_css);
- if (!page_counter_try_charge(to_res, num_files, &fail_res)) + if (!page_counter_try_charge(to_res, num_files, &fail_res)) { + page_counter_charge(from_res, num_files); pr_err("Open files limit overcommited\n"); - css_get(to_css); - task->files->files_cgroup = css_fcg(to_css); + can_attach = false; + } else { + css_put(from_css); + css_get(to_css); + task->files->files_cgroup = css_fcg(to_css); + can_attach = true; + } spin_unlock(&files->file_lock); task_unlock(task); + return can_attach ? 0 : -ENOSPC; }
int files_cgroup_alloc_fd(struct files_struct *files, u64 n) @@ -194,7 +161,7 @@ int files_cgroup_alloc_fd(struct files_struct *files, u64 n) * we don't charge their fds, only issue is that files.usage * won't be accurate in root files cgroup. */ - if (files != &init_files) { + if (!no_acct && files != &init_files) { struct page_counter *fail_res; struct files_cgroup *files_cgroup = files_cgroup_from_files(files); @@ -212,7 +179,7 @@ void files_cgroup_unalloc_fd(struct files_struct *files, u64 n) * It's not charged so no need to uncharge, see comments in * files_cgroup_alloc_fd. */ - if (files != &init_files) { + if (!no_acct && files != &init_files) { struct files_cgroup *files_cgroup = files_cgroup_from_files(files); page_counter_uncharge(&files_cgroup->open_handles, n); @@ -220,6 +187,21 @@ void files_cgroup_unalloc_fd(struct files_struct *files, u64 n) } EXPORT_SYMBOL(files_cgroup_unalloc_fd);
+static u64 files_disabled_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + return no_acct; +} + +static int files_disabled_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 val) +{ + if (!val) + return -EINVAL; + no_acct = true; + + return 0; +}
static int files_limit_read(struct seq_file *sf, void *v) { @@ -281,6 +263,12 @@ static struct cftype files[] = { .name = "usage", .read_u64 = files_usage_read, }, + { + .name = "no_acct", + .flags = CFTYPE_ONLY_ON_ROOT, + .read_u64 = files_disabled_read, + .write_u64 = files_disabled_write, + }, { } };
@@ -288,23 +276,23 @@ struct cgroup_subsys files_cgrp_subsys = { .css_alloc = files_cgroup_css_alloc, .css_free = files_cgroup_css_free, .can_attach = files_cgroup_can_attach, - .attach = files_cgroup_attach, .legacy_cftypes = files, .dfl_cftypes = files, };
+/* + * It could race against cgroup migration of current task, and + * using task_get_css() to get a valid css. + */ void files_cgroup_assign(struct files_struct *files) { - struct task_struct *tsk = current; struct cgroup_subsys_state *css; - struct cgroup *cgrp;
- task_lock(tsk); - cgrp = task_cgroup(tsk, files_cgrp_id); - css = cgroup_subsys_state(cgrp, files_cgrp_id); - css_get(css); + if (files == &init_files) + return; + + css = task_get_css(current, files_cgrp_id); files->files_cgroup = container_of(css, struct files_cgroup, css); - task_unlock(tsk); }
void files_cgroup_remove(struct files_struct *files) @@ -312,6 +300,9 @@ void files_cgroup_remove(struct files_struct *files) struct task_struct *tsk = current; struct files_cgroup *fcg;
+ if (files == &init_files) + return; + task_lock(tsk); spin_lock(&files->file_lock); fcg = files_cgroup_from_files(files); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 1cb90ea3299a..7fa51b600ee8 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -362,12 +362,6 @@ static inline void cgroup_put(struct cgroup *cgrp) css_put(&cgrp->self); }
-static inline struct cgroup_subsys_state *cgroup_subsys_state( - struct cgroup *cgrp, int subsys_id) -{ - return cgrp->subsys[subsys_id]; -} - extern struct mutex cgroup_mutex;
static inline void cgroup_lock(void) diff --git a/include/linux/filescontrol.h b/include/linux/filescontrol.h index 49dc620cf64e..0182f145a339 100644 --- a/include/linux/filescontrol.h +++ b/include/linux/filescontrol.h @@ -19,6 +19,7 @@ #define _LINUX_FILESCONTROL_H
#include <linux/fdtable.h> +#include <linux/cgroup.h>
#ifdef CONFIG_CGROUP_FILES
@@ -30,5 +31,10 @@ extern struct files_struct init_files; void files_cgroup_assign(struct files_struct *files); void files_cgroup_remove(struct files_struct *files);
+static inline bool files_cgroup_enabled(void) +{ + return cgroup_subsys_enabled(files_cgrp_subsys); +} + #endif /* CONFIG_CGROUP_FILES */ #endif /* _LINUX_FILESCONTROL_H */