From: Al Viro <viro@zeniv.linux.org.uk> stable inclusion from stable-v6.6.103 commit 40b36d9a612b8c099b639fe5a10059ad3638d9d1 category: bugfix bugzilla: https://atomgit.com/openeuler/kernel/issues/9181 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=... -------------------------------- [ Upstream commit 1d3b4bec3ce55e0c46cdce7d0402dbd6b4af3a3d ] First of all, tell it how many slots do we want, not which slot is wanted. It makes one caller (dup_fd()) more straightforward and doesn't harm another (expand_fdtable()). Furthermore, make it return ERR_PTR() on failure rather than returning NULL. Simplifies the callers. Simplify the size calculation, while we are at it - note that we always have slots_wanted greater than BITS_PER_LONG. What the rules boil down to is * use the smallest power of two large enough to give us that many slots * on 32bit skip 64 and 128 - the minimal capacity we want there is 256 slots (i.e. 1Kb fd array). * on 64bit don't skip anything, the minimal capacity is 128 - and we'll never be asked for 64 or less. 128 slots means 1Kb fd array, again. * on 128bit, if that ever happens, don't skip anything - we'll never be asked for 128 or less, so the fd array allocation will be at least 2Kb. Reviewed-by: Christian Brauner <brauner@kernel.org> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Sasha Levin <sashal@kernel.org> Conflicts: fs/file.c [723cc66127e1 ("filescgroup: add adapter for legacy and misc cgroup") has introduced attitional logic, a simple adaptation has been made to the conflicts in dup_fd().] Signed-off-by: Zizhi Wo <wozizhi@huawei.com> --- fs/file.c | 81 +++++++++++++++++++++++-------------------------------- 1 file changed, 33 insertions(+), 48 deletions(-) diff --git a/fs/file.c b/fs/file.c index 2b6960e6f596..9d763a7377ab 100644 --- a/fs/file.c +++ b/fs/file.c @@ -88,45 +88,48 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) * Note how the fdtable bitmap allocations very much have to be a multiple of * BITS_PER_LONG. This is not only because we walk those things in chunks of * 'unsigned long' in some places, but simply because that is how the Linux * kernel bitmaps are defined to work: they are not "bits in an array of bytes", * they are very much "bits in an array of unsigned long". - * - * The ALIGN(nr, BITS_PER_LONG) here is for clarity: since we just multiplied - * by that "1024/sizeof(ptr)" before, we already know there are sufficient - * clear low bits. Clang seems to realize that, gcc ends up being confused. - * - * On a 128-bit machine, the ALIGN() would actually matter. In the meantime, - * let's consider it documentation (and maybe a test-case for gcc to improve - * its code generation ;) */ -static struct fdtable * alloc_fdtable(unsigned int nr) +static struct fdtable *alloc_fdtable(unsigned int slots_wanted) { struct fdtable *fdt; + unsigned int nr; void *data; /* * Figure out how many fds we actually want to support in this fdtable. * Allocation steps are keyed to the size of the fdarray, since it * grows far faster than any of the other dynamic data. We try to fit * the fdarray into comfortable page-tuned chunks: starting at 1024B - * and growing in powers of two from there on. + * and growing in powers of two from there on. Since we called only + * with slots_wanted > BITS_PER_LONG (embedded instance in files->fdtab + * already gives BITS_PER_LONG slots), the above boils down to + * 1. use the smallest power of two large enough to give us that many + * slots. + * 2. on 32bit skip 64 and 128 - the minimal capacity we want there is + * 256 slots (i.e. 1Kb fd array). + * 3. on 64bit don't skip anything, 1Kb fd array means 128 slots there + * and we are never going to be asked for 64 or less. */ - nr /= (1024 / sizeof(struct file *)); - nr = roundup_pow_of_two(nr + 1); - nr *= (1024 / sizeof(struct file *)); - nr = ALIGN(nr, BITS_PER_LONG); + if (IS_ENABLED(CONFIG_32BIT) && slots_wanted < 256) + nr = 256; + else + nr = roundup_pow_of_two(slots_wanted); /* * Note that this can drive nr *below* what we had passed if sysctl_nr_open - * had been set lower between the check in expand_files() and here. Deal - * with that in caller, it's cheaper that way. + * had been set lower between the check in expand_files() and here. * * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise * bitmaps handling below becomes unpleasant, to put it mildly... */ - if (unlikely(nr > sysctl_nr_open)) - nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1; + if (unlikely(nr > sysctl_nr_open)) { + nr = round_down(sysctl_nr_open, BITS_PER_LONG); + if (nr < slots_wanted) + return ERR_PTR(-EMFILE); + } /* * Check if the allocation size would exceed INT_MAX. kvmalloc_array() * and kvmalloc() will warn if the allocation size is greater than * INT_MAX, as filp_cachep objects are not __GFP_NOWARN. @@ -166,11 +169,11 @@ static struct fdtable * alloc_fdtable(unsigned int nr) out_arr: kvfree(fdt->fd); out_fdt: kfree(fdt); out: - return NULL; + return ERR_PTR(-ENOMEM); } /* * Expand the file descriptor table. * This function will allocate a new fdtable and both fd array and fdset, of @@ -183,29 +186,21 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr) __acquires(files->file_lock) { struct fdtable *new_fdt, *cur_fdt; spin_unlock(&files->file_lock); - new_fdt = alloc_fdtable(nr); + new_fdt = alloc_fdtable(nr + 1); /* make sure all fd_install() have seen resize_in_progress * or have finished their rcu_read_lock_sched() section. */ if (atomic_read(&files->count) > 1) synchronize_rcu(); spin_lock(&files->file_lock); - if (!new_fdt) - return -ENOMEM; - /* - * extremely unlikely race - sysctl_nr_open decreased between the check in - * caller and alloc_fdtable(). Cheaper to catch it here... - */ - if (unlikely(new_fdt->max_fds <= nr)) { - __free_fdtable(new_fdt); - return -EMFILE; - } + if (IS_ERR(new_fdt)) + return PTR_ERR(new_fdt); cur_fdt = files_fdtable(files); BUG_ON(nr < cur_fdt->max_fds); copy_fdtable(new_fdt, cur_fdt); rcu_assign_pointer(files->fdt, new_fdt); if (cur_fdt != &files->fdtab) @@ -317,11 +312,10 @@ struct files_struct *dup_fd(struct files_struct *oldf, struct fd_range *punch_ho { struct files_struct *newf; struct file **old_fds, **new_fds; unsigned int open_files, i; struct fdtable *old_fdt, *new_fdt; - int error; newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); if (!newf) return ERR_PTR(-ENOMEM); @@ -350,21 +344,15 @@ struct files_struct *dup_fd(struct files_struct *oldf, struct fd_range *punch_ho spin_unlock(&oldf->file_lock); if (new_fdt != &newf->fdtab) __free_fdtable(new_fdt); - new_fdt = alloc_fdtable(open_files - 1); - if (!new_fdt) { - error = -ENOMEM; - goto out_release; - } - - /* beyond sysctl_nr_open; nothing to do */ - if (unlikely(new_fdt->max_fds < open_files)) { - __free_fdtable(new_fdt); - error = -EMFILE; - goto out_release; + new_fdt = alloc_fdtable(open_files); + if (IS_ERR(new_fdt)) { + files_cg_remove(newf); + kmem_cache_free(files_cachep, newf); + return ERR_CAST(new_fdt); } /* * Reacquire the oldf lock and a pointer to its fd table * who knows it may have a new bigger fd table. We need @@ -419,18 +407,15 @@ struct files_struct *dup_fd(struct files_struct *oldf, struct fd_range *punch_ho if (f) fput(f); } if (new_fdt != &newf->fdtab) __free_fdtable(new_fdt); - error = -EMFILE; - goto out_release; + files_cg_remove(newf); + kmem_cache_free(files_cachep, newf); + return ERR_PTR(-EMFILE); } return newf; -out_release: - files_cg_remove(newf); - kmem_cache_free(files_cachep, newf); - return ERR_PTR(error); } static struct fdtable *close_files(struct files_struct * files) { /* -- 2.52.0