
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IC97W5 -------------------------------- After commit fda31c50292a ("signal: avoid double atomic counter increments for user accounting") and commit 15bc01effefe ("ucounts: Fix signal ucount refcounting"), the reference counting mechanism for ucounts behaves as follows: the reference count is incremented when the first pending signal is pinned to the ucounts, and it is decremented when the last pending signal is dequeued. This implies that as long as any pending signal is pinned to the ucounts, the ucounts cannot be freed. To address a scalability issue that is described in the next patch, ucounts.rlimits will be converted to percpu_counter. However, summing up percpu counters is expensive, which makes it costly to determine whether a pending signal is the first or the last one. To overcome this, this patch modifies the conditions for freeing ucounts. Instead of performing complex checks on whether a pending signal is the first or the last one, the ucounts is now freed only when both the refcount and the rlimits are zero. This change not only simplifies the logic but also reduces the number of atomic operations. 
Signed-off-by: Chen Ridong <chenridong@huawei.com> --- include/linux/user_namespace.h | 1 + kernel/ucount.c | 75 ++++++++++++++++++++++++++-------- 2 files changed, 59 insertions(+), 17 deletions(-) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index c3b4de67471c8..d504d506a70f1 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -119,6 +119,7 @@ struct ucounts { kuid_t uid; struct rcu_head rcu; rcuref_t count; + atomic_long_t freed; atomic_long_t ucount[UCOUNT_COUNTS]; atomic_long_t rlimit[UCOUNT_RLIMIT_COUNTS]; }; diff --git a/kernel/ucount.c b/kernel/ucount.c index fd2ccffe08394..1e300184f5edb 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -190,18 +190,61 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) return new; } -void put_ucounts(struct ucounts *ucounts) +/* + * Whether all the rlimits are zero. + * For now, only UCOUNT_RLIMIT_SIGPENDING is considered. + * Other rlimit can be added. + */ +static bool rlimits_are_zero(struct ucounts *ucounts) +{ + int rtypes[] = { UCOUNT_RLIMIT_SIGPENDING }; + int rtype; + + for (int i = 0; i < sizeof(rtypes) / sizeof(int); ++i) { + rtype = rtypes[i]; + if (atomic_long_read(&ucounts->rlimit[rtype]) > 0) + return false; + } + return true; +} + +/* + * Ucounts can be freed only when the ucount->count is released + * and the rlimits are zero. 
+ * The caller should hold rcu_read_lock(); + */ +static bool ucounts_can_be_freed(struct ucounts *ucounts) +{ + if (rcuref_read(&ucounts->count) > 0) + return false; + if (!rlimits_are_zero(ucounts)) + return false; + /* Prevent double free */ + return atomic_long_cmpxchg(&ucounts->freed, 0, 1) == 0; +} + +static void free_ucounts(struct ucounts *ucounts) { unsigned long flags; - if (rcuref_put(&ucounts->count)) { - spin_lock_irqsave(&ucounts_lock, flags); - hlist_nulls_del_rcu(&ucounts->node); - spin_unlock_irqrestore(&ucounts_lock, flags); + spin_lock_irqsave(&ucounts_lock, flags); + hlist_nulls_del_rcu(&ucounts->node); + spin_unlock_irqrestore(&ucounts_lock, flags); + + put_user_ns(ucounts->ns); + kfree_rcu(ucounts, rcu); +} - put_user_ns(ucounts->ns); - kfree_rcu(ucounts, rcu); +void put_ucounts(struct ucounts *ucounts) +{ + rcu_read_lock(); + if (rcuref_put(&ucounts->count) && + ucounts_can_be_freed(ucounts)) { + rcu_read_unlock(); + free_ucounts(ucounts); + return; } + rcu_read_unlock(); } static inline bool atomic_long_inc_below(atomic_long_t *v, int u) @@ -286,11 +329,17 @@ static void do_dec_rlimit_put_ucounts(struct ucounts *ucounts, { struct ucounts *iter, *next; for (iter = ucounts; iter != last; iter = next) { + bool to_free; + + rcu_read_lock(); long dec = atomic_long_sub_return(1, &iter->rlimit[type]); WARN_ON_ONCE(dec < 0); next = iter->ns->ucounts; - if (dec == 0) - put_ucounts(iter); + to_free = ucounts_can_be_freed(iter); + rcu_read_unlock(); + /* If ucounts->count is zero and the rlimits are zero, free ucounts */ + if (to_free) + free_ucounts(iter); } } @@ -315,14 +364,6 @@ long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type, ret = new; if (!override_rlimit) max = get_userns_rlimit_max(iter->ns, type); - /* - * Grab an extra ucount reference for the caller when - * the rlimit count was previously 0. - */ - if (new != 1) - continue; - if (!get_ucounts(iter)) - goto dec_unwind; } return ret; dec_unwind: -- 2.34.1