From: Florian Westphal <fw@strlen.de> mainline inclusion from mainline-v7.0-rc1 commit 9df95785d3d8302f7c066050117b04cd3c2048c2 category: bugfix bugzilla: https://atomgit.com/src-openeuler/kernel/issues/13930 CVE: CVE-2026-23351 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- Yiming Qian reports Use-after-free in the pipapo set type: Under a large number of expired elements, commit-time GC can run for a very long time in a non-preemptible context, triggering soft lockup warnings and RCU stall reports (local denial of service). We must split GC in an unlink and a reclaim phase. We cannot queue elements for freeing until pointers have been swapped. Expired elements are still exposed to both the packet path and userspace dumpers via the live copy of the data structure. call_rcu() does not protect us: dump operations or element lookups starting after call_rcu has fired can still observe the free'd element, unless the commit phase has made enough progress to swap the clone and live pointers before any new reader has picked up the old version. This a similar approach as done recently for the rbtree backend in commit 35f83a75529a ("netfilter: nft_set_rbtree: don't gc elements on insert"). Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Reported-by: Yiming Qian <yimingqian591@gmail.com> Signed-off-by: Florian Westphal <fw@strlen.de> Conflicts: include/net/netfilter/nf_tables.h net/netfilter/nft_set_pipapo.c net/netfilter/nft_set_pipapo.h [commit e169285f8c56(nf_tables.h), f04df573faf9, 532aec7e878b, f6c383b8c31a (no catchcall lead to conflict),3f1d886cc7c3 are not backport] Signed-off-by: Dong Chenchen <dongchenchen2@huawei.com> --- include/net/netfilter/nf_tables.h | 5 ++++ net/netfilter/nf_tables_api.c | 5 ---- net/netfilter/nft_set_pipapo.c | 49 ++++++++++++++++++++++++++----- net/netfilter/nft_set_pipapo.h | 2 ++ 4 files changed, 48 insertions(+), 13 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 3638db98a153..da6ce70eb1ba 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1568,6 +1568,11 @@ struct nft_trans_gc { struct rcu_head rcu; }; +static inline int nft_trans_gc_space(const struct nft_trans_gc *trans) +{ + return NFT_TRANS_GC_BATCHCOUNT - trans->count; +} + struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, unsigned int gc_seq, gfp_t gfp); void nft_trans_gc_destroy(struct nft_trans_gc *trans); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 81caffd53912..649b27999102 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8302,11 +8302,6 @@ static void nft_trans_gc_queue_work(struct nft_trans_gc *trans) schedule_work(&trans_gc_work); } -static int nft_trans_gc_space(struct nft_trans_gc *trans) -{ - return NFT_TRANS_GC_BATCHCOUNT - trans->count; -} - struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, unsigned int gc_seq, gfp_t gfp) { diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index f2fac34c753b..8849b7946cf9 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1582,11 +1582,11 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set, } /** - * pipapo_gc() - Drop expired entries from set, destroy start and end elements + * pipapo_gc_scan() - Drop expired entries from set, destroy start and end elements * @set: nftables API set representation * @m: Matching data */ -static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) +static void pipapo_gc_scan(struct nft_set *set, struct nft_pipapo_match *m) { struct nft_pipapo *priv = nft_set_priv(set); struct net *net = read_pnet(&set->net); @@ -1598,6 +1598,8 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) if (!gc) return; + list_add(&gc->list, &priv->gc_head); + while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) { union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS]; struct nft_pipapo_field *f; @@ -1627,9 +1629,13 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) if (__nft_set_elem_expired(&e->ext, tstamp)) { priv->dirty = true; - gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); - if (!gc) - return; + if (!nft_trans_gc_space(gc)) { + gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL); + if (!gc) + return; + + list_add(&gc->list, &priv->gc_head); + } nft_pipapo_gc_deactivate(net, set, e); pipapo_drop(m, rulemap); @@ -1643,9 +1649,28 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) } } - if (gc) { - nft_trans_gc_queue_sync_done(gc); + if (gc) priv->last_gc = jiffies; +} + +/** + * pipapo_gc_queue() - Free expired elements + * @set: nftables API set representation + */ +static void pipapo_gc_queue(struct nft_set *set) +{ + struct nft_pipapo *priv = nft_set_priv(set); + struct nft_trans_gc *gc, *next; + + /* always do a catchall cycle: */ + gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL); + if (gc) + nft_trans_gc_queue_sync_done(gc); + + /* always purge queued gc elements. */ + list_for_each_entry_safe(gc, next, &priv->gc_head, list) { + list_del(&gc->list); + nft_trans_gc_queue_sync_done(gc); } } @@ -1699,6 +1724,10 @@ static void pipapo_reclaim_match(struct rcu_head *rcu) * * We also need to create a new working copy for subsequent insertions and * deletions. + * + * After the live copy has been replaced by the clone, we can safely queue + * expired elements that have been collected by pipapo_gc_scan() for + * memory reclaim. */ static void nft_pipapo_commit(struct nft_set *set) { @@ -1706,7 +1735,7 @@ static void nft_pipapo_commit(struct nft_set *set) struct nft_pipapo_match *new_clone, *old; if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set))) - pipapo_gc(set, priv->clone); + pipapo_gc_scan(set, priv->clone); if (!priv->dirty) return; @@ -1723,6 +1752,7 @@ static void nft_pipapo_commit(struct nft_set *set) call_rcu(&old->rcu, pipapo_reclaim_match); priv->clone = new_clone; + pipapo_gc_queue(set); } static void nft_pipapo_abort(const struct nft_set *set) @@ -2184,6 +2214,7 @@ static int nft_pipapo_init(const struct nft_set *set, priv->dirty = false; + INIT_LIST_HEAD(&priv->gc_head); rcu_assign_pointer(priv->match, m); return 0; @@ -2236,6 +2267,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, struct nft_pipapo_match *m; int cpu; + WARN_ON_ONCE(!list_empty(&priv->gc_head)); + m = rcu_dereference_protected(priv->match, true); if (m) { rcu_barrier(); diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index 8f8f58af4e34..38f577af5349 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -165,6 +165,7 @@ struct nft_pipapo_match { * @width: Total bytes to be matched for one packet, including padding * @dirty: Working copy has pending insertions or deletions * @last_gc: Timestamp of last garbage collection run, jiffies + * @gc_head: list of nft_trans_gc to queue up for mem reclaim */ struct nft_pipapo { struct nft_pipapo_match __rcu *match; @@ -172,6 +173,7 @@ struct nft_pipapo { int width; bool dirty; unsigned long last_gc; + struct list_head gc_head; }; struct nft_pipapo_elem; -- 2.25.1