[PATCH openEuler-1.0-LTS 0/3] CVE-2024-57982

Ahmed S. Darwish (1):
  net: xfrm: Localize sequence counter per network namespace

Florian Westphal (1):
  xfrm: state: fix out-of-bounds read during lookup

Wang Liang (1):
  net: fix kabi broken in struct netns_xfrm

 include/net/netns/xfrm.h |  7 +++-
 net/xfrm/xfrm_state.c    | 85 ++++++++++++++++++++++++++++++----------
 2 files changed, 71 insertions(+), 21 deletions(-)

-- 
2.34.1

Feedback: The patch(es) you sent to the kernel@openeuler.org mailing list have been converted to a pull request successfully!
Pull request link: https://gitee.com/openeuler/kernel/pulls/16290
Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/KJY...

From: "Ahmed S. Darwish" <a.darwish@linutronix.de> mainline inclusion from mainline-v5.12-rc7 commit e88add19f68191448427a6e4eb059664650a837f category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IBPBKJ CVE: CVE-2024-57982 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- A sequence counter write section must be serialized or its internal state can get corrupted. The "xfrm_state_hash_generation" seqcount is global, but its write serialization lock (net->xfrm.xfrm_state_lock) is instantiated per network namespace. The write protection is thus insufficient. To provide full protection, localize the sequence counter per network namespace instead. This should be safe as both the seqcount read and write sections access data exclusively within the network namespace. It also lays the foundation for transforming "xfrm_state_hash_generation" data type from seqcount_t to seqcount_LOCKNAME_t in further commits. Fixes: b65e3d7be06f ("xfrm: state: add sequence count to detect hash resizes") Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com> Signed-off-by: Wang Liang <wangliang74@huawei.com> --- include/net/netns/xfrm.h | 4 +++- net/xfrm/xfrm_state.c | 10 +++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 55a1f26d9a8b..96e603443796 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -73,7 +73,9 @@ struct netns_xfrm { #if IS_ENABLED(CONFIG_IPV6) struct dst_ops xfrm6_dst_ops; #endif - spinlock_t xfrm_state_lock; + spinlock_t xfrm_state_lock; + seqcount_t xfrm_state_hash_generation; + spinlock_t xfrm_policy_lock; struct mutex xfrm_cfg_mutex; }; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index bc7e982a86d1..3faeebd967de 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -41,7 +41,6 @@ static void xfrm_state_gc_task(struct work_struct *work); */ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; -static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation); static struct kmem_cache *xfrm_state_cache __ro_after_init; static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task); @@ -137,7 +136,7 @@ static void xfrm_hash_resize(struct work_struct *work) } spin_lock_bh(&net->xfrm.xfrm_state_lock); - write_seqcount_begin(&xfrm_state_hash_generation); + write_seqcount_begin(&net->xfrm.xfrm_state_hash_generation); nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net); @@ -153,7 +152,7 @@ static void xfrm_hash_resize(struct work_struct *work) rcu_assign_pointer(net->xfrm.state_byspi, nspi); net->xfrm.state_hmask = nhashmask; - write_seqcount_end(&xfrm_state_hash_generation); + write_seqcount_end(&net->xfrm.xfrm_state_hash_generation); spin_unlock_bh(&net->xfrm.xfrm_state_lock); osize = (ohashmask + 1) * sizeof(struct hlist_head); @@ -965,7 +964,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, to_put = NULL; - sequence = read_seqcount_begin(&xfrm_state_hash_generation); + sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation); rcu_read_lock(); h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); @@ -1076,7 +1075,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, if (to_put) xfrm_state_put(to_put); - if 
(read_seqcount_retry(&xfrm_state_hash_generation, sequence)) { + if (read_seqcount_retry(&net->xfrm.xfrm_state_hash_generation, sequence)) { *err = -EAGAIN; if (x) { xfrm_state_put(x); @@ -2408,6 +2407,7 @@ int __net_init xfrm_state_init(struct net *net) net->xfrm.state_num = 0; INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize); spin_lock_init(&net->xfrm.xfrm_state_lock); + seqcount_init(&net->xfrm.xfrm_state_hash_generation); return 0; out_byspi: -- 2.34.1
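For readers not familiar with seqcounts, the following is a minimal user-space
sketch of the read/retry pattern this patch relies on, assuming pthreads and
C11 atomics; struct netns_sketch and both function names are illustrative only
and are not kernel APIs. The point of the fix is that the write-side lock must
cover the same scope as the counter: with a global counter but a per-namespace
lock, two namespaces can enter the write section at the same time.

#include <pthread.h>
#include <stdatomic.h>

struct netns_sketch {
	pthread_mutex_t state_lock;      /* per-namespace write-side lock */
	atomic_uint     hash_generation; /* per-namespace sequence counter */
	unsigned int    hmask;           /* hash state protected by the counter */
};

static void resize_write_section(struct netns_sketch *ns, unsigned int new_hmask)
{
	pthread_mutex_lock(&ns->state_lock);
	/* begin: counter becomes odd, readers know a write is in flight */
	atomic_fetch_add_explicit(&ns->hash_generation, 1, memory_order_release);
	ns->hmask = new_hmask;
	/* end: counter becomes even again */
	atomic_fetch_add_explicit(&ns->hash_generation, 1, memory_order_release);
	pthread_mutex_unlock(&ns->state_lock);
}

static unsigned int lookup_read_section(struct netns_sketch *ns)
{
	unsigned int seq, hmask;

	do {
		seq = atomic_load_explicit(&ns->hash_generation, memory_order_acquire);
		hmask = ns->hmask;
		/* retry if a writer was active or finished meanwhile */
	} while ((seq & 1) ||
		 seq != atomic_load_explicit(&ns->hash_generation, memory_order_acquire));

	return hmask;
}

Making the counter per-namespace pairs it with the per-namespace
xfrm_state_lock, and, as the commit message notes, prepares for the later
conversion to a seqcount_LOCKNAME_t type that encodes that pairing explicitly.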

From: Florian Westphal <fw@strlen.de>

mainline inclusion
from mainline-v6.14-rc1
commit e952837f3ddb0ff726d5b582aa1aad9aa38d024d
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/IBPBKJ
CVE: CVE-2024-57982

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...

--------------------------------

lookup and resize can run in parallel.

The xfrm_state_hash_generation seqlock ensures a retry, but the hash
functions can observe a hmask value that is too large for the new hlist
array.

rehash does:
  rcu_assign_pointer(net->xfrm.state_bydst, ndst)
  [..]
  net->xfrm.state_hmask = nhashmask;

While state lookup does:
  h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
  hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {

This is only safe in case the update to state_bydst is larger than
net->xfrm.xfrm_state_hmask (or if the lookup function gets serialized
via state spinlock again).

Fix this by prefetching state_hmask and the associated pointers.
The xfrm_state_hash_generation seqlock retry will ensure that the
pointer and the hmask will be consistent.

The existing helpers, like xfrm_dst_hash(), are now unsafe for RCU side,
add lockdep assertions to document that they are only safe for insert
side.

xfrm_state_lookup_byaddr() uses the spinlock rather than RCU. AFAICS
this is an oversight from back when state lookup was converted to RCU,
this lock should be replaced with RCU in a future patch.

Reported-by: syzbot+5f9f31cb7d985f584d8e@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/netdev/CACT4Y+azwfrE3uz6A5ZErov5YN2LYBN5KrsymBerT36V...
Diagnosed-by: Dmitry Vyukov <dvyukov@google.com>
Fixes: c2f672fc9464 ("xfrm: state lookup can be lockless")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>

Conflicts:
	net/xfrm/xfrm_state.c
[conflicts due to not merge fe9f1d8779cb ("xfrm: add state hashtable keyed by seq"),
 conflicts due to not merge f8a70afafc17 ("xfrm: add TX datapath support for IPsec packet offload mode"),
 conflicts due to not merge 81a331a0e72d ("xfrm: Add an inbound percpu state cache."),
 conflicts due to not merge 5c1b9ab3ec81 ("xfrm: remove init_temprop indirection from xfrm_state_afinfo")]

Signed-off-by: Wang Liang <wangliang74@huawei.com>
---
 net/xfrm/xfrm_state.c | 75 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 60 insertions(+), 15 deletions(-)

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 3faeebd967de..c89e2834937b 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -30,6 +30,8 @@
 
 #define xfrm_state_deref_prot(table, net) \
 	rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock))
+#define xfrm_state_deref_check(table, net) \
+	rcu_dereference_check((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock))
 
 static void xfrm_state_gc_task(struct work_struct *work);
 
@@ -57,6 +59,8 @@ static inline unsigned int xfrm_dst_hash(struct net *net,
 					 u32 reqid,
 					 unsigned short family)
 {
+	lockdep_assert_held(&net->xfrm.xfrm_state_lock);
+
 	return __xfrm_dst_hash(daddr, saddr, reqid, family, net->xfrm.state_hmask);
 }
 
@@ -65,6 +69,8 @@ static inline unsigned int xfrm_src_hash(struct net *net,
 					 const xfrm_address_t *saddr,
 					 unsigned short family)
 {
+	lockdep_assert_held(&net->xfrm.xfrm_state_lock);
+
 	return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask);
 }
 
@@ -72,6 +78,8 @@ static inline unsigned int
 xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr,
 	      __be32 spi, u8 proto, unsigned short family)
 {
+	lockdep_assert_held(&net->xfrm.xfrm_state_lock);
+
 	return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask);
 }
 
@@ -830,15 +838,37 @@ xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
 	afinfo->init_temprop(x, tmpl, daddr, saddr);
 }
 
-static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
+struct xfrm_hash_state_ptrs {
+	const struct hlist_head *bydst;
+	const struct hlist_head *bysrc;
+	const struct hlist_head *byspi;
+	unsigned int hmask;
+};
+
+static void xfrm_hash_ptrs_get(const struct net *net, struct xfrm_hash_state_ptrs *ptrs)
+{
+	unsigned int sequence;
+
+	do {
+		sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
+
+		ptrs->bydst = xfrm_state_deref_check(net->xfrm.state_bydst, net);
+		ptrs->bysrc = xfrm_state_deref_check(net->xfrm.state_bysrc, net);
+		ptrs->byspi = xfrm_state_deref_check(net->xfrm.state_byspi, net);
+		ptrs->hmask = net->xfrm.state_hmask;
+	} while (read_seqcount_retry(&net->xfrm.xfrm_state_hash_generation, sequence));
+}
+
+static struct xfrm_state *__xfrm_state_lookup(const struct xfrm_hash_state_ptrs *state_ptrs,
+					      u32 mark,
 					      const xfrm_address_t *daddr,
 					      __be32 spi, u8 proto,
 					      unsigned short family)
 {
-	unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
+	unsigned int h = __xfrm_spi_hash(daddr, spi, proto, family, state_ptrs->hmask);
 	struct xfrm_state *x;
 
-	hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
+	hlist_for_each_entry_rcu(x, state_ptrs->byspi + h, byspi) {
 		if (x->props.family != family ||
 		    x->id.spi != spi ||
 		    x->id.proto != proto ||
@@ -855,15 +885,16 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
 	return NULL;
 }
 
-static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
+static struct xfrm_state *__xfrm_state_lookup_byaddr(const struct xfrm_hash_state_ptrs *state_ptrs,
+						     u32 mark,
 						     const xfrm_address_t *daddr,
 						     const xfrm_address_t *saddr,
 						     u8 proto, unsigned short family)
 {
-	unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
+	unsigned int h = __xfrm_src_hash(daddr, saddr, family, state_ptrs->hmask);
 	struct xfrm_state *x;
 
-	hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) {
+	hlist_for_each_entry_rcu(x, state_ptrs->bysrc + h, bysrc) {
 		if (x->props.family != family ||
 		    x->id.proto != proto ||
 		    !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
@@ -883,14 +914,17 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
 static inline struct xfrm_state *
 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
 {
+	struct xfrm_hash_state_ptrs state_ptrs;
 	struct net *net = xs_net(x);
 	u32 mark = x->mark.v & x->mark.m;
 
+	xfrm_hash_ptrs_get(net, &state_ptrs);
+
 	if (use_spi)
-		return __xfrm_state_lookup(net, mark, &x->id.daddr,
+		return __xfrm_state_lookup(&state_ptrs, mark, &x->id.daddr,
 					   x->id.spi, x->id.proto, family);
 	else
-		return __xfrm_state_lookup_byaddr(net, mark,
+		return __xfrm_state_lookup_byaddr(&state_ptrs, mark,
 						  &x->id.daddr,
 						  &x->props.saddr,
 						  x->id.proto, family);
@@ -951,6 +985,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 		unsigned short family, u32 if_id)
 {
 	static xfrm_address_t saddr_wildcard = { };
+	struct xfrm_hash_state_ptrs state_ptrs;
 	struct net *net = xp_net(pol);
 	unsigned int h, h_wildcard;
 	struct xfrm_state *x, *x0, *to_put;
@@ -967,8 +1002,10 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 	sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
 
 	rcu_read_lock();
-	h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
-	hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
+	xfrm_hash_ptrs_get(net, &state_ptrs);
+
+	h = __xfrm_dst_hash(daddr, saddr, tmpl->reqid, encap_family, state_ptrs.hmask);
+	hlist_for_each_entry_rcu(x, state_ptrs.bydst + h, bydst) {
 		if (x->props.family == encap_family &&
 		    x->props.reqid == tmpl->reqid &&
 		    (mark & x->mark.m) == x->mark.v &&
@@ -984,8 +1021,9 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 	if (best || acquire_in_progress)
 		goto found;
 
-	h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
-	hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) {
+	h_wildcard = __xfrm_dst_hash(daddr, &saddr_wildcard, tmpl->reqid,
+				     encap_family, state_ptrs.hmask);
+	hlist_for_each_entry_rcu(x, state_ptrs.bydst + h_wildcard, bydst) {
 		if (x->props.family == encap_family &&
 		    x->props.reqid == tmpl->reqid &&
 		    (mark & x->mark.m) == x->mark.v &&
@@ -1003,7 +1041,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 	x = best;
 	if (!x && !error && !acquire_in_progress) {
 		if (tmpl->id.spi &&
-		    (x0 = __xfrm_state_lookup(net, mark, daddr, tmpl->id.spi,
+		    (x0 = __xfrm_state_lookup(&state_ptrs, mark, daddr, tmpl->id.spi,
 					      tmpl->id.proto, encap_family)) != NULL) {
 			to_put = x0;
 			error = -EEXIST;
@@ -1663,10 +1701,13 @@ struct xfrm_state *
 xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi,
 		  u8 proto, unsigned short family)
 {
+	struct xfrm_hash_state_ptrs state_ptrs;
 	struct xfrm_state *x;
 
 	rcu_read_lock();
-	x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family);
+	xfrm_hash_ptrs_get(net, &state_ptrs);
+
+	x = __xfrm_state_lookup(&state_ptrs, mark, daddr, spi, proto, family);
 	rcu_read_unlock();
 	return x;
 }
@@ -1677,10 +1718,14 @@ xfrm_state_lookup_byaddr(struct net *net, u32 mark,
 			 const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 			 u8 proto, unsigned short family)
 {
+	struct xfrm_hash_state_ptrs state_ptrs;
 	struct xfrm_state *x;
 
 	spin_lock_bh(&net->xfrm.xfrm_state_lock);
-	x = __xfrm_state_lookup_byaddr(net, mark, daddr, saddr, proto, family);
+
+	xfrm_hash_ptrs_get(net, &state_ptrs);
+
+	x = __xfrm_state_lookup_byaddr(&state_ptrs, mark, daddr, saddr, proto, family);
 	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 	return x;
 }
-- 
2.34.1
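The core of the fix is xfrm_hash_ptrs_get(): the lookup no longer mixes a hash
computed from the current hmask with whatever table pointer happens to be
visible, but takes a consistent snapshot of both. Below is a minimal
user-space sketch of that snapshot idea, assuming C11 atomics; struct
hash_state, hash_snapshot_get() and the other names are illustrative
placeholders, not the kernel implementation.

#include <stdatomic.h>
#include <stddef.h>

struct bucket { struct bucket *next; };

struct hash_snapshot {
	struct bucket *table;   /* bucket array for this snapshot */
	unsigned int   hmask;   /* mask matching that array's size - 1 */
};

struct hash_state {
	_Atomic(struct bucket *) table;
	unsigned int             hmask;
	atomic_uint              generation; /* even = stable, odd = resizing */
};

static void hash_snapshot_get(struct hash_state *st, struct hash_snapshot *snap)
{
	unsigned int seq;

	do {
		seq = atomic_load_explicit(&st->generation, memory_order_acquire);
		snap->table = atomic_load_explicit(&st->table, memory_order_acquire);
		snap->hmask = st->hmask;
		/* retry until pointer and mask come from the same generation */
	} while ((seq & 1) ||
		 seq != atomic_load_explicit(&st->generation, memory_order_acquire));
}

/* The lookup then hashes and indexes only through the snapshot. */
static struct bucket *lookup_bucket(const struct hash_snapshot *snap, unsigned int hash)
{
	return &snap->table[hash & snap->hmask];
}

If the mask came from a larger table generation than the pointer being walked,
"hash & hmask" could index past the end of the array, which is exactly the
out-of-bounds read this patch closes.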

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/IBPBKJ
CVE: CVE-2024-57982

--------------------------------

Fix kabi broken in struct netns_xfrm.

Fixes: e88add19f681 ("net: xfrm: Localize sequence counter per network namespace")
Signed-off-by: Wang Liang <wangliang74@huawei.com>
---
 include/net/netns/xfrm.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 96e603443796..9a04e2f2076e 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -69,12 +69,15 @@ struct netns_xfrm {
 	struct ctl_table_header	*sysctl_hdr;
 #endif
 
+#ifndef __GENKSYMS__
+	seqcount_t xfrm_state_hash_generation;
+#endif
+
 	struct dst_ops xfrm4_dst_ops;
 #if IS_ENABLED(CONFIG_IPV6)
 	struct dst_ops xfrm6_dst_ops;
 #endif
 	spinlock_t xfrm_state_lock;
-	seqcount_t xfrm_state_hash_generation;
 	spinlock_t xfrm_policy_lock;
 	struct mutex xfrm_cfg_mutex;
 };
-- 
2.34.1
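As a rough illustration of the __GENKSYMS__ technique used above (the struct
and field names below are placeholders, not the real netns_xfrm layout):
genksyms computes the exported-symbol CRCs from a preprocessing pass run with
__GENKSYMS__ defined, so a member hidden behind #ifndef __GENKSYMS__ is
compiled into the real kernel but does not change the CRC that out-of-tree
modules are checked against.

struct example_ns {
	int existing_member;
#ifndef __GENKSYMS__
	/* seen by the normal compile, invisible to the genksyms CRC pass */
	unsigned int new_member_added_for_fix;
#endif
	long another_existing_member;
};

Whether adding the member is actually layout-compatible for existing binary
modules is a separate judgement made per structure; the CRC trick only keeps
the symbol version stable.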
participants (2)
-
patchwork bot
-
Wang Liang