[PATCH OLK-6.6 00/11] net: Netdev use RCU protection
Netdev use RCU protection Dong Chenchen (1): net: dst: Fix kabi-breakage for struct dst_entry Eric Dumazet (8): net: dst: add four helpers to annotate data-races around dst->dev net: dst: introduce dst->dev_rcu tcp_metrics: use dst_dev_net_rcu() ipv4: start using dst_dev_rcu() ipv6: use RCU in ip6_xmit() ipv6: use RCU in ip6_output() ipv4: use RCU protection in ip_dst_mtu_maybe_forward() net: use dst_dev_rcu() in sk_setup_caps() Kuniyuki Iwashima (1): tls: Use __sk_dst_get() and dst_dev_rcu() in get_netdev_for_sock(). Sharath Chandra Vurukala (1): net: Add locking to protect skb->dev access in ip_output include/net/dst.h | 40 ++++++++++++++++++++++++- include/net/ip.h | 17 ++++++++--- include/net/ip6_route.h | 2 +- include/net/route.h | 2 +- net/core/dst.c | 4 +-- net/core/sock.c | 16 ++++++---- net/ipv4/icmp.c | 10 ++++--- net/ipv4/ip_fragment.c | 9 ++++-- net/ipv4/ip_output.c | 15 ++++++---- net/ipv4/ipmr.c | 2 +- net/ipv4/route.c | 8 ++--- net/ipv4/tcp_metrics.c | 6 ++-- net/ipv6/ip6_output.c | 65 +++++++++++++++++++++++------------------ net/tls/tls_device.c | 18 +++++++----- 14 files changed, 144 insertions(+), 70 deletions(-) -- 2.25.1
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/19306 邮件列表地址:https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/DD3... FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/19306 Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/DD3...
From: Eric Dumazet <edumazet@google.com> mainline inclusion from mainline-v6.17-rc1 commit 88fe14253e181878c2ddb51a298ae8c468a63010 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID3WJ1 CVE: CVE-2025-40074 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- dst->dev is read locklessly in many contexts, and written in dst_dev_put(). Fixing all the races is going to need many changes. We probably will have to add full RCU protection. Add three helpers to ease this painful process. static inline struct net_device *dst_dev(const struct dst_entry *dst) { return READ_ONCE(dst->dev); } static inline struct net_device *skb_dst_dev(const struct sk_buff *skb) { return dst_dev(skb_dst(skb)); } static inline struct net *skb_dst_dev_net(const struct sk_buff *skb) { return dev_net(skb_dst_dev(skb)); } static inline struct net *skb_dst_dev_net_rcu(const struct sk_buff *skb) { return dev_net_rcu(skb_dst_dev(skb)); } Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()") Signed-off-by: Eric Dumazet <edumazet@google.com> Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com> Link: https://patch.msgid.link/20250630121934.3399505-7-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> Conflicts: net/core/sock.c net/core/dst.c [conflicts due to not merge 15492700ac41 ("tcp: cache RTAX_QUICKACK metric in a hot cache line"), conflicts due to not merge 8a402bbe5476 ("net: dst: annotate data-races around dst->obsolete")] Signed-off-by: Dong Chenchen <dongchenchen2@huawei.com> --- include/net/dst.h | 20 ++++++++++++++++++++ net/core/dst.c | 4 ++-- net/core/sock.c | 8 ++++---- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/include/net/dst.h b/include/net/dst.h index badbb80796e4..3e2ff1b9b361 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -579,6 +579,26 @@ static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu) dst->ops->update_pmtu(dst, NULL, skb, mtu, false); } +static inline struct net_device *dst_dev(const struct dst_entry *dst) +{ + return READ_ONCE(dst->dev); +} + +static inline struct net_device *skb_dst_dev(const struct sk_buff *skb) +{ + return dst_dev(skb_dst(skb)); +} + +static inline struct net *skb_dst_dev_net(const struct sk_buff *skb) +{ + return dev_net(skb_dst_dev(skb)); +} + +static inline struct net *skb_dst_dev_net_rcu(const struct sk_buff *skb) +{ + return dev_net_rcu(skb_dst_dev(skb)); +} + struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie); void dst_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu, bool confirm_neigh); diff --git a/net/core/dst.c b/net/core/dst.c index aad197e761cb..455c8bddc523 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -152,7 +152,7 @@ void dst_dev_put(struct dst_entry *dst) dst->ops->ifdown(dst, dev); dst->input = dst_discard; dst->output = dst_discard_out; - dst->dev = blackhole_netdev; + WRITE_ONCE(dst->dev, blackhole_netdev); netdev_ref_replace(dev, blackhole_netdev, &dst->dev_tracker, GFP_ATOMIC); } @@ -265,7 +265,7 @@ unsigned int dst_blackhole_mtu(const struct dst_entry *dst) { unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); - return mtu ? : dst->dev->mtu; + return mtu ? : dst_dev(dst)->mtu; } EXPORT_SYMBOL_GPL(dst_blackhole_mtu); diff --git a/net/core/sock.c b/net/core/sock.c index 84e3a4be73cd..cbf46bfa5510 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2445,8 +2445,8 @@ static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst) !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)); #endif /* pairs with the WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */ - max_size = is_ipv6 ? READ_ONCE(dst->dev->gso_max_size) : - READ_ONCE(dst->dev->gso_ipv4_max_size); + max_size = is_ipv6 ? READ_ONCE(dst_dev(dst)->gso_max_size) : + READ_ONCE(dst_dev(dst)->gso_ipv4_max_size); if (max_size > GSO_LEGACY_MAX_SIZE && !sk_is_tcp(sk)) max_size = GSO_LEGACY_MAX_SIZE; @@ -2457,7 +2457,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) { u32 max_segs = 1; - sk->sk_route_caps = dst->dev->features; + sk->sk_route_caps = dst_dev(dst)->features; if (sk_is_tcp(sk)) sk->sk_route_caps |= NETIF_F_GSO; if (sk->sk_route_caps & NETIF_F_GSO) @@ -2471,7 +2471,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dst); /* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */ - max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1); + max_segs = max_t(u32, READ_ONCE(dst_dev(dst)->gso_max_segs), 1); } } sk->sk_gso_max_segs = max_segs; -- 2.25.1
From: Sharath Chandra Vurukala <quic_sharathv@quicinc.com> mainline inclusion from mainline-v6.17-rc1 commit 1dbf1d590d10a6d1978e8184f8dfe20af22d680a category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID3WJ1 CVE: CVE-2025-40074 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- In ip_output() skb->dev is updated from the skb_dst(skb)->dev this can become invalid when the interface is unregistered and freed, Introduced new skb_dst_dev_rcu() function to be used instead of skb_dst_dev() within rcu_locks in ip_output.This will ensure that all the skb's associated with the dev being deregistered will be transnmitted out first, before freeing the dev. Given that ip_output() is called within an rcu_read_lock() critical section or from a bottom-half context, it is safe to introduce an RCU read-side critical section within it. Multiple panic call stacks were observed when UL traffic was run in concurrency with device deregistration from different functions, pasting one sample for reference. [496733.627565][T13385] Call trace: [496733.627570][T13385] bpf_prog_ce7c9180c3b128ea_cgroupskb_egres+0x24c/0x7f0 [496733.627581][T13385] __cgroup_bpf_run_filter_skb+0x128/0x498 [496733.627595][T13385] ip_finish_output+0xa4/0xf4 [496733.627605][T13385] ip_output+0x100/0x1a0 [496733.627613][T13385] ip_send_skb+0x68/0x100 [496733.627618][T13385] udp_send_skb+0x1c4/0x384 [496733.627625][T13385] udp_sendmsg+0x7b0/0x898 [496733.627631][T13385] inet_sendmsg+0x5c/0x7c [496733.627639][T13385] __sys_sendto+0x174/0x1e4 [496733.627647][T13385] __arm64_sys_sendto+0x28/0x3c [496733.627653][T13385] invoke_syscall+0x58/0x11c [496733.627662][T13385] el0_svc_common+0x88/0xf4 [496733.627669][T13385] do_el0_svc+0x2c/0xb0 [496733.627676][T13385] el0_svc+0x2c/0xa4 [496733.627683][T13385] el0t_64_sync_handler+0x68/0xb4 [496733.627689][T13385] el0t_64_sync+0x1a4/0x1a8 Changes in v3: - Replaced WARN_ON() with WARN_ON_ONCE(), as suggested by Willem de Bruijn. - Dropped legacy lines mistakenly pulled in from an outdated branch. Changes in v2: - Addressed review comments from Eric Dumazet - Used READ_ONCE() to prevent potential load/store tearing - Added skb_dst_dev_rcu() and used along with rcu_read_lock() in ip_output Signed-off-by: Sharath Chandra Vurukala <quic_sharathv@quicinc.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Link: https://patch.msgid.link/20250730105118.GA26100@hu-sharathv-hyd.qualcomm.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> Conflicts: net/ipv4/ip_output.c [conflicts due to not merge a74fc62eec15 ("ipv4: adopt dst_dev, skb_dst_dev and skb_dst_dev_net[_rcu]")] Signed-off-by: Dong Chenchen <dongchenchen2@huawei.com> --- include/net/dst.h | 12 ++++++++++++ net/ipv4/ip_output.c | 15 ++++++++++----- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/include/net/dst.h b/include/net/dst.h index 3e2ff1b9b361..96d2e4df5d30 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -584,11 +584,23 @@ static inline struct net_device *dst_dev(const struct dst_entry *dst) return READ_ONCE(dst->dev); } +static inline struct net_device *dst_dev_rcu(const struct dst_entry *dst) +{ + /* In the future, use rcu_dereference(dst->dev) */ + WARN_ON_ONCE(!rcu_read_lock_held()); + return READ_ONCE(dst->dev); +} + static inline struct net_device *skb_dst_dev(const struct sk_buff *skb) { return dst_dev(skb_dst(skb)); } +static inline struct net_device *skb_dst_dev_rcu(const struct sk_buff *skb) +{ + return dst_dev_rcu(skb_dst(skb)); +} + static inline struct net *skb_dst_dev_net(const struct sk_buff *skb) { return dev_net(skb_dst_dev(skb)); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 89f5f3b178e1..a03449ba9740 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -430,15 +430,20 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb) int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev; + struct net_device *dev, *indev = skb->dev; + int ret_val; + rcu_read_lock(); + dev = skb_dst_dev_rcu(skb); skb->dev = dev; skb->protocol = htons(ETH_P_IP); - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, - net, sk, skb, indev, dev, - ip_finish_output, - !(IPCB(skb)->flags & IPSKB_REROUTED)); + ret_val = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, + net, sk, skb, indev, dev, + ip_finish_output, + !(IPCB(skb)->flags & IPSKB_REROUTED)); + rcu_read_unlock(); + return ret_val; } EXPORT_SYMBOL(ip_output); -- 2.25.1
From: Eric Dumazet <edumazet@google.com> mainline inclusion from mainline-v6.18-rc1 commit caedcc5b6df1b2e2b5f39079e3369c1d4d5c5f50 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID3WJ1 CVE: CVE-2025-40074 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- Followup of commit 88fe14253e18 ("net: dst: add four helpers to annotate data-races around dst->dev"). We want to gradually add explicit RCU protection to dst->dev, including lockdep support. Add an union to alias dst->dev_rcu and dst->dev. Add dst_dev_net_rcu() helper. Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()") Signed-off-by: Eric Dumazet <edumazet@google.com> Reviewed-by: David Ahern <dsahern@kernel.org> Link: https://patch.msgid.link/20250828195823.3958522-2-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> Conflicts: include/net/dst.h net/core/dst.c net/ipv4/route.c [commit a74fc62eec15 and 8a402bbe5476 are not backport] Signed-off-by: Dong Chenchen <dongchenchen2@huawei.com> --- include/net/dst.h | 16 +++++++++++----- net/core/dst.c | 2 +- net/ipv4/route.c | 4 ++-- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/include/net/dst.h b/include/net/dst.h index 96d2e4df5d30..22d8d1e7b64c 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -25,7 +25,10 @@ struct sk_buff; struct dst_entry { - struct net_device *dev; + union { + struct net_device *dev; + struct net_device __rcu *dev_rcu; + }; struct dst_ops *ops; unsigned long _metrics; unsigned long expires; @@ -586,9 +589,12 @@ static inline struct net_device *dst_dev(const struct dst_entry *dst) static inline struct net_device *dst_dev_rcu(const struct dst_entry *dst) { - /* In the future, use rcu_dereference(dst->dev) */ - WARN_ON_ONCE(!rcu_read_lock_held()); - return READ_ONCE(dst->dev); + return rcu_dereference(dst->dev_rcu); +} + +static inline struct net *dst_dev_net_rcu(const struct dst_entry *dst) +{ + return dev_net_rcu(dst_dev_rcu(dst)); } static inline struct net_device *skb_dst_dev(const struct sk_buff *skb) @@ -608,7 +614,7 @@ static inline struct net *skb_dst_dev_net(const struct sk_buff *skb) static inline struct net *skb_dst_dev_net_rcu(const struct sk_buff *skb) { - return dev_net_rcu(skb_dst_dev(skb)); + return dev_net_rcu(skb_dst_dev_rcu(skb)); } struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie); diff --git a/net/core/dst.c b/net/core/dst.c index 455c8bddc523..6f7a2a4d3ad2 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -152,7 +152,7 @@ void dst_dev_put(struct dst_entry *dst) dst->ops->ifdown(dst, dev); dst->input = dst_discard; dst->output = dst_discard_out; - WRITE_ONCE(dst->dev, blackhole_netdev); + rcu_assign_pointer(dst->dev_rcu, blackhole_netdev); netdev_ref_replace(dev, blackhole_netdev, &dst->dev_tracker, GFP_ATOMIC); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 97dc30a03dbf..00b956fa7784 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1033,7 +1033,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) return; rcu_read_lock(); - net = dev_net_rcu(dst->dev); + net = dst_dev_net_rcu(dst); if (mtu < net->ipv4.ip_rt_min_pmtu) { lock = true; mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu); @@ -1331,7 +1331,7 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst) struct net *net; rcu_read_lock(); - net = dev_net_rcu(dst->dev); + net = dst_dev_net_rcu(dst); advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size, net->ipv4.ip_rt_min_advmss); rcu_read_unlock(); -- 2.25.1
From: Eric Dumazet <edumazet@google.com> mainline inclusion from mainline-v6.18-rc1 commit 50c127a69cd6285300931853b352a1918cfa180f category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID3WJ1 CVE: CVE-2025-40074 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- Replace three dst_dev() with a lockdep enabled helper. Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()") Signed-off-by: Eric Dumazet <edumazet@google.com> Reviewed-by: David Ahern <dsahern@kernel.org> Link: https://patch.msgid.link/20250828195823.3958522-7-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> Conflicts: net/ipv4/tcp_metrics.c [commit a74fc62eec15 and e7b9ecce562c are not backport] Signed-off-by: Dong Chenchen <dongchenchen2@huawei.com> --- net/ipv4/tcp_metrics.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index e0883ba709b0..16c358bf1a4f 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -170,7 +170,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, bool reclaim = false; spin_lock_bh(&tcp_metrics_lock); - net = dev_net(dst->dev); + net = dst_dev_net_rcu(dst); /* While waiting for the spin-lock the cache might have been populated * with this entry and so we have to check again. @@ -273,7 +273,7 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, return NULL; } - net = dev_net(dst->dev); + net = dst_dev_net_rcu(dst); hash ^= net_hash_mix(net); hash = hash_32(hash, tcp_metrics_hash_log); @@ -318,7 +318,7 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, else return NULL; - net = dev_net(dst->dev); + net = dst_dev_net_rcu(dst); hash ^= net_hash_mix(net); hash = hash_32(hash, tcp_metrics_hash_log); -- 2.25.1
From: Eric Dumazet <edumazet@google.com> mainline inclusion from mainline-v6.18-rc1 commit 6ad8de3cefdb6ffa6708b21c567df0dbf82c43a8 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID3WJ1 CVE: CVE-2025-40074 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- Change icmpv4_xrlim_allow(), ip_defrag() to prevent possible UAF. Change ipmr_prepare_xmit(), ipmr_queue_fwd_xmit(), ip_mr_output(), ipv4_neigh_lookup() to use lockdep enabled dst_dev_rcu(). Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()") Signed-off-by: Eric Dumazet <edumazet@google.com> Reviewed-by: David Ahern <dsahern@kernel.org> Link: https://patch.msgid.link/20250828195823.3958522-9-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> Conflicts: net/ipv4/icmp.c net/ipv4/ip_fragment.c net/ipv4/ipmr.c net/ipv4/route.c [commit a74fc62eec15 is not backport. commit b2e653bcff0f and 3b7bc938e0ad and 35bec72a24ac(ipmr.c) are not backport] Signed-off-by: Dong Chenchen <dongchenchen2@huawei.com> --- net/ipv4/icmp.c | 10 ++++++---- net/ipv4/ip_fragment.c | 9 +++++++-- net/ipv4/ipmr.c | 2 +- net/ipv4/route.c | 4 ++-- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 745dae4dd19f..0429194342f3 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -315,22 +315,24 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, { struct dst_entry *dst = &rt->dst; struct inet_peer *peer; + struct net_device *dev; bool rc = true; if (!apply_ratelimit) return true; /* No rate limit on loopback */ - if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) + rcu_read_lock(); + dev = dst_dev_rcu(dst); + if (dev && (dev->flags&IFF_LOOPBACK)) goto out; - rcu_read_lock(); peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, - l3mdev_master_ifindex_rcu(dst->dev)); + l3mdev_master_ifindex_rcu(dev)); rc = inet_peer_xrlim_allow(peer, READ_ONCE(net->ipv4.sysctl_icmp_ratelimit)); - rcu_read_unlock(); out: + rcu_read_unlock(); if (!rc) __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST); else diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 877d1e03150c..652ef3245c81 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -488,13 +488,16 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, /* Process an incoming IP datagram fragment. */ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) { - struct net_device *dev = skb->dev ? : skb_dst(skb)->dev; - int vif = l3mdev_master_ifindex_rcu(dev); + struct net_device *dev; struct ipq *qp; + int vif; __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS); /* Lookup (or create) queue header */ + rcu_read_lock(); + dev = skb->dev ? : skb_dst_dev_rcu(skb); + vif = l3mdev_master_ifindex_rcu(dev); qp = ip_find(net, ip_hdr(skb), user, vif); if (qp) { int ret; @@ -504,9 +507,11 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) ret = ip_frag_queue(qp, skb); spin_unlock(&qp->q.lock); + rcu_read_unlock(); ipq_put(qp); return ret; } + rcu_read_unlock(); __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); kfree_skb(skb); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index af9412a507cf..f39cd565569a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1893,7 +1893,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, goto out_free; } - dev = rt->dst.dev; + dev = dst_dev_rcu(&rt->dst); if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) { /* Do not fragment multicasts. Alas, IPv4 does not diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 00b956fa7784..79c354896554 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -412,11 +412,11 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr) { const struct rtable *rt = container_of(dst, struct rtable, dst); - struct net_device *dev = dst->dev; + struct net_device *dev; struct neighbour *n; rcu_read_lock(); - + dev = dst_dev_rcu(dst); if (likely(rt->rt_gw_family == AF_INET)) { n = ip_neigh_gw4(dev, rt->rt_gw4); } else if (rt->rt_gw_family == AF_INET6) { -- 2.25.1
From: Eric Dumazet <edumazet@google.com> mainline inclusion from mainline-v6.18-rc1 commit 9085e56501d93af9f2d7bd16f7fcfacdde47b99c category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID3WJ1 CVE: CVE-2025-40074 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- Use RCU in ip6_xmit() in order to use dst_dev_rcu() to prevent possible UAF. Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()") Signed-off-by: Eric Dumazet <edumazet@google.com> Reviewed-by: David Ahern <dsahern@kernel.org> Link: https://patch.msgid.link/20250828195823.3958522-4-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> Conflicts: net/ipv6/ip6_output.c [commit 1caf27297215 is not backport] Signed-off-by: Dong Chenchen <dongchenchen2@huawei.com> --- net/ipv6/ip6_output.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index cd89a2b35dfb..66863043bdb2 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -261,35 +261,36 @@ bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority) { - struct net *net = sock_net(sk); const struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl6->daddr; struct dst_entry *dst = skb_dst(skb); - struct net_device *dev = dst->dev; struct inet6_dev *idev = ip6_dst_idev(dst); struct hop_jumbo_hdr *hop_jumbo; int hoplen = sizeof(*hop_jumbo); + struct net *net = sock_net(sk); unsigned int head_room; + struct net_device *dev; struct ipv6hdr *hdr; u8 proto = fl6->flowi6_proto; int seg_len = skb->len; - int hlimit = -1; + int ret, hlimit = -1; u32 mtu; + rcu_read_lock(); + + dev = dst_dev_rcu(dst); head_room = sizeof(struct ipv6hdr) + hoplen + LL_RESERVED_SPACE(dev); if (opt) head_room += opt->opt_nflen + opt->opt_flen; if (unlikely(head_room > skb_headroom(skb))) { - /* Make sure idev stays alive */ - rcu_read_lock(); + /* idev stays alive while we hold rcu_read_lock(). */ skb = skb_expand_head(skb, head_room); if (!skb) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); - rcu_read_unlock(); - return -ENOBUFS; + ret = -ENOBUFS; + goto unlock; } - rcu_read_unlock(); } if (opt) { @@ -351,17 +352,21 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, * skb to its handler for processing */ skb = l3mdev_ip6_out((struct sock *)sk, skb); - if (unlikely(!skb)) - return 0; + if (unlikely(!skb)) { + ret = 0; + goto unlock; + } /* hooks should never assume socket lock is held. * we promote our socket to non const */ - return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, - net, (struct sock *)sk, skb, NULL, dev, - dst_output); + ret = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, + net, (struct sock *)sk, skb, NULL, dev, + dst_output); + goto unlock; } + ret = -EMSGSIZE; skb->dev = dev; /* ipv6_local_error() does not require socket lock, * we promote our socket to non const @@ -370,7 +375,9 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); - return -EMSGSIZE; +unlock: + rcu_read_unlock(); + return ret; } EXPORT_SYMBOL(ip6_xmit); -- 2.25.1
hulk inclusion category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID3WJ1 CVE: CVE-2025-40074 -------------------------------- Use KBAI_REPLACE to fix kabi breakage of struct dst_entry for adding dev_rcu. Fixes: caedcc5b6df1 ("net: dst: introduce dst->dev_rcu") Signed-off-by: Dong Chenchen <dongchenchen2@huawei.com> --- include/net/dst.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/dst.h b/include/net/dst.h index 22d8d1e7b64c..ae8e31997016 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -25,10 +25,10 @@ struct sk_buff; struct dst_entry { - union { + KABI_REPLACE(struct net_device *dev, union { struct net_device *dev; struct net_device __rcu *dev_rcu; - }; + }) struct dst_ops *ops; unsigned long _metrics; unsigned long expires; -- 2.25.1
From: Kuniyuki Iwashima <kuniyu@google.com> mainline inclusion from mainline-v6.18-rc1 commit c65f27b9c3be2269918e1cbad6d8884741f835c5 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID3WJ1 CVE: CVE-2025-40074 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- get_netdev_for_sock() is called during setsockopt(), so not under RCU. Using sk_dst_get(sk)->dev could trigger UAF. Let's use __sk_dst_get() and dst_dev_rcu(). Note that the only ->ndo_sk_get_lower_dev() user is bond_sk_get_lower_dev(), which uses RCU. Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure") Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Reviewed-by: Sabrina Dubroca <sd@queasysnail.net> Link: https://patch.msgid.link/20250916214758.650211-6-kuniyu@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> Signed-off-by: Dong Chenchen <dongchenchen2@huawei.com> --- net/tls/tls_device.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 8c94c926606a..571c3e3daea7 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -125,17 +125,19 @@ static void tls_device_queue_ctx_destruction(struct tls_context *ctx) /* We assume that the socket is already connected */ static struct net_device *get_netdev_for_sock(struct sock *sk) { - struct dst_entry *dst = sk_dst_get(sk); - struct net_device *netdev = NULL; + struct net_device *dev, *lowest_dev = NULL; + struct dst_entry *dst; - if (likely(dst)) { - netdev = netdev_sk_get_lowest_dev(dst->dev, sk); - dev_hold(netdev); + rcu_read_lock(); + dst = __sk_dst_get(sk); + dev = dst ? dst_dev_rcu(dst) : NULL; + if (likely(dev)) { + lowest_dev = netdev_sk_get_lowest_dev(dev, sk); + dev_hold(lowest_dev); } + rcu_read_unlock(); - dst_release(dst); - - return netdev; + return lowest_dev; } static void destroy_record(struct tls_record_info *record) -- 2.25.1
From: Eric Dumazet <edumazet@google.com> mainline inclusion from mainline-v6.18-rc1 commit 11709573cc4e48dc34c80fc7ab9ce5b159e29695 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID3WJ1 CVE: CVE-2025-40074 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- Use RCU in ip6_output() in order to use dst_dev_rcu() to prevent possible UAF. We can remove rcu_read_lock()/rcu_read_unlock() pairs from ip6_finish_output2(). Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()") Signed-off-by: Eric Dumazet <edumazet@google.com> Reviewed-by: David Ahern <dsahern@kernel.org> Link: https://patch.msgid.link/20250828195823.3958522-5-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> Signed-off-by: Dong Chenchen <dongchenchen2@huawei.com> --- net/ipv6/ip6_output.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 66863043bdb2..f39cddd44e3e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -60,7 +60,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct net_device *dev = dst->dev; + struct net_device *dev = dst_dev_rcu(dst); struct inet6_dev *idev = ip6_dst_idev(dst); unsigned int hh_len = LL_RESERVED_SPACE(dev); const struct in6_addr *daddr, *nexthop; @@ -70,15 +70,12 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * /* Be paranoid, rather than too clever. */ if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) { - /* Make sure idev stays alive */ - rcu_read_lock(); + /* idev stays alive because we hold rcu_read_lock(). */ skb = skb_expand_head(skb, hh_len); if (!skb) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); - rcu_read_unlock(); return -ENOMEM; } - rcu_read_unlock(); } hdr = ipv6_hdr(skb); @@ -123,7 +120,6 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); - rcu_read_lock(); nexthop = rt6_nexthop(dst_rt6_info(dst), daddr); neigh = __ipv6_neigh_lookup_noref(dev, nexthop); @@ -131,7 +127,6 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dev, false); if (IS_ERR(neigh)) { - rcu_read_unlock(); IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES); kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL); return -EINVAL; @@ -139,7 +134,6 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * } sock_confirm_neigh(skb, neigh); ret = neigh_output(neigh, skb, false); - rcu_read_unlock(); return ret; } @@ -225,22 +219,30 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev; - struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); + struct dst_entry *dst = skb_dst(skb); + struct net_device *dev, *indev = skb->dev; + struct inet6_dev *idev; + int ret; skb->protocol = htons(ETH_P_IPV6); + rcu_read_lock(); + dev = dst_dev_rcu(dst); + idev = ip6_dst_idev(dst); skb->dev = dev; if (unlikely(!idev || READ_ONCE(idev->cnf.disable_ipv6))) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); + rcu_read_unlock(); kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED); return 0; } - return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, - net, sk, skb, indev, dev, - ip6_finish_output, - !(IP6CB(skb)->flags & IP6SKB_REROUTED)); + ret = NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, + net, sk, skb, indev, dev, + ip6_finish_output, + !(IP6CB(skb)->flags & IP6SKB_REROUTED)); + rcu_read_unlock(); + return ret; } EXPORT_SYMBOL(ip6_output); -- 2.25.1
From: Eric Dumazet <edumazet@google.com> mainline inclusion from mainline-v6.14-rc3 commit 071d8012869b6af352acca346ade13e7be90a49f category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID3WJ1 CVE: CVE-2025-40074 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- ip_dst_mtu_maybe_forward() must use RCU protection to make sure the net structure it reads does not disappear. Fixes: f87c10a8aa1e8 ("ipv4: introduce ip_dst_mtu_maybe_forward and protect forwarding path against pmtu spoofing") Signed-off-by: Eric Dumazet <edumazet@google.com> Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com> Link: https://patch.msgid.link/20250205155120.1676781-4-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> Conflicts: include/net/ip.h [commit 1caf27297215 is not backport] Signed-off-by: Dong Chenchen <dongchenchen2@huawei.com> --- include/net/ip.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index a21f2af40be0..268fbcbfd276 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -456,9 +456,12 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, bool forwarding) { const struct rtable *rt = container_of(dst, struct rtable, dst); - struct net *net = dev_net(dst->dev); - unsigned int mtu; + unsigned int mtu, res; + struct net *net; + + rcu_read_lock(); + net = dev_net_rcu(dst->dev); if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) || ip_mtu_locked(dst) || !forwarding) { @@ -482,7 +485,11 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, out: mtu = min_t(unsigned int, mtu, IP_MAX_MTU); - return mtu - lwtunnel_headroom(dst->lwtstate, mtu); + res = mtu - lwtunnel_headroom(dst->lwtstate, mtu); + + rcu_read_unlock(); + + return res; } static inline unsigned int ip_skb_dst_mtu(struct sock *sk, -- 2.25.1
From: Eric Dumazet <edumazet@google.com> mainline inclusion from mainline-v6.18-rc1 commit 99a2ace61b211b0be861b07fbaa062fca4b58879 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/ID3WJ1 CVE: CVE-2025-40074 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- Use RCU to protect accesses to dst->dev from sk_setup_caps() and sk_dst_gso_max_size(). Also use dst_dev_rcu() in ip6_dst_mtu_maybe_forward(), and ip_dst_mtu_maybe_forward(). ip4_dst_hoplimit() can use dst_dev_net_rcu(). Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()") Signed-off-by: Eric Dumazet <edumazet@google.com> Reviewed-by: David Ahern <dsahern@kernel.org> Link: https://patch.msgid.link/20250828195823.3958522-6-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> Conflicts: include/net/ip.h include/net/ip6_route.h include/net/route.h net/core/sock.c [commit 1caf27297215, 15492700ac41 and 22d6c9eebf2e are not backport] Signed-off-by: Dong Chenchen <dongchenchen2@huawei.com> --- include/net/ip.h | 6 ++++-- include/net/ip6_route.h | 2 +- include/net/route.h | 2 +- net/core/sock.c | 16 ++++++++++------ 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 268fbcbfd276..2fa407767f33 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -456,12 +456,14 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, bool forwarding) { const struct rtable *rt = container_of(dst, struct rtable, dst); + const struct net_device *dev; unsigned int mtu, res; struct net *net; rcu_read_lock(); - net = dev_net_rcu(dst->dev); + dev = dst_dev_rcu(dst); + net = dev_net_rcu(dev); if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) || ip_mtu_locked(dst) || !forwarding) { @@ -475,7 +477,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, if (mtu) goto out; - mtu = READ_ONCE(dst->dev->mtu); + mtu = READ_ONCE(dev->mtu); if (unlikely(ip_mtu_locked(dst))) { if (rt->rt_uses_gateway && mtu > 576) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 392232fcd703..dca39485e672 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -333,7 +333,7 @@ static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst mtu = IPV6_MIN_MTU; rcu_read_lock(); - idev = __in6_dev_get(dst->dev); + idev = __in6_dev_get(dst_dev_rcu(dst)); if (idev) mtu = idev->cnf.mtu6; rcu_read_unlock(); diff --git a/include/net/route.h b/include/net/route.h index 4e657ca38aae..7194483dae23 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -362,7 +362,7 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst) const struct net *net; rcu_read_lock(); - net = dev_net_rcu(dst->dev); + net = dst_dev_net_rcu(dst); hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); rcu_read_unlock(); } diff --git a/net/core/sock.c b/net/core/sock.c index cbf46bfa5510..d63f5ee49054 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2435,7 +2435,7 @@ void sk_free_unlock_clone(struct sock *sk) } EXPORT_SYMBOL_GPL(sk_free_unlock_clone); -static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst) +static u32 sk_dst_gso_max_size(struct sock *sk, const struct net_device *dev) { bool is_ipv6 = false; u32 max_size; @@ -2445,8 +2445,8 @@ static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst) !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)); #endif /* pairs with the WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */ - max_size = is_ipv6 ? READ_ONCE(dst_dev(dst)->gso_max_size) : - READ_ONCE(dst_dev(dst)->gso_ipv4_max_size); + max_size = is_ipv6 ? READ_ONCE(dev->gso_max_size) : + READ_ONCE(dev->gso_ipv4_max_size); if (max_size > GSO_LEGACY_MAX_SIZE && !sk_is_tcp(sk)) max_size = GSO_LEGACY_MAX_SIZE; @@ -2455,9 +2455,12 @@ static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst) void sk_setup_caps(struct sock *sk, struct dst_entry *dst) { + const struct net_device *dev; u32 max_segs = 1; - sk->sk_route_caps = dst_dev(dst)->features; + rcu_read_lock(); + dev = dst_dev_rcu(dst); + sk->sk_route_caps = dev->features; if (sk_is_tcp(sk)) sk->sk_route_caps |= NETIF_F_GSO; if (sk->sk_route_caps & NETIF_F_GSO) @@ -2469,13 +2472,14 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } else { sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; - sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dst); + sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dev); /* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */ - max_segs = max_t(u32, READ_ONCE(dst_dev(dst)->gso_max_segs), 1); + max_segs = max_t(u32, READ_ONCE(dev->gso_max_segs), 1); } } sk->sk_gso_max_segs = max_segs; sk_dst_set(sk, dst); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(sk_setup_caps); -- 2.25.1
participants (2)
-
Dong Chenchen -
patchwork bot