From: Eric Dumazet edumazet@google.com
net-next inclusion from net-next-v5.17-rc5 commit e5f80fcf869a18cc750d6b350bbfac82df292e0b category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4VZN0?from=project-issue
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/commit/?...
--------------------------------
IPv6 addrconf notifiers wants the loopback device to be the last device being dismantled at netns deletion.
This caused many limitations and work arounds.
Back in linux-5.3, Mahesh added a per host blackhole_netdev that can be used whenever we need to make sure objects no longer refer to a disappearing device.
If we attach to blackhole_netdev an ip6_ptr (allocate an idev), then we can use this special device (which is never freed) in place of the loopback_dev (which can be freed).
This will permit improvements in netdev_run_todo() and other parts of the stack where had steps to make sure loopback_dev was the last device to disappear.
Signed-off-by: Eric Dumazet edumazet@google.com Cc: Mahesh Bandewar maheshb@google.com Signed-off-by: David S. Miller davem@davemloft.net Conflicts: net/ipv6/addrconf.c Signed-off-by: Ziyang Xuan william.xuanziyang@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- net/ipv6/addrconf.c | 78 +++++++++++++++++++-------------------------- net/ipv6/route.c | 21 +++++------- 2 files changed, 40 insertions(+), 59 deletions(-)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 29526937077b..71ee4aec4af8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -368,7 +368,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
ASSERT_RTNL();
- if (dev->mtu < IPV6_MIN_MTU) + if (dev->mtu < IPV6_MIN_MTU && dev != blackhole_netdev) return ERR_PTR(-EINVAL);
ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL); @@ -395,21 +395,22 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) /* We refer to the device */ dev_hold(dev);
- if (snmp6_alloc_dev(ndev) < 0) { - netdev_dbg(dev, "%s: cannot allocate memory for statistics\n", - __func__); - neigh_parms_release(&nd_tbl, ndev->nd_parms); - dev_put(dev); - kfree(ndev); - return ERR_PTR(err); - } + if (dev != blackhole_netdev) { + if (snmp6_alloc_dev(ndev) < 0) { + netdev_dbg(dev, "%s: cannot allocate memory for statistics\n", + __func__); + neigh_parms_release(&nd_tbl, ndev->nd_parms); + dev_put(dev); + kfree(ndev); + return ERR_PTR(err); + }
- if (snmp6_register_dev(ndev) < 0) { - netdev_dbg(dev, "%s: cannot create /proc/net/dev_snmp6/%s\n", - __func__, dev->name); - goto err_release; + if (snmp6_register_dev(ndev) < 0) { + netdev_dbg(dev, "%s: cannot create /proc/net/dev_snmp6/%s\n", + __func__, dev->name); + goto err_release; + } } - /* One reference from device. */ refcount_set(&ndev->refcnt, 1);
@@ -440,25 +441,28 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
ipv6_mc_init_dev(ndev); ndev->tstamp = jiffies; - err = addrconf_sysctl_register(ndev); - if (err) { - ipv6_mc_destroy_dev(ndev); - snmp6_unregister_dev(ndev); - goto err_release; + if (dev != blackhole_netdev) { + err = addrconf_sysctl_register(ndev); + if (err) { + ipv6_mc_destroy_dev(ndev); + snmp6_unregister_dev(ndev); + goto err_release; + } } /* protected by rtnl_lock */ rcu_assign_pointer(dev->ip6_ptr, ndev);
- /* Join interface-local all-node multicast group */ - ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes); - - /* Join all-node multicast group */ - ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes); + if (dev != blackhole_netdev) { + /* Join interface-local all-node multicast group */ + ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes);
- /* Join all-router multicast group if forwarding is set */ - if (ndev->cnf.forwarding && (dev->flags & IFF_MULTICAST)) - ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); + /* Join all-node multicast group */ + ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes);
+ /* Join all-router multicast group if forwarding is set */ + if (ndev->cnf.forwarding && (dev->flags & IFF_MULTICAST)) + ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); + } return ndev;
err_release: @@ -7115,26 +7119,8 @@ int __init addrconf_init(void) goto out_nowq; }
- /* The addrconf netdev notifier requires that loopback_dev - * has it's ipv6 private information allocated and setup - * before it can bring up and give link-local addresses - * to other devices which are up. - * - * Unfortunately, loopback_dev is not necessarily the first - * entry in the global dev_base list of net devices. In fact, - * it is likely to be the very last entry on that list. - * So this causes the notifier registry below to try and - * give link-local addresses to all devices besides loopback_dev - * first, then loopback_dev, which cases all the non-loopback_dev - * devices to fail to get a link-local address. - * - * So, as a temporary fix, allocate the ipv6 structure for - * loopback_dev first by hand. - * Longer term, all of the dependencies ipv6 has upon the loopback - * device and it being up should be removed. - */ rtnl_lock(); - idev = ipv6_add_dev(init_net.loopback_dev); + idev = ipv6_add_dev(blackhole_netdev); rtnl_unlock(); if (IS_ERR(idev)) { err = PTR_ERR(idev); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 654bf4ca6126..668b36b42699 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -156,14 +156,10 @@ void rt6_uncached_list_del(struct rt6_info *rt) } }
-static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) +static void rt6_uncached_list_flush_dev(struct net_device *dev) { - struct net_device *loopback_dev = net->loopback_dev; int cpu;
- if (dev == loopback_dev) - return; - for_each_possible_cpu(cpu) { struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); struct rt6_info *rt; @@ -174,7 +170,7 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) struct net_device *rt_dev = rt->dst.dev;
if (rt_idev->dev == dev) { - rt->rt6i_idev = in6_dev_get(loopback_dev); + rt->rt6i_idev = in6_dev_get(blackhole_netdev); in6_dev_put(rt_idev); }
@@ -372,13 +368,12 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, { struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; - struct net_device *loopback_dev = - dev_net(dev)->loopback_dev;
- if (idev && idev->dev != loopback_dev) { - struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev); - if (loopback_idev) { - rt->rt6i_idev = loopback_idev; + if (idev && idev->dev != blackhole_netdev) { + struct inet6_dev *blackhole_idev = in6_dev_get(blackhole_netdev); + + if (blackhole_idev) { + rt->rt6i_idev = blackhole_idev; in6_dev_put(idev); } } @@ -4799,7 +4794,7 @@ void rt6_sync_down_dev(struct net_device *dev, unsigned long event) void rt6_disable_ip(struct net_device *dev, unsigned long event) { rt6_sync_down_dev(dev, event); - rt6_uncached_list_flush_dev(dev_net(dev), dev); + rt6_uncached_list_flush_dev(dev); neigh_ifdown(&nd_tbl, dev); }