From: Juan Zhou zhoujuan51@h-partners.com
Fixed some bond-related errors.
Junxian Huang (5): RDMA/hns: Fix a memory leak error when bond clear failed RDMA/hns: Add functions to obtain netdev and bus_num from an hr_dev RDMA/hns: Fix wild pointer error of RoCE bonding when rmmod hns3 RDMA/hns: Fix the device loss after unbinding RoCE bond resource slave RDMA/hns: Fix missing cleanup when bond_grp becomes invalid
drivers/infiniband/hw/hns/hns_roce_ah.c | 1 - drivers/infiniband/hw/hns/hns_roce_bond.c | 225 +++++++++++++------- drivers/infiniband/hw/hns/hns_roce_bond.h | 10 +- drivers/infiniband/hw/hns/hns_roce_device.h | 13 ++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 29 ++- drivers/infiniband/hw/hns/hns_roce_main.c | 64 ++++-- drivers/infiniband/hw/hns/hns_roce_pd.c | 1 - drivers/infiniband/hw/hns/hns_roce_qp.c | 5 +- drivers/infiniband/hw/hns/hns_roce_srq.c | 1 - 9 files changed, 236 insertions(+), 113 deletions(-)
From: Junxian Huang huangjunxian6@hisilicon.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I7WHE3
--------------------------------------------------------------------------
During bond clear, the slaves will be re-initialized as normal RDMA devices. If the re-init fails, the current bond clear work returns directly without removing the bond id or freeing the bond group, leading to a memory leak.
This patch adds a bus_num field in 'struct hns_roce_bond_group' so that the bond id removal no longer depends on 'bond_grp->main_hr_dev', and the memory can still be released safely even if the main_hr_dev re-init failed.
Fixes: e62a20278f18 ("RDMA/hns: support RoCE bonding") Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com --- drivers/infiniband/hw/hns/hns_roce_bond.c | 32 ++++++++++++++--------- drivers/infiniband/hw/hns/hns_roce_bond.h | 3 ++- 2 files changed, 21 insertions(+), 14 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.c b/drivers/infiniband/hw/hns/hns_roce_bond.c index 9f642b0adcb7..95b8988ff72e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_bond.c +++ b/drivers/infiniband/hw/hns/hns_roce_bond.c @@ -238,23 +238,26 @@ static void hns_roce_clear_bond(struct hns_roce_bond_group *bond_grp) u8 main_func_idx = PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn); struct hns_roce_dev *hr_dev; struct net_device *net_dev; - int i; + int i, ret;
bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED; + bond_grp->main_hr_dev = NULL;
hns_roce_bond_uninit_client(bond_grp, main_func_idx);
for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { net_dev = bond_grp->bond_func_info[i].net_dev; - if (net_dev) + if (net_dev) { hr_dev = hns_roce_bond_init_client(bond_grp, i); + if (hr_dev) + bond_grp->main_hr_dev = hr_dev; + } } - if (!hr_dev) - return;
- bond_grp->main_hr_dev = hr_dev; - hns_roce_cleanup_bond(bond_grp); - ibdev_info(&hr_dev->ib_dev, "RoCE clear bond finished!\n"); + ret = hns_roce_cleanup_bond(bond_grp); + if (!ret) + ibdev_info(&bond_grp->main_hr_dev->ib_dev, + "RoCE clear bond finished!\n"); }
static void hns_roce_slave_changestate(struct hns_roce_bond_group *bond_grp) @@ -444,7 +447,7 @@ static struct hns_roce_die_info *alloc_die_info(int bus_num)
static int alloc_bond_id(struct hns_roce_bond_group *bond_grp) { - int bus_num = bond_grp->main_hr_dev->pci_dev->bus->number; + u8 bus_num = bond_grp->bus_num; struct hns_roce_die_info *die_info = xa_load(&roce_bond_xa, bus_num); int i;
@@ -491,23 +494,25 @@ static int remove_bond_id(int bus_num, u8 bond_id) return 0; }
-void hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp) +int hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp) { int ret;
- ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_CLEAR_BOND); + ret = bond_grp->main_hr_dev ? + hns_roce_cmd_bond(bond_grp, HNS_ROCE_CLEAR_BOND) : -EIO; if (ret) ibdev_err(&bond_grp->main_hr_dev->ib_dev, - "failed to clear RoCE bond!\n"); + "failed to clear RoCE bond, ret = %d.\n", ret);
cancel_delayed_work(&bond_grp->bond_work); - ret = remove_bond_id(bond_grp->main_hr_dev->pci_dev->bus->number, - bond_grp->bond_id); + ret = remove_bond_id(bond_grp->bus_num, bond_grp->bond_id); if (ret) ibdev_err(&bond_grp->main_hr_dev->ib_dev, "failed to remove bond ID %d, ret = %d.\n", bond_grp->bond_id, ret); kfree(bond_grp); + + return ret; }
static bool hns_roce_bond_lowerstate_event(struct hns_roce_dev *hr_dev, @@ -640,6 +645,7 @@ static struct hns_roce_bond_group *hns_roce_alloc_bond_grp(struct hns_roce_dev * bond_grp->main_hr_dev = main_hr_dev; bond_grp->bond_ready = false; bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED; + bond_grp->bus_num = main_hr_dev->pci_dev->bus->number;
ret = alloc_bond_id(bond_grp); if (ret) { diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.h b/drivers/infiniband/hw/hns/hns_roce_bond.h index 8f637b551f25..94ee5bf36aa2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_bond.h +++ b/drivers/infiniband/hw/hns/hns_roce_bond.h @@ -57,6 +57,7 @@ struct hns_roce_bond_group { u32 active_slave_map; u32 slave_map_diff; u8 bond_id; + u8 bus_num; struct bonding *bond; bool bond_ready; enum hns_roce_bond_state bond_state; @@ -77,7 +78,7 @@ struct hns_roce_die_info { int hns_roce_bond_init(struct hns_roce_dev *hr_dev); int hns_roce_bond_event(struct notifier_block *self, unsigned long event, void *ptr); -void hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp); +int hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp); bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev); struct net_device *hns_roce_get_bond_netdev(struct hns_roce_dev *hr_dev); struct hns_roce_bond_group *hns_roce_get_bond_grp(struct hns_roce_dev *hr_dev);
From: Junxian Huang huangjunxian6@hisilicon.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I7WHE3
--------------------------------------------------------------------------
Add 2 inline functions to obtain netdev and bus_num from an hr_dev to improve readability.
Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com --- drivers/infiniband/hw/hns/hns_roce_ah.c | 1 - drivers/infiniband/hw/hns/hns_roce_bond.c | 15 +++++++------- drivers/infiniband/hw/hns/hns_roce_device.h | 12 +++++++++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- drivers/infiniband/hw/hns/hns_roce_main.c | 22 ++++++++++----------- drivers/infiniband/hw/hns/hns_roce_pd.c | 1 - drivers/infiniband/hw/hns/hns_roce_qp.c | 5 +++-- drivers/infiniband/hw/hns/hns_roce_srq.c | 1 - 8 files changed, 34 insertions(+), 25 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index c92179da38bd..f4ceaeba5601 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -30,7 +30,6 @@ * SOFTWARE. */
-#include <linux/pci.h> #include <rdma/ib_addr.h> #include <rdma/ib_cache.h> #include "hnae3.h" diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.c b/drivers/infiniband/hw/hns/hns_roce_bond.c index 95b8988ff72e..ec4da46b7ba3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_bond.c +++ b/drivers/infiniband/hw/hns/hns_roce_bond.c @@ -3,7 +3,6 @@ * Copyright (c) 2022 Hisilicon Limited. */
-#include <linux/pci.h> #include "hnae3.h" #include "hns_roce_device.h" #include "hns_roce_hw_v2.h" @@ -64,7 +63,7 @@ static bool is_hrdev_bond_slave(struct hns_roce_dev *hr_dev, if (!netif_is_lag_master(upper_dev)) return false;
- if (upper_dev == get_upper_dev_from_ndev(hr_dev->iboe.netdevs[0])) + if (upper_dev == get_upper_dev_from_ndev(get_hr_netdev(hr_dev, 0))) return true;
bond_grp = hns_roce_get_bond_grp(hr_dev); @@ -77,7 +76,8 @@ static bool is_hrdev_bond_slave(struct hns_roce_dev *hr_dev, struct hns_roce_bond_group *hns_roce_get_bond_grp(struct hns_roce_dev *hr_dev) { struct hns_roce_die_info *die_info = - xa_load(&roce_bond_xa, hr_dev->pci_dev->bus->number); + xa_load(&roce_bond_xa, get_hr_bus_num(hr_dev)); + struct net_device *net_dev = get_hr_netdev(hr_dev, 0); struct hns_roce_bond_group *bond_grp; int i;
@@ -88,9 +88,8 @@ struct hns_roce_bond_group *hns_roce_get_bond_grp(struct hns_roce_dev *hr_dev) bond_grp = die_info->bgrps[i]; if (!bond_grp) continue; - if (is_netdev_bond_slave(hr_dev->iboe.netdevs[0], bond_grp) || - bond_grp->upper_dev == - get_upper_dev_from_ndev(hr_dev->iboe.netdevs[0])) + if (is_netdev_bond_slave(net_dev, bond_grp) || + (bond_grp->upper_dev == get_upper_dev_from_ndev(net_dev))) return bond_grp; }
@@ -697,10 +696,10 @@ static enum bond_support_type if (hr_dev) { slave_num++; if (bus_num == -1) - bus_num = hr_dev->pci_dev->bus->number; + bus_num = get_hr_bus_num(hr_dev); if (hr_dev->is_vf || pci_num_vf(hr_dev->pci_dev) > 0 || - bus_num != hr_dev->pci_dev->bus->number) { + bus_num != get_hr_bus_num(hr_dev)) { support = false; break; } diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 6df342ae34e3..bd07775f0856 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -33,6 +33,7 @@ #ifndef _HNS_ROCE_DEVICE_H #define _HNS_ROCE_DEVICE_H
+#include <linux/pci.h> #include <rdma/ib_verbs.h> #include <rdma/hns-abi.h> #include "hns_roce_bond.h" @@ -1269,6 +1270,17 @@ static inline enum ib_port_state get_port_state(struct net_device *net_dev) IB_PORT_ACTIVE : IB_PORT_DOWN; }
+static inline struct net_device *get_hr_netdev(struct hns_roce_dev *hr_dev, + u8 port) +{ + return hr_dev->iboe.netdevs[port]; +} + +static inline u8 get_hr_bus_num(struct hns_roce_dev *hr_dev) +{ + return hr_dev->pci_dev->bus->number; +} + void hns_roce_init_uar_table(struct hns_roce_dev *dev); int hns_roce_uar_alloc(struct hns_roce_dev *dev, struct hns_roce_uar *uar);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 10488d1f9beb..1dadb7a3031c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -7526,7 +7526,7 @@ static void hns_roce_hw_v2_link_status_change(struct hnae3_handle *handle, return;
for (phy_port = 0; phy_port < hr_dev->caps.num_ports; phy_port++) - if (netdev == hr_dev->iboe.netdevs[phy_port]) + if (netdev == get_hr_netdev(hr_dev, phy_port)) break;
if (phy_port == hr_dev->caps.num_ports) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 1dc747454e00..8f60395be9f7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -32,7 +32,6 @@ */ #include <linux/acpi.h> #include <linux/module.h> -#include <linux/pci.h> #include <rdma/ib_addr.h> #include <rdma/ib_smi.h> #include <rdma/ib_user_verbs.h> @@ -61,7 +60,7 @@ static struct net_device *hns_roce_get_netdev(struct ib_device *ib_dev, rcu_read_lock();
if (!ndev) - ndev = hr_dev->iboe.netdevs[port_num - 1]; + ndev = get_hr_netdev(hr_dev, port_num - 1);
if (ndev) dev_hold(ndev); @@ -217,14 +216,14 @@ static int hns_roce_netdev_event(struct notifier_block *self,
static int hns_roce_setup_mtu_mac(struct hns_roce_dev *hr_dev) { + struct net_device *net_dev; int ret; u8 i;
for (i = 0; i < hr_dev->caps.num_ports; i++) { hr_dev->iboe.port_state[i] = IB_PORT_DOWN; - - ret = hns_roce_set_mac(hr_dev, i, - hr_dev->iboe.netdevs[i]->dev_addr); + net_dev = get_hr_netdev(hr_dev, i); + ret = hns_roce_set_mac(hr_dev, i, net_dev->dev_addr); if (ret) return ret; } @@ -310,7 +309,7 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num,
net_dev = hr_dev->hw->get_bond_netdev(hr_dev); if (!net_dev) - net_dev = hr_dev->iboe.netdevs[port]; + net_dev = get_hr_netdev(hr_dev, port); if (!net_dev) { spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); dev_err(dev, "Find netdev %u failed!\n", port); @@ -961,11 +960,12 @@ static const struct ib_device_ops hns_roce_dev_restrack_ops = {
static int hns_roce_register_device(struct hns_roce_dev *hr_dev) { - int ret; struct hns_roce_ib_iboe *iboe = NULL; - struct ib_device *ib_dev = NULL; struct device *dev = hr_dev->dev; + struct ib_device *ib_dev = NULL; + struct net_device *net_dev; unsigned int i; + int ret;
iboe = &hr_dev->iboe; spin_lock_init(&iboe->lock); @@ -1049,11 +1049,11 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) ib_dev->driver_def = hns_roce_uapi_defs;
for (i = 0; i < hr_dev->caps.num_ports; i++) { - if (!hr_dev->iboe.netdevs[i]) + net_dev = get_hr_netdev(hr_dev, i); + if (!net_dev) continue;
- ret = ib_device_set_netdev(ib_dev, hr_dev->iboe.netdevs[i], - i + 1); + ret = ib_device_set_netdev(ib_dev, net_dev, i + 1); if (ret) return ret; } diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index 7399963dc294..6c69e095aa01 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -30,7 +30,6 @@ * SOFTWARE. */
-#include <linux/pci.h> #include "hns_roce_device.h"
void hns_roce_init_pd_table(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 4d0c9332f896..00f82f4b19f6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -31,7 +31,6 @@ * SOFTWARE. */
-#include <linux/pci.h> #include <rdma/ib_addr.h> #include <rdma/ib_umem.h> #include <rdma/uverbs_ioctl.h> @@ -1410,11 +1409,13 @@ static int check_mtu_validate(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_qp_attr *attr, int attr_mask) { + struct net_device *net_dev; enum ib_mtu active_mtu; int p;
p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port; - active_mtu = iboe_get_mtu(hr_dev->iboe.netdevs[p]->mtu); + net_dev = get_hr_netdev(hr_dev, p); + active_mtu = iboe_get_mtu(net_dev->mtu);
if ((hr_dev->caps.max_mtu >= IB_MTU_2048 && attr->path_mtu > hr_dev->caps.max_mtu) || diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 320aaa24be01..511e0f79e9d7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -3,7 +3,6 @@ * Copyright (c) 2018 Hisilicon Limited. */
-#include <linux/pci.h> #include <rdma/ib_umem.h> #include "hns_roce_device.h" #include "hns_roce_cmd.h"
From: Junxian Huang huangjunxian6@hisilicon.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I7WHE3
--------------------------------------------------------------------------
When rmmod hns3, the uninit procedure is in this order: pf0 roce uninit instance, pf0 nic uninit instance, pf1 roce uninit instance, pf1 nic uninit instance, and so on.
During pf0 nic uninit instance, pf0 netdev is unregistered and the RoCE bonding driver will be notified by a bonding event. Then a clear-bond work will be scheduled.
At this time, the clear-bond work and pf1 roce uninit instance are being executed concurrently. As the clear-bond work modifies the instance state of pf1 earlier, pf1 roce uninit instance will return when the state is found changed. This allows pf1 nic uninit instance to complete before the clear-bond work does. When the clear-bond work then accesses pf1 nic resources which have already been released, an error occurs.
To fix the error, add a new instance state to indicate an ongoing bond work involving bonding uninit. The roce driver uninit instance will wait for the completion of the bond work when the device being uninited is also in the procedure of bonding uninit to avoid concurrency and make sure the nic resources won't be released for the moment.
Fixes: e62a20278f18 ("RDMA/hns: support RoCE bonding") Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com --- drivers/infiniband/hw/hns/hns_roce_bond.c | 137 ++++++++++++-------- drivers/infiniband/hw/hns/hns_roce_bond.h | 7 +- drivers/infiniband/hw/hns/hns_roce_device.h | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 24 +++- drivers/infiniband/hw/hns/hns_roce_main.c | 11 +- 5 files changed, 122 insertions(+), 58 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.c b/drivers/infiniband/hw/hns/hns_roce_bond.c index ec4da46b7ba3..19ec0940a7ea 100644 --- a/drivers/infiniband/hw/hns/hns_roce_bond.c +++ b/drivers/infiniband/hw/hns/hns_roce_bond.c @@ -56,6 +56,8 @@ static bool is_hrdev_bond_slave(struct hns_roce_dev *hr_dev, struct net_device *upper_dev) { struct hns_roce_bond_group *bond_grp; + struct net_device *net_dev; + u8 bus_num;
if (!hr_dev || !upper_dev) return false; @@ -63,21 +65,23 @@ static bool is_hrdev_bond_slave(struct hns_roce_dev *hr_dev, if (!netif_is_lag_master(upper_dev)) return false;
- if (upper_dev == get_upper_dev_from_ndev(get_hr_netdev(hr_dev, 0))) + net_dev = get_hr_netdev(hr_dev, 0); + bus_num = get_hr_bus_num(hr_dev); + + if (upper_dev == get_upper_dev_from_ndev(net_dev)) return true;
- bond_grp = hns_roce_get_bond_grp(hr_dev); + bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); if (bond_grp && upper_dev == bond_grp->upper_dev) return true;
return false; }
-struct hns_roce_bond_group *hns_roce_get_bond_grp(struct hns_roce_dev *hr_dev) +struct hns_roce_bond_group *hns_roce_get_bond_grp(struct net_device *net_dev, + u8 bus_num) { - struct hns_roce_die_info *die_info = - xa_load(&roce_bond_xa, get_hr_bus_num(hr_dev)); - struct net_device *net_dev = get_hr_netdev(hr_dev, 0); + struct hns_roce_die_info *die_info = xa_load(&roce_bond_xa, bus_num); struct hns_roce_bond_group *bond_grp; int i;
@@ -98,7 +102,11 @@ struct hns_roce_bond_group *hns_roce_get_bond_grp(struct hns_roce_dev *hr_dev)
bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev) { - struct hns_roce_bond_group *bond_grp = hns_roce_get_bond_grp(hr_dev); + struct net_device *net_dev = get_hr_netdev(hr_dev, 0); + struct hns_roce_bond_group *bond_grp; + u8 bus_num = get_hr_bus_num(hr_dev); + + bond_grp = hns_roce_get_bond_grp(net_dev, bus_num);
if (bond_grp && bond_grp->bond_state != HNS_ROCE_BOND_NOT_BONDED) return true; @@ -117,13 +125,15 @@ static inline bool is_active_slave(struct net_device *net_dev,
struct net_device *hns_roce_get_bond_netdev(struct hns_roce_dev *hr_dev) { - struct hns_roce_bond_group *bond_grp = hns_roce_get_bond_grp(hr_dev); - struct net_device *net_dev = NULL; + struct net_device *net_dev = get_hr_netdev(hr_dev, 0); + struct hns_roce_bond_group *bond_grp; + u8 bus_num = get_hr_bus_num(hr_dev); int i;
if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND)) return NULL;
+ bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); if (!bond_grp) return NULL;
@@ -144,9 +154,10 @@ struct net_device *hns_roce_get_bond_netdev(struct hns_roce_dev *hr_dev) for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { net_dev = bond_grp->bond_func_info[i].net_dev; if (net_dev && get_port_state(net_dev) == IB_PORT_ACTIVE) - break; + goto out; }
+ net_dev = NULL; out: mutex_unlock(&bond_grp->bond_mutex);
@@ -206,6 +217,7 @@ static void hns_roce_set_bond(struct hns_roce_bond_group *bond_grp) }
bond_grp->bond_state = HNS_ROCE_BOND_REGISTERING; + bond_grp->main_hr_dev = NULL;
for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { net_dev = bond_grp->bond_func_info[i].net_dev; @@ -217,19 +229,21 @@ static void hns_roce_set_bond(struct hns_roce_bond_group *bond_grp) } } } - if (!hr_dev) - return;
bond_grp->slave_map_diff = 0; hns_roce_bond_get_active_slave(bond_grp); - ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_SET_BOND); - if (ret) { - ibdev_err(&hr_dev->ib_dev, "failed to set RoCE bond!\n"); - return; - } + + ret = bond_grp->main_hr_dev ? + hns_roce_cmd_bond(bond_grp, HNS_ROCE_SET_BOND) : -EIO;
bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; - ibdev_info(&hr_dev->ib_dev, "RoCE set bond finished!\n"); + complete(&bond_grp->bond_work_done); + + if (ret) + BOND_ERR_LOG("failed to set RoCE bond, ret = %d.\n", ret); + else + ibdev_info(&bond_grp->main_hr_dev->ib_dev, + "RoCE set bond finished!\n"); }
static void hns_roce_clear_bond(struct hns_roce_bond_group *bond_grp) @@ -266,15 +280,17 @@ static void hns_roce_slave_changestate(struct hns_roce_bond_group *bond_grp) hns_roce_bond_get_active_slave(bond_grp);
ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_CHANGE_BOND); - if (ret) { - ibdev_err(&bond_grp->main_hr_dev->ib_dev, - "failed to change RoCE bond slave state!\n"); - return; - }
bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; - ibdev_info(&bond_grp->main_hr_dev->ib_dev, - "RoCE slave changestate finished!\n"); + complete(&bond_grp->bond_work_done); + + if (ret) + ibdev_err(&bond_grp->main_hr_dev->ib_dev, + "failed to change RoCE bond slave state, ret = %d.\n", + ret); + else + ibdev_info(&bond_grp->main_hr_dev->ib_dev, + "RoCE slave changestate finished!\n"); }
static void hns_roce_slave_inc(struct hns_roce_bond_group *bond_grp) @@ -292,16 +308,18 @@ static void hns_roce_slave_inc(struct hns_roce_bond_group *bond_grp)
bond_grp->slave_map_diff = 0; hns_roce_bond_get_active_slave(bond_grp); + ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_CHANGE_BOND); - if (ret) { - ibdev_err(&bond_grp->main_hr_dev->ib_dev, - "failed to increase RoCE bond slave!\n"); - return; - }
bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; - ibdev_info(&bond_grp->main_hr_dev->ib_dev, - "RoCE slave increase finished!\n"); + complete(&bond_grp->bond_work_done); + + if (ret) + ibdev_err(&bond_grp->main_hr_dev->ib_dev, + "failed to increase slave, ret = %d.\n", ret); + else + ibdev_info(&bond_grp->main_hr_dev->ib_dev, + "RoCE slave increase finished!\n"); }
static void hns_roce_slave_dec(struct hns_roce_bond_group *bond_grp) @@ -315,6 +333,7 @@ static void hns_roce_slave_dec(struct hns_roce_bond_group *bond_grp) int i;
if (dec_slave_map & (1 << main_func_idx)) { + bond_grp->main_hr_dev = NULL; hns_roce_bond_uninit_client(bond_grp, main_func_idx); for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { net_dev = bond_grp->bond_func_info[i].net_dev; @@ -340,16 +359,19 @@ static void hns_roce_slave_dec(struct hns_roce_bond_group *bond_grp)
bond_grp->slave_map_diff = 0; hns_roce_bond_get_active_slave(bond_grp); - ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_CHANGE_BOND); - if (ret) { - ibdev_err(&bond_grp->main_hr_dev->ib_dev, - "failed to decrease RoCE bond slave!\n"); - return; - } + + ret = bond_grp->main_hr_dev ? + hns_roce_cmd_bond(bond_grp, HNS_ROCE_CHANGE_BOND) : -EIO;
bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; - ibdev_info(&bond_grp->main_hr_dev->ib_dev, - "RoCE slave decrease finished!\n"); + complete(&bond_grp->bond_work_done); + + if (ret) + BOND_ERR_LOG("failed to decrease RoCE bond slave, ret = %d.\n", + ret); + else + ibdev_info(&bond_grp->main_hr_dev->ib_dev, + "RoCE slave decrease finished!\n"); }
static void hns_roce_do_bond(struct hns_roce_bond_group *bond_grp) @@ -364,6 +386,8 @@ static void hns_roce_do_bond(struct hns_roce_bond_group *bond_grp) "do_bond: bond_ready - %d, bond_state - %d.\n", bond_ready, bond_grp->bond_state);
+ reinit_completion(&bond_grp->bond_work_done); + if (bond_ready && bond_state == HNS_ROCE_BOND_NOT_BONDED) hns_roce_set_bond(bond_grp); else if (bond_ready && bond_state == HNS_ROCE_BOND_SLAVE_CHANGESTATE) @@ -397,10 +421,13 @@ void hns_roce_do_bond_work(struct work_struct *work)
int hns_roce_bond_init(struct hns_roce_dev *hr_dev) { - struct hns_roce_bond_group *bond_grp = hns_roce_get_bond_grp(hr_dev); + struct net_device *net_dev = get_hr_netdev(hr_dev, 0); struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_bond_group *bond_grp; + u8 bus_num = get_hr_bus_num(hr_dev); int ret;
+ bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); if (priv->handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT && bond_grp) { bond_grp->main_hr_dev = hr_dev; @@ -495,21 +522,24 @@ static int remove_bond_id(int bus_num, u8 bond_id)
int hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp) { + bool completion_no_waiter; int ret;
ret = bond_grp->main_hr_dev ? hns_roce_cmd_bond(bond_grp, HNS_ROCE_CLEAR_BOND) : -EIO; if (ret) - ibdev_err(&bond_grp->main_hr_dev->ib_dev, - "failed to clear RoCE bond, ret = %d.\n", ret); + BOND_ERR_LOG("failed to clear RoCE bond, ret = %d.\n", ret);
cancel_delayed_work(&bond_grp->bond_work); ret = remove_bond_id(bond_grp->bus_num, bond_grp->bond_id); if (ret) - ibdev_err(&bond_grp->main_hr_dev->ib_dev, - "failed to remove bond ID %d, ret = %d.\n", - bond_grp->bond_id, ret); - kfree(bond_grp); + BOND_ERR_LOG("failed to remove bond id %d, ret = %d.\n", + bond_grp->bond_id, ret); + + completion_no_waiter = completion_done(&bond_grp->bond_work_done); + complete(&bond_grp->bond_work_done); + if (completion_no_waiter) + kfree(bond_grp);
return ret; } @@ -640,6 +670,8 @@ static struct hns_roce_bond_group *hns_roce_alloc_bond_grp(struct hns_roce_dev *
INIT_DELAYED_WORK(&bond_grp->bond_work, hns_roce_do_bond_work);
+ init_completion(&bond_grp->bond_work_done); + bond_grp->upper_dev = upper_dev; bond_grp->main_hr_dev = main_hr_dev; bond_grp->bond_ready = false; @@ -664,15 +696,16 @@ static enum bond_support_type struct net_device **upper_dev, struct netdev_notifier_changeupper_info *info) { - struct hns_roce_bond_group *bond_grp = hns_roce_get_bond_grp(hr_dev); + struct net_device *net_dev = get_hr_netdev(hr_dev, 0); struct netdev_lag_upper_info *bond_upper_info = NULL; + struct hns_roce_bond_group *bond_grp; + int bus_num = get_hr_bus_num(hr_dev); bool bond_grp_exist = false; - struct net_device *net_dev; bool support = true; u8 slave_num = 0; - int bus_num = -1;
*upper_dev = info->upper_dev; + bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); if (bond_grp && *upper_dev == bond_grp->upper_dev) bond_grp_exist = true;
@@ -686,6 +719,7 @@ static enum bond_support_type !hns_roce_bond_mode_is_supported(bond_upper_info->tx_type)) return BOND_NOT_SUPPORT;
+ bus_num = -1; rcu_read_lock(); for_each_netdev_in_bond_rcu(*upper_dev, net_dev) { if (!info->linking && bond_grp_exist) { @@ -724,6 +758,7 @@ int hns_roce_bond_event(struct notifier_block *self, container_of(self, struct hns_roce_dev, bond_nb); enum bond_support_type support = BOND_SUPPORT; struct hns_roce_bond_group *bond_grp; + u8 bus_num = get_hr_bus_num(hr_dev); struct net_device *upper_dev; bool changed;
@@ -743,7 +778,7 @@ int hns_roce_bond_event(struct notifier_block *self, else if (!upper_dev && hr_dev != hns_roce_get_hrdev_by_netdev(net_dev)) return NOTIFY_DONE;
- bond_grp = hns_roce_get_bond_grp(hr_dev); + bond_grp = hns_roce_get_bond_grp(get_hr_netdev(hr_dev, 0), bus_num); if (event == NETDEV_CHANGEUPPER) { if (!bond_grp) { bond_grp = hns_roce_alloc_bond_grp(hr_dev, upper_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.h b/drivers/infiniband/hw/hns/hns_roce_bond.h index 94ee5bf36aa2..c9de9315d0da 100644 --- a/drivers/infiniband/hw/hns/hns_roce_bond.h +++ b/drivers/infiniband/hw/hns/hns_roce_bond.h @@ -14,6 +14,9 @@
#define BOND_ID(id) BIT(id)
+#define BOND_ERR_LOG(fmt, ...) \ + pr_err("HNS RoCE Bonding: " fmt, ##__VA_ARGS__) \ + enum { BOND_MODE_1, BOND_MODE_2_4, @@ -68,6 +71,7 @@ struct hns_roce_bond_group { struct mutex bond_mutex; struct hns_roce_func_info bond_func_info[ROCE_BOND_FUNC_MAX]; struct delayed_work bond_work; + struct completion bond_work_done; };
struct hns_roce_die_info { @@ -81,6 +85,7 @@ int hns_roce_bond_event(struct notifier_block *self, int hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp); bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev); struct net_device *hns_roce_get_bond_netdev(struct hns_roce_dev *hr_dev); -struct hns_roce_bond_group *hns_roce_get_bond_grp(struct hns_roce_dev *hr_dev); +struct hns_roce_bond_group *hns_roce_get_bond_grp(struct net_device *net_dev, + u8 bus_num);
#endif diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index bd07775f0856..148b8920925f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -186,6 +186,7 @@ enum hns_roce_instance_state { HNS_ROCE_STATE_INIT, HNS_ROCE_STATE_INITED, HNS_ROCE_STATE_UNINIT, + HNS_ROCE_STATE_BOND_UNINIT, };
enum { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 1dadb7a3031c..1311c65d5979 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -7231,7 +7231,8 @@ static bool check_vf_support(struct pci_dev *vf) if (!hr_dev) return false;
- bond_grp = hns_roce_get_bond_grp(hr_dev); + bond_grp = hns_roce_get_bond_grp(get_hr_netdev(hr_dev, 0), + pf->bus->number); if (bond_grp) return false;
@@ -7361,6 +7362,19 @@ static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, bool reset) { + struct hns_roce_bond_group *bond_grp; + + /* Wait for the completion of bond work to avoid concurrency */ + if (handle->rinfo.instance_state == HNS_ROCE_STATE_BOND_UNINIT) { + bond_grp = hns_roce_get_bond_grp(handle->rinfo.netdev, + handle->pdev->bus->number); + if (bond_grp) { + wait_for_completion(&bond_grp->bond_work_done); + if (bond_grp->bond_state == HNS_ROCE_BOND_NOT_BONDED) + kfree(bond_grp); + } + } + if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) return;
@@ -7394,7 +7408,7 @@ void hns_roce_bond_uninit_client(struct hns_roce_bond_group *bond_grp, if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) return;
- handle->rinfo.instance_state = HNS_ROCE_STATE_UNINIT; + handle->rinfo.instance_state = HNS_ROCE_STATE_BOND_UNINIT;
__hns_roce_hw_v2_uninit_instance(handle, false, false);
@@ -7509,9 +7523,11 @@ static void hns_roce_hw_v2_link_status_change(struct hnae3_handle *handle, struct net_device *netdev = handle->rinfo.netdev; struct hns_roce_dev *hr_dev = handle->priv; struct hns_roce_bond_group *bond_grp; + struct net_device *hr_net_dev; struct ib_event event; unsigned long flags; u8 phy_port; + u8 bus_num;
if (linkup || !hr_dev) return; @@ -7521,7 +7537,9 @@ static void hns_roce_hw_v2_link_status_change(struct hnae3_handle *handle, * netdev but not only one. So bond device cannot get a correct * link status from this path. */ - bond_grp = hns_roce_get_bond_grp(hr_dev); + hr_net_dev = get_hr_netdev(hr_dev, 0); + bus_num = get_hr_bus_num(hr_dev); + bond_grp = hns_roce_get_bond_grp(hr_net_dev, bus_num); if (bond_grp) return;
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 8f60395be9f7..6c1fb24cd87b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -119,10 +119,12 @@ static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context)
static enum ib_port_state get_upper_port_state(struct hns_roce_dev *hr_dev) { + struct net_device *net_dev = get_hr_netdev(hr_dev, 0); struct hns_roce_bond_group *bond_grp; + u8 bus_num = get_hr_bus_num(hr_dev); struct net_device *upper;
- bond_grp = hns_roce_get_bond_grp(hr_dev); + bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); upper = bond_grp ? bond_grp->upper_dev : NULL; if (upper) return get_port_state(upper); @@ -197,7 +199,8 @@ static int hns_roce_netdev_event(struct notifier_block *self, hr_dev = container_of(self, struct hns_roce_dev, iboe.nb); iboe = &hr_dev->iboe; if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) { - bond_grp = hns_roce_get_bond_grp(hr_dev); + bond_grp = hns_roce_get_bond_grp(get_hr_netdev(hr_dev, 0), + get_hr_bus_num(hr_dev)); upper = bond_grp ? bond_grp->upper_dev : NULL; }
@@ -850,13 +853,15 @@ static int hns_roce_get_hw_stats(struct ib_device *device, static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev, bool bond_cleanup) { + struct net_device *net_dev = get_hr_netdev(hr_dev, 0); struct hns_roce_ib_iboe *iboe = &hr_dev->iboe; struct hns_roce_v2_priv *priv = hr_dev->priv; struct hns_roce_bond_group *bond_grp; + u8 bus_num = get_hr_bus_num(hr_dev);
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) { unregister_netdevice_notifier(&hr_dev->bond_nb); - bond_grp = hns_roce_get_bond_grp(hr_dev); + bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); if (bond_grp) { if (bond_cleanup) hns_roce_cleanup_bond(bond_grp);
From: Junxian Huang huangjunxian6@hisilicon.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I7WHE3
--------------------------------------------------------------------------
When the resource PF of RoCE bonding is unbound, the main_hr_dev will be unregistered and the bond resources will be cleaned up. Currently, the other slaves are not re-initialized, and they are not available until the whole RoCE ko is removed and inserted again.
To fix this problem, re-initialize all the slaves as hns_* devices, except the resource slave itself, before the bond resources are cleaned up.
Fixes: e62a20278f18 ("RDMA/hns: support RoCE bonding") Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com --- drivers/infiniband/hw/hns/hns_roce_bond.c | 24 +++++++++++----- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 3 ++ drivers/infiniband/hw/hns/hns_roce_main.c | 33 ++++++++++++++++------ 3 files changed, 44 insertions(+), 16 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.c b/drivers/infiniband/hw/hns/hns_roce_bond.c index 19ec0940a7ea..1f6c6f3b738e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_bond.c +++ b/drivers/infiniband/hw/hns/hns_roce_bond.c @@ -37,19 +37,19 @@ static struct net_device *get_upper_dev_from_ndev(struct net_device *net_dev) return upper_dev; }
-static bool is_netdev_bond_slave(struct net_device *net_dev, - struct hns_roce_bond_group *bond_grp) +static int get_netdev_bond_slave_id(struct net_device *net_dev, + struct hns_roce_bond_group *bond_grp) { int i;
if (!net_dev || !bond_grp) - return false; + return -ENODEV;
for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) if (net_dev == bond_grp->bond_func_info[i].net_dev) - return true; + return i;
- return false; + return -ENOENT; }
static bool is_hrdev_bond_slave(struct hns_roce_dev *hr_dev, @@ -92,7 +92,7 @@ struct hns_roce_bond_group *hns_roce_get_bond_grp(struct net_device *net_dev, bond_grp = die_info->bgrps[i]; if (!bond_grp) continue; - if (is_netdev_bond_slave(net_dev, bond_grp) || + if (get_netdev_bond_slave_id(net_dev, bond_grp) >= 0 || (bond_grp->upper_dev == get_upper_dev_from_ndev(net_dev))) return bond_grp; } @@ -723,7 +723,7 @@ static enum bond_support_type rcu_read_lock(); for_each_netdev_in_bond_rcu(*upper_dev, net_dev) { if (!info->linking && bond_grp_exist) { - if (is_netdev_bond_slave(net_dev, bond_grp)) + if (get_netdev_bond_slave_id(net_dev, bond_grp) >= 0) slave_num++; } else { hr_dev = hns_roce_get_hrdev_by_netdev(net_dev); @@ -761,6 +761,7 @@ int hns_roce_bond_event(struct notifier_block *self, u8 bus_num = get_hr_bus_num(hr_dev); struct net_device *upper_dev; bool changed; + int slave_id;
if (event != NETDEV_CHANGEUPPER && event != NETDEV_CHANGELOWERSTATE) return NOTIFY_DONE; @@ -790,6 +791,15 @@ int hns_roce_bond_event(struct notifier_block *self, } else if (hr_dev != bond_grp->main_hr_dev) { return NOTIFY_DONE; } + /* In the case of netdev being unregistered, the roce + * instance shouldn't be inited. + */ + if (net_dev->reg_state >= NETREG_UNREGISTERING) { + slave_id = get_netdev_bond_slave_id(net_dev, bond_grp); + if (slave_id >= 0) + bond_grp->bond_func_info[slave_id].handle = NULL; + } + if (support == BOND_EXISTING_NOT_SUPPORT) { bond_grp->bond_ready = false; hns_roce_queue_bond_work(bond_grp, HZ); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 1311c65d5979..e905d26bb3e5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -7393,6 +7393,9 @@ struct hns_roce_dev int ret;
handle = bond_grp->bond_func_info[func_idx].handle; + if (!handle || !handle->client) + return NULL; + ret = hns_roce_hw_v2_init_instance(handle); if (ret) return NULL; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 6c1fb24cd87b..4a16200ab950 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -858,19 +858,34 @@ static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev, struct hns_roce_v2_priv *priv = hr_dev->priv; struct hns_roce_bond_group *bond_grp; u8 bus_num = get_hr_bus_num(hr_dev); + int i;
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) { - unregister_netdevice_notifier(&hr_dev->bond_nb); - bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); - if (bond_grp) { - if (bond_cleanup) - hns_roce_cleanup_bond(bond_grp); - else if (priv->handle->rinfo.reset_state == - HNS_ROCE_STATE_RST_UNINIT) - bond_grp->main_hr_dev = NULL; + if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND)) + goto normal_unregister; + + unregister_netdevice_notifier(&hr_dev->bond_nb); + bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); + if (!bond_grp) + goto normal_unregister; + + if (bond_cleanup) { + /* To avoid the loss of other slave devices when main_hr_dev + * is unregistered, re-initialized the remaining slaves before + * the bond resources cleanup. + */ + bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED; + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { + net_dev = bond_grp->bond_func_info[i].net_dev; + if (net_dev && net_dev != iboe->netdevs[0]) + hns_roce_bond_init_client(bond_grp, i); } + hns_roce_cleanup_bond(bond_grp); + } else if (priv->handle->rinfo.reset_state == + HNS_ROCE_STATE_RST_UNINIT) { + bond_grp->main_hr_dev = NULL; }
+normal_unregister: hr_dev->active = false; unregister_netdevice_notifier(&iboe->nb); ib_unregister_device(&hr_dev->ib_dev);
From: Junxian Huang huangjunxian6@hisilicon.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I7WHE3
--------------------------------------------------------------------------
Consider running a command such as: "ifenslave bond0 eth1 eth2; ifenslave -d bond0 eth1" The bonding condition becomes invalid right after bond_grp is allocated and before the bond delayed work is scheduled.
When the bond delayed work is scheduled, as the bonding condition is invalid, the function does nothing and returns directly, leaving the allocated bond_grp unfreed.
Currently, two flags, bond_ready and bond_state, are checked to decide which bond work procedure will be executed. In the above problem, bond_ready is false (since the bonding condition is invalid) and bond_state is NOT_BONDED. Clean up the bond_grp resources in this situation.
Fixes: e62a20278f18 ("RDMA/hns: support RoCE bonding") Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com --- drivers/infiniband/hw/hns/hns_roce_bond.c | 27 ++++++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.c b/drivers/infiniband/hw/hns/hns_roce_bond.c index 1f6c6f3b738e..2f92ed730b78 100644 --- a/drivers/infiniband/hw/hns/hns_roce_bond.c +++ b/drivers/infiniband/hw/hns/hns_roce_bond.c @@ -253,6 +253,9 @@ static void hns_roce_clear_bond(struct hns_roce_bond_group *bond_grp) struct net_device *net_dev; int i, ret;
+ if (bond_grp->bond_state == HNS_ROCE_BOND_NOT_BONDED) + goto out; + bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED; bond_grp->main_hr_dev = NULL;
@@ -267,6 +270,7 @@ static void hns_roce_clear_bond(struct hns_roce_bond_group *bond_grp) } }
+out: ret = hns_roce_cleanup_bond(bond_grp); if (!ret) ibdev_info(&bond_grp->main_hr_dev->ib_dev, @@ -388,16 +392,27 @@ static void hns_roce_do_bond(struct hns_roce_bond_group *bond_grp)
reinit_completion(&bond_grp->bond_work_done);
- if (bond_ready && bond_state == HNS_ROCE_BOND_NOT_BONDED) + if (!bond_ready) { + hns_roce_clear_bond(bond_grp); + return; + } + + switch (bond_state) { + case HNS_ROCE_BOND_NOT_BONDED: hns_roce_set_bond(bond_grp); - else if (bond_ready && bond_state == HNS_ROCE_BOND_SLAVE_CHANGESTATE) + return; + case HNS_ROCE_BOND_SLAVE_CHANGESTATE: hns_roce_slave_changestate(bond_grp); - else if (bond_ready && bond_state == HNS_ROCE_BOND_SLAVE_INC) + return; + case HNS_ROCE_BOND_SLAVE_INC: hns_roce_slave_inc(bond_grp); - else if (bond_ready && bond_state == HNS_ROCE_BOND_SLAVE_DEC) + return; + case HNS_ROCE_BOND_SLAVE_DEC: hns_roce_slave_dec(bond_grp); - else if (!bond_ready && bond_state != HNS_ROCE_BOND_NOT_BONDED) - hns_roce_clear_bond(bond_grp); + return; + default: + return; + } }
void hns_roce_do_bond_work(struct work_struct *work)