From: Junxian Huang huangjunxian6@hisilicon.com
driver inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I7WHE3
--------------------------------------------------------------------------
When the resource PF of a RoCE bond is unbound, the main_hr_dev is unregistered and the bond resources are cleaned up. Currently, the other slaves are not re-initialized, so they remain unavailable until the whole RoCE ko is removed and inserted again.
To fix this problem, re-initialize all the slaves as hns_* devices, except the resource slave itself, before the bond resources are cleaned up.
Fixes: e62a20278f18 ("RDMA/hns: support RoCE bonding")
Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com
---
 drivers/infiniband/hw/hns/hns_roce_bond.c  | 24 +++++++++++-----
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c |  3 ++
 drivers/infiniband/hw/hns/hns_roce_main.c  | 33 ++++++++++++++++------
 3 files changed, 44 insertions(+), 16 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.c b/drivers/infiniband/hw/hns/hns_roce_bond.c index 19ec0940a7ea..1f6c6f3b738e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_bond.c +++ b/drivers/infiniband/hw/hns/hns_roce_bond.c @@ -37,19 +37,19 @@ static struct net_device *get_upper_dev_from_ndev(struct net_device *net_dev) return upper_dev; }
-static bool is_netdev_bond_slave(struct net_device *net_dev, - struct hns_roce_bond_group *bond_grp) +static int get_netdev_bond_slave_id(struct net_device *net_dev, + struct hns_roce_bond_group *bond_grp) { int i;
if (!net_dev || !bond_grp) - return false; + return -ENODEV;
for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) if (net_dev == bond_grp->bond_func_info[i].net_dev) - return true; + return i;
- return false; + return -ENOENT; }
static bool is_hrdev_bond_slave(struct hns_roce_dev *hr_dev, @@ -92,7 +92,7 @@ struct hns_roce_bond_group *hns_roce_get_bond_grp(struct net_device *net_dev, bond_grp = die_info->bgrps[i]; if (!bond_grp) continue; - if (is_netdev_bond_slave(net_dev, bond_grp) || + if (get_netdev_bond_slave_id(net_dev, bond_grp) >= 0 || (bond_grp->upper_dev == get_upper_dev_from_ndev(net_dev))) return bond_grp; } @@ -723,7 +723,7 @@ static enum bond_support_type rcu_read_lock(); for_each_netdev_in_bond_rcu(*upper_dev, net_dev) { if (!info->linking && bond_grp_exist) { - if (is_netdev_bond_slave(net_dev, bond_grp)) + if (get_netdev_bond_slave_id(net_dev, bond_grp) >= 0) slave_num++; } else { hr_dev = hns_roce_get_hrdev_by_netdev(net_dev); @@ -761,6 +761,7 @@ int hns_roce_bond_event(struct notifier_block *self, u8 bus_num = get_hr_bus_num(hr_dev); struct net_device *upper_dev; bool changed; + int slave_id;
if (event != NETDEV_CHANGEUPPER && event != NETDEV_CHANGELOWERSTATE) return NOTIFY_DONE; @@ -790,6 +791,15 @@ int hns_roce_bond_event(struct notifier_block *self, } else if (hr_dev != bond_grp->main_hr_dev) { return NOTIFY_DONE; } + /* In the case of netdev being unregistered, the roce + * instance shouldn't be inited. + */ + if (net_dev->reg_state >= NETREG_UNREGISTERING) { + slave_id = get_netdev_bond_slave_id(net_dev, bond_grp); + if (slave_id >= 0) + bond_grp->bond_func_info[slave_id].handle = NULL; + } + if (support == BOND_EXISTING_NOT_SUPPORT) { bond_grp->bond_ready = false; hns_roce_queue_bond_work(bond_grp, HZ); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 1311c65d5979..e905d26bb3e5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -7393,6 +7393,9 @@ struct hns_roce_dev int ret;
handle = bond_grp->bond_func_info[func_idx].handle; + if (!handle || !handle->client) + return NULL; + ret = hns_roce_hw_v2_init_instance(handle); if (ret) return NULL; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 6c1fb24cd87b..4a16200ab950 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -858,19 +858,34 @@ static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev, struct hns_roce_v2_priv *priv = hr_dev->priv; struct hns_roce_bond_group *bond_grp; u8 bus_num = get_hr_bus_num(hr_dev); + int i;
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) { - unregister_netdevice_notifier(&hr_dev->bond_nb); - bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); - if (bond_grp) { - if (bond_cleanup) - hns_roce_cleanup_bond(bond_grp); - else if (priv->handle->rinfo.reset_state == - HNS_ROCE_STATE_RST_UNINIT) - bond_grp->main_hr_dev = NULL; + if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND)) + goto normal_unregister; + + unregister_netdevice_notifier(&hr_dev->bond_nb); + bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); + if (!bond_grp) + goto normal_unregister; + + if (bond_cleanup) { + /* To avoid the loss of other slave devices when main_hr_dev + * is unregistered, re-initialized the remaining slaves before + * the bond resources cleanup. + */ + bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED; + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { + net_dev = bond_grp->bond_func_info[i].net_dev; + if (net_dev && net_dev != iboe->netdevs[0]) + hns_roce_bond_init_client(bond_grp, i); } + hns_roce_cleanup_bond(bond_grp); + } else if (priv->handle->rinfo.reset_state == + HNS_ROCE_STATE_RST_UNINIT) { + bond_grp->main_hr_dev = NULL; }
+normal_unregister: hr_dev->active = false; unregister_netdevice_notifier(&iboe->nb); ib_unregister_device(&hr_dev->ib_dev);