From: Junxian Huang huangjunxian6@hisilicon.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I7WHE3
--------------------------------------------------------------------------
When rmmod hns3, the uninit procedure is in this order: pf0 roce uninit instance, pf0 nic uninit instance, pf1 roce uninit instance, pf1 nic uninit instance, and so on.
During pf0 nic uninit instance, pf0 netdev is unregistered and RoCE bonding driver is will be notified by a bonding event. Then a clear-bond work will be scheduled.
At this time, the clear-bond work and pf1 roce uninit instance are being executed concurrently. As the clear-bond work modifies the instance state of pf1 earlier, pf1 roce uninit instance will return when the state is found changed. This leads to pf1 nic uninit instance fast enough to be completed before the clear-bond work. When the clear-bond work accesses pf1 nic resources which have been released, an error occurs.
To fix the error, add a new instance state to indicate an ongoing bond work involving bonding uninit. The roce driver uninit instance will wait for the completion of the bond work when the device being uninited is also in the procedure of bonding uninit to avoid concurrency and make sure the nic resources won't be released for the moment.
Fixes: e62a20278f18 ("RDMA/hns: support RoCE bonding") Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com --- drivers/infiniband/hw/hns/hns_roce_bond.c | 137 ++++++++++++-------- drivers/infiniband/hw/hns/hns_roce_bond.h | 7 +- drivers/infiniband/hw/hns/hns_roce_device.h | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 24 +++- drivers/infiniband/hw/hns/hns_roce_main.c | 11 +- 5 files changed, 122 insertions(+), 58 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.c b/drivers/infiniband/hw/hns/hns_roce_bond.c index ec4da46b7ba3..19ec0940a7ea 100644 --- a/drivers/infiniband/hw/hns/hns_roce_bond.c +++ b/drivers/infiniband/hw/hns/hns_roce_bond.c @@ -56,6 +56,8 @@ static bool is_hrdev_bond_slave(struct hns_roce_dev *hr_dev, struct net_device *upper_dev) { struct hns_roce_bond_group *bond_grp; + struct net_device *net_dev; + u8 bus_num;
if (!hr_dev || !upper_dev) return false; @@ -63,21 +65,23 @@ static bool is_hrdev_bond_slave(struct hns_roce_dev *hr_dev, if (!netif_is_lag_master(upper_dev)) return false;
- if (upper_dev == get_upper_dev_from_ndev(get_hr_netdev(hr_dev, 0))) + net_dev = get_hr_netdev(hr_dev, 0); + bus_num = get_hr_bus_num(hr_dev); + + if (upper_dev == get_upper_dev_from_ndev(net_dev)) return true;
- bond_grp = hns_roce_get_bond_grp(hr_dev); + bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); if (bond_grp && upper_dev == bond_grp->upper_dev) return true;
return false; }
-struct hns_roce_bond_group *hns_roce_get_bond_grp(struct hns_roce_dev *hr_dev) +struct hns_roce_bond_group *hns_roce_get_bond_grp(struct net_device *net_dev, + u8 bus_num) { - struct hns_roce_die_info *die_info = - xa_load(&roce_bond_xa, get_hr_bus_num(hr_dev)); - struct net_device *net_dev = get_hr_netdev(hr_dev, 0); + struct hns_roce_die_info *die_info = xa_load(&roce_bond_xa, bus_num); struct hns_roce_bond_group *bond_grp; int i;
@@ -98,7 +102,11 @@ struct hns_roce_bond_group *hns_roce_get_bond_grp(struct hns_roce_dev *hr_dev)
bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev) { - struct hns_roce_bond_group *bond_grp = hns_roce_get_bond_grp(hr_dev); + struct net_device *net_dev = get_hr_netdev(hr_dev, 0); + struct hns_roce_bond_group *bond_grp; + u8 bus_num = get_hr_bus_num(hr_dev); + + bond_grp = hns_roce_get_bond_grp(net_dev, bus_num);
if (bond_grp && bond_grp->bond_state != HNS_ROCE_BOND_NOT_BONDED) return true; @@ -117,13 +125,15 @@ static inline bool is_active_slave(struct net_device *net_dev,
struct net_device *hns_roce_get_bond_netdev(struct hns_roce_dev *hr_dev) { - struct hns_roce_bond_group *bond_grp = hns_roce_get_bond_grp(hr_dev); - struct net_device *net_dev = NULL; + struct net_device *net_dev = get_hr_netdev(hr_dev, 0); + struct hns_roce_bond_group *bond_grp; + u8 bus_num = get_hr_bus_num(hr_dev); int i;
if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND)) return NULL;
+ bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); if (!bond_grp) return NULL;
@@ -144,9 +154,10 @@ struct net_device *hns_roce_get_bond_netdev(struct hns_roce_dev *hr_dev) for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { net_dev = bond_grp->bond_func_info[i].net_dev; if (net_dev && get_port_state(net_dev) == IB_PORT_ACTIVE) - break; + goto out; }
+ net_dev = NULL; out: mutex_unlock(&bond_grp->bond_mutex);
@@ -206,6 +217,7 @@ static void hns_roce_set_bond(struct hns_roce_bond_group *bond_grp) }
bond_grp->bond_state = HNS_ROCE_BOND_REGISTERING; + bond_grp->main_hr_dev = NULL;
for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { net_dev = bond_grp->bond_func_info[i].net_dev; @@ -217,19 +229,21 @@ static void hns_roce_set_bond(struct hns_roce_bond_group *bond_grp) } } } - if (!hr_dev) - return;
bond_grp->slave_map_diff = 0; hns_roce_bond_get_active_slave(bond_grp); - ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_SET_BOND); - if (ret) { - ibdev_err(&hr_dev->ib_dev, "failed to set RoCE bond!\n"); - return; - } + + ret = bond_grp->main_hr_dev ? + hns_roce_cmd_bond(bond_grp, HNS_ROCE_SET_BOND) : -EIO;
bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; - ibdev_info(&hr_dev->ib_dev, "RoCE set bond finished!\n"); + complete(&bond_grp->bond_work_done); + + if (ret) + BOND_ERR_LOG("failed to set RoCE bond, ret = %d.\n", ret); + else + ibdev_info(&bond_grp->main_hr_dev->ib_dev, + "RoCE set bond finished!\n"); }
static void hns_roce_clear_bond(struct hns_roce_bond_group *bond_grp) @@ -266,15 +280,17 @@ static void hns_roce_slave_changestate(struct hns_roce_bond_group *bond_grp) hns_roce_bond_get_active_slave(bond_grp);
ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_CHANGE_BOND); - if (ret) { - ibdev_err(&bond_grp->main_hr_dev->ib_dev, - "failed to change RoCE bond slave state!\n"); - return; - }
bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; - ibdev_info(&bond_grp->main_hr_dev->ib_dev, - "RoCE slave changestate finished!\n"); + complete(&bond_grp->bond_work_done); + + if (ret) + ibdev_err(&bond_grp->main_hr_dev->ib_dev, + "failed to change RoCE bond slave state, ret = %d.\n", + ret); + else + ibdev_info(&bond_grp->main_hr_dev->ib_dev, + "RoCE slave changestate finished!\n"); }
static void hns_roce_slave_inc(struct hns_roce_bond_group *bond_grp) @@ -292,16 +308,18 @@ static void hns_roce_slave_inc(struct hns_roce_bond_group *bond_grp)
bond_grp->slave_map_diff = 0; hns_roce_bond_get_active_slave(bond_grp); + ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_CHANGE_BOND); - if (ret) { - ibdev_err(&bond_grp->main_hr_dev->ib_dev, - "failed to increase RoCE bond slave!\n"); - return; - }
bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; - ibdev_info(&bond_grp->main_hr_dev->ib_dev, - "RoCE slave increase finished!\n"); + complete(&bond_grp->bond_work_done); + + if (ret) + ibdev_err(&bond_grp->main_hr_dev->ib_dev, + "failed to increase slave, ret = %d.\n", ret); + else + ibdev_info(&bond_grp->main_hr_dev->ib_dev, + "RoCE slave increase finished!\n"); }
static void hns_roce_slave_dec(struct hns_roce_bond_group *bond_grp) @@ -315,6 +333,7 @@ static void hns_roce_slave_dec(struct hns_roce_bond_group *bond_grp) int i;
if (dec_slave_map & (1 << main_func_idx)) { + bond_grp->main_hr_dev = NULL; hns_roce_bond_uninit_client(bond_grp, main_func_idx); for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { net_dev = bond_grp->bond_func_info[i].net_dev; @@ -340,16 +359,19 @@ static void hns_roce_slave_dec(struct hns_roce_bond_group *bond_grp)
bond_grp->slave_map_diff = 0; hns_roce_bond_get_active_slave(bond_grp); - ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_CHANGE_BOND); - if (ret) { - ibdev_err(&bond_grp->main_hr_dev->ib_dev, - "failed to decrease RoCE bond slave!\n"); - return; - } + + ret = bond_grp->main_hr_dev ? + hns_roce_cmd_bond(bond_grp, HNS_ROCE_CHANGE_BOND) : -EIO;
bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; - ibdev_info(&bond_grp->main_hr_dev->ib_dev, - "RoCE slave decrease finished!\n"); + complete(&bond_grp->bond_work_done); + + if (ret) + BOND_ERR_LOG("failed to decrease RoCE bond slave, ret = %d.\n", + ret); + else + ibdev_info(&bond_grp->main_hr_dev->ib_dev, + "RoCE slave decrease finished!\n"); }
static void hns_roce_do_bond(struct hns_roce_bond_group *bond_grp) @@ -364,6 +386,8 @@ static void hns_roce_do_bond(struct hns_roce_bond_group *bond_grp) "do_bond: bond_ready - %d, bond_state - %d.\n", bond_ready, bond_grp->bond_state);
+ reinit_completion(&bond_grp->bond_work_done); + if (bond_ready && bond_state == HNS_ROCE_BOND_NOT_BONDED) hns_roce_set_bond(bond_grp); else if (bond_ready && bond_state == HNS_ROCE_BOND_SLAVE_CHANGESTATE) @@ -397,10 +421,13 @@ void hns_roce_do_bond_work(struct work_struct *work)
int hns_roce_bond_init(struct hns_roce_dev *hr_dev) { - struct hns_roce_bond_group *bond_grp = hns_roce_get_bond_grp(hr_dev); + struct net_device *net_dev = get_hr_netdev(hr_dev, 0); struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_bond_group *bond_grp; + u8 bus_num = get_hr_bus_num(hr_dev); int ret;
+ bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); if (priv->handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT && bond_grp) { bond_grp->main_hr_dev = hr_dev; @@ -495,21 +522,24 @@ static int remove_bond_id(int bus_num, u8 bond_id)
int hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp) { + bool completion_no_waiter; int ret;
ret = bond_grp->main_hr_dev ? hns_roce_cmd_bond(bond_grp, HNS_ROCE_CLEAR_BOND) : -EIO; if (ret) - ibdev_err(&bond_grp->main_hr_dev->ib_dev, - "failed to clear RoCE bond, ret = %d.\n", ret); + BOND_ERR_LOG("failed to clear RoCE bond, ret = %d.\n", ret);
cancel_delayed_work(&bond_grp->bond_work); ret = remove_bond_id(bond_grp->bus_num, bond_grp->bond_id); if (ret) - ibdev_err(&bond_grp->main_hr_dev->ib_dev, - "failed to remove bond ID %d, ret = %d.\n", - bond_grp->bond_id, ret); - kfree(bond_grp); + BOND_ERR_LOG("failed to remove bond id %d, ret = %d.\n", + bond_grp->bond_id, ret); + + completion_no_waiter = completion_done(&bond_grp->bond_work_done); + complete(&bond_grp->bond_work_done); + if (completion_no_waiter) + kfree(bond_grp);
return ret; } @@ -640,6 +670,8 @@ static struct hns_roce_bond_group *hns_roce_alloc_bond_grp(struct hns_roce_dev *
INIT_DELAYED_WORK(&bond_grp->bond_work, hns_roce_do_bond_work);
+ init_completion(&bond_grp->bond_work_done); + bond_grp->upper_dev = upper_dev; bond_grp->main_hr_dev = main_hr_dev; bond_grp->bond_ready = false; @@ -664,15 +696,16 @@ static enum bond_support_type struct net_device **upper_dev, struct netdev_notifier_changeupper_info *info) { - struct hns_roce_bond_group *bond_grp = hns_roce_get_bond_grp(hr_dev); + struct net_device *net_dev = get_hr_netdev(hr_dev, 0); struct netdev_lag_upper_info *bond_upper_info = NULL; + struct hns_roce_bond_group *bond_grp; + int bus_num = get_hr_bus_num(hr_dev); bool bond_grp_exist = false; - struct net_device *net_dev; bool support = true; u8 slave_num = 0; - int bus_num = -1;
*upper_dev = info->upper_dev; + bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); if (bond_grp && *upper_dev == bond_grp->upper_dev) bond_grp_exist = true;
@@ -686,6 +719,7 @@ static enum bond_support_type !hns_roce_bond_mode_is_supported(bond_upper_info->tx_type)) return BOND_NOT_SUPPORT;
+ bus_num = -1; rcu_read_lock(); for_each_netdev_in_bond_rcu(*upper_dev, net_dev) { if (!info->linking && bond_grp_exist) { @@ -724,6 +758,7 @@ int hns_roce_bond_event(struct notifier_block *self, container_of(self, struct hns_roce_dev, bond_nb); enum bond_support_type support = BOND_SUPPORT; struct hns_roce_bond_group *bond_grp; + u8 bus_num = get_hr_bus_num(hr_dev); struct net_device *upper_dev; bool changed;
@@ -743,7 +778,7 @@ int hns_roce_bond_event(struct notifier_block *self, else if (!upper_dev && hr_dev != hns_roce_get_hrdev_by_netdev(net_dev)) return NOTIFY_DONE;
- bond_grp = hns_roce_get_bond_grp(hr_dev); + bond_grp = hns_roce_get_bond_grp(get_hr_netdev(hr_dev, 0), bus_num); if (event == NETDEV_CHANGEUPPER) { if (!bond_grp) { bond_grp = hns_roce_alloc_bond_grp(hr_dev, upper_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.h b/drivers/infiniband/hw/hns/hns_roce_bond.h index 94ee5bf36aa2..c9de9315d0da 100644 --- a/drivers/infiniband/hw/hns/hns_roce_bond.h +++ b/drivers/infiniband/hw/hns/hns_roce_bond.h @@ -14,6 +14,9 @@
#define BOND_ID(id) BIT(id)
+#define BOND_ERR_LOG(fmt, ...) \ + pr_err("HNS RoCE Bonding: " fmt, ##__VA_ARGS__) \ + enum { BOND_MODE_1, BOND_MODE_2_4, @@ -68,6 +71,7 @@ struct hns_roce_bond_group { struct mutex bond_mutex; struct hns_roce_func_info bond_func_info[ROCE_BOND_FUNC_MAX]; struct delayed_work bond_work; + struct completion bond_work_done; };
struct hns_roce_die_info { @@ -81,6 +85,7 @@ int hns_roce_bond_event(struct notifier_block *self, int hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp); bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev); struct net_device *hns_roce_get_bond_netdev(struct hns_roce_dev *hr_dev); -struct hns_roce_bond_group *hns_roce_get_bond_grp(struct hns_roce_dev *hr_dev); +struct hns_roce_bond_group *hns_roce_get_bond_grp(struct net_device *net_dev, + u8 bus_num);
#endif diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index bd07775f0856..148b8920925f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -186,6 +186,7 @@ enum hns_roce_instance_state { HNS_ROCE_STATE_INIT, HNS_ROCE_STATE_INITED, HNS_ROCE_STATE_UNINIT, + HNS_ROCE_STATE_BOND_UNINIT, };
enum { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 1dadb7a3031c..1311c65d5979 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -7231,7 +7231,8 @@ static bool check_vf_support(struct pci_dev *vf) if (!hr_dev) return false;
- bond_grp = hns_roce_get_bond_grp(hr_dev); + bond_grp = hns_roce_get_bond_grp(get_hr_netdev(hr_dev, 0), + pf->bus->number); if (bond_grp) return false;
@@ -7361,6 +7362,19 @@ static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, bool reset) { + struct hns_roce_bond_group *bond_grp; + + /* Wait for the completion of bond work to avoid concurrency */ + if (handle->rinfo.instance_state == HNS_ROCE_STATE_BOND_UNINIT) { + bond_grp = hns_roce_get_bond_grp(handle->rinfo.netdev, + handle->pdev->bus->number); + if (bond_grp) { + wait_for_completion(&bond_grp->bond_work_done); + if (bond_grp->bond_state == HNS_ROCE_BOND_NOT_BONDED) + kfree(bond_grp); + } + } + if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) return;
@@ -7394,7 +7408,7 @@ void hns_roce_bond_uninit_client(struct hns_roce_bond_group *bond_grp, if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) return;
- handle->rinfo.instance_state = HNS_ROCE_STATE_UNINIT; + handle->rinfo.instance_state = HNS_ROCE_STATE_BOND_UNINIT;
__hns_roce_hw_v2_uninit_instance(handle, false, false);
@@ -7509,9 +7523,11 @@ static void hns_roce_hw_v2_link_status_change(struct hnae3_handle *handle, struct net_device *netdev = handle->rinfo.netdev; struct hns_roce_dev *hr_dev = handle->priv; struct hns_roce_bond_group *bond_grp; + struct net_device *hr_net_dev; struct ib_event event; unsigned long flags; u8 phy_port; + u8 bus_num;
if (linkup || !hr_dev) return; @@ -7521,7 +7537,9 @@ static void hns_roce_hw_v2_link_status_change(struct hnae3_handle *handle, * netdev but not only one. So bond device cannot get a correct * link status from this path. */ - bond_grp = hns_roce_get_bond_grp(hr_dev); + hr_net_dev = get_hr_netdev(hr_dev, 0); + bus_num = get_hr_bus_num(hr_dev); + bond_grp = hns_roce_get_bond_grp(hr_net_dev, bus_num); if (bond_grp) return;
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 8f60395be9f7..6c1fb24cd87b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -119,10 +119,12 @@ static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context)
static enum ib_port_state get_upper_port_state(struct hns_roce_dev *hr_dev) { + struct net_device *net_dev = get_hr_netdev(hr_dev, 0); struct hns_roce_bond_group *bond_grp; + u8 bus_num = get_hr_bus_num(hr_dev); struct net_device *upper;
- bond_grp = hns_roce_get_bond_grp(hr_dev); + bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); upper = bond_grp ? bond_grp->upper_dev : NULL; if (upper) return get_port_state(upper); @@ -197,7 +199,8 @@ static int hns_roce_netdev_event(struct notifier_block *self, hr_dev = container_of(self, struct hns_roce_dev, iboe.nb); iboe = &hr_dev->iboe; if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) { - bond_grp = hns_roce_get_bond_grp(hr_dev); + bond_grp = hns_roce_get_bond_grp(get_hr_netdev(hr_dev, 0), + get_hr_bus_num(hr_dev)); upper = bond_grp ? bond_grp->upper_dev : NULL; }
@@ -850,13 +853,15 @@ static int hns_roce_get_hw_stats(struct ib_device *device, static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev, bool bond_cleanup) { + struct net_device *net_dev = get_hr_netdev(hr_dev, 0); struct hns_roce_ib_iboe *iboe = &hr_dev->iboe; struct hns_roce_v2_priv *priv = hr_dev->priv; struct hns_roce_bond_group *bond_grp; + u8 bus_num = get_hr_bus_num(hr_dev);
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) { unregister_netdevice_notifier(&hr_dev->bond_nb); - bond_grp = hns_roce_get_bond_grp(hr_dev); + bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); if (bond_grp) { if (bond_cleanup) hns_roce_cleanup_bond(bond_grp);