From: Junxian Huang huangjunxian6@hisilicon.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I968IB
--------------------------------------------------------------------------
In the concurrency process between setting bond and reset, when the reset process is finished, the driver detects that bond resource has already been allocated, thus entering the bond recover process, where the bond state is set to HNS_ROCE_BOND_IS_BONDED. But at this point the set bond process hasn't been executed yet(i.e. slaves haven't been uninited). This wrong bond state leads to the abnormal reset result that 2 slaves are both registered as bond device.
Thus delete the bond state setting in bond recover process. Besides, to fix other potential concurrency errors between bond and reset, some improvements are also added:
1. For the situation that reset occurs before bond work, add a reset check at the beginning of bond work. If there is an ongoing reset process, re-queue the bond work until the reset is finished.
2. For the situation that reset occurs during bond work, add reset checks to bond init/uninit process, treating this situation as an abnormal case.
Fixes: b0f80ad22f96 ("RDMA/hns: Support reset recovery for RoCE bonding") Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com Signed-off-by: Juan Zhou zhoujuan51@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_bond.c | 151 +++++++++++++++------ drivers/infiniband/hw/hns/hns_roce_bond.h | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 14 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 4 +- 4 files changed, 126 insertions(+), 44 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.c b/drivers/infiniband/hw/hns/hns_roce_bond.c index 911a2ba28..146eeb7f4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_bond.c +++ b/drivers/infiniband/hw/hns/hns_roce_bond.c @@ -211,8 +211,11 @@ static void hns_roce_set_bond(struct hns_roce_bond_group *bond_grp)
for (i = ROCE_BOND_FUNC_MAX - 1; i >= 0; i--) { net_dev = bond_grp->bond_func_info[i].net_dev; - if (net_dev) - hns_roce_bond_uninit_client(bond_grp, i); + if (net_dev) { + ret = hns_roce_bond_uninit_client(bond_grp, i); + if (ret) + goto set_err; + } }
bond_grp->bond_state = HNS_ROCE_BOND_REGISTERING; @@ -234,15 +237,19 @@ static void hns_roce_set_bond(struct hns_roce_bond_group *bond_grp)
ret = bond_grp->main_hr_dev ? hns_roce_cmd_bond(bond_grp, HNS_ROCE_SET_BOND) : -EIO; + if (ret) + goto set_err;
bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; complete(&bond_grp->bond_work_done); + ibdev_info(&bond_grp->main_hr_dev->ib_dev, "RoCE set bond finished!\n");
- if (ret) - BOND_ERR_LOG("failed to set RoCE bond, ret = %d.\n", ret); - else - ibdev_info(&bond_grp->main_hr_dev->ib_dev, - "RoCE set bond finished!\n"); + return; + +set_err: + bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED; + BOND_ERR_LOG("failed to set RoCE bond, ret = %d.\n", ret); + hns_roce_cleanup_bond(bond_grp); }
static void hns_roce_clear_bond(struct hns_roce_bond_group *bond_grp) @@ -258,7 +265,11 @@ static void hns_roce_clear_bond(struct hns_roce_bond_group *bond_grp) bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED; bond_grp->main_hr_dev = NULL;
- hns_roce_bond_uninit_client(bond_grp, main_func_idx); + ret = hns_roce_bond_uninit_client(bond_grp, main_func_idx); + if (ret) { + BOND_ERR_LOG("failed to uninit bond, ret = %d.\n", ret); + return; + }
for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { net_dev = bond_grp->bond_func_info[i].net_dev; @@ -303,8 +314,15 @@ static void hns_roce_slave_inc(struct hns_roce_bond_group *bond_grp) int ret;
while (inc_slave_map > 0) { - if (inc_slave_map & 1) - hns_roce_bond_uninit_client(bond_grp, inc_func_idx); + if (inc_slave_map & 1) { + ret = hns_roce_bond_uninit_client(bond_grp, inc_func_idx); + if (ret) { + BOND_ERR_LOG("failed to uninit slave %u, ret = %d.\n", + inc_func_idx, ret); + bond_grp->bond_func_info[inc_func_idx].net_dev = NULL; + bond_grp->slave_map &= ~(1U << inc_func_idx); + } + } inc_slave_map >>= 1; inc_func_idx++; } @@ -325,36 +343,66 @@ static void hns_roce_slave_inc(struct hns_roce_bond_group *bond_grp) "RoCE slave increase finished!\n"); }
+static int switch_main_dev(struct hns_roce_bond_group *bond_grp, + u32 *dec_slave_map, u8 main_func_idx) +{ + struct hns_roce_dev *hr_dev; + struct net_device *net_dev; + int ret; + int i; + + bond_grp->main_hr_dev = NULL; + ret = hns_roce_bond_uninit_client(bond_grp, main_func_idx); + if (ret) { + BOND_ERR_LOG("failed to uninit main dev %u, ret = %d.\n", + main_func_idx, ret); + *dec_slave_map &= ~(1U << main_func_idx); + bond_grp->slave_map |= (1U << main_func_idx); + return ret; + } + + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { + net_dev = bond_grp->bond_func_info[i].net_dev; + if (!(*dec_slave_map & (1 << i)) && net_dev) { + bond_grp->bond_state = HNS_ROCE_BOND_REGISTERING; + hr_dev = hns_roce_bond_init_client(bond_grp, i); + if (hr_dev) { + bond_grp->main_hr_dev = hr_dev; + break; + } + } + } + + if (!bond_grp->main_hr_dev) + return -ENODEV; + + return 0; +} + static void hns_roce_slave_dec(struct hns_roce_bond_group *bond_grp) { u8 main_func_idx = PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn); u32 dec_slave_map = bond_grp->slave_map_diff; - struct hns_roce_dev *hr_dev; struct net_device *net_dev; u8 dec_func_idx = 0; int ret; - int i;
if (dec_slave_map & (1 << main_func_idx)) { - bond_grp->main_hr_dev = NULL; - hns_roce_bond_uninit_client(bond_grp, main_func_idx); - for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { - net_dev = bond_grp->bond_func_info[i].net_dev; - if (!(dec_slave_map & (1 << i)) && net_dev) { - bond_grp->bond_state = HNS_ROCE_BOND_REGISTERING; - hr_dev = hns_roce_bond_init_client(bond_grp, i); - if (hr_dev) { - bond_grp->main_hr_dev = hr_dev; - break; - } - } - } + ret = switch_main_dev(bond_grp, &dec_slave_map, main_func_idx); + if (ret == -ENODEV) + goto dec_err; }
while (dec_slave_map > 0) { if (dec_slave_map & 1) { + net_dev = bond_grp->bond_func_info[dec_func_idx].net_dev; bond_grp->bond_func_info[dec_func_idx].net_dev = NULL; - hns_roce_bond_init_client(bond_grp, dec_func_idx); + if (!hns_roce_bond_init_client(bond_grp, dec_func_idx)) { + BOND_ERR_LOG("failed to re-init slave %u.\n", + dec_func_idx); + bond_grp->slave_map |= (1U << dec_func_idx); + bond_grp->bond_func_info[dec_func_idx].net_dev = net_dev; + } } dec_slave_map >>= 1; dec_func_idx++; @@ -365,16 +413,20 @@ static void hns_roce_slave_dec(struct hns_roce_bond_group *bond_grp)
ret = bond_grp->main_hr_dev ? hns_roce_cmd_bond(bond_grp, HNS_ROCE_CHANGE_BOND) : -EIO; + if (ret) + goto dec_err;
bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; complete(&bond_grp->bond_work_done); + ibdev_info(&bond_grp->main_hr_dev->ib_dev, + "RoCE slave decrease finished!\n");
- if (ret) - BOND_ERR_LOG("failed to decrease RoCE bond slave, ret = %d.\n", - ret); - else - ibdev_info(&bond_grp->main_hr_dev->ib_dev, - "RoCE slave decrease finished!\n"); + return; + +dec_err: + bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED; + BOND_ERR_LOG("failed to decrease RoCE bond slave, ret = %d.\n", ret); + hns_roce_cleanup_bond(bond_grp); }
static void hns_roce_do_bond(struct hns_roce_bond_group *bond_grp) @@ -414,7 +466,25 @@ static void hns_roce_do_bond(struct hns_roce_bond_group *bond_grp) } }
-void hns_roce_do_bond_work(struct work_struct *work) +bool is_bond_slave_in_reset(struct hns_roce_bond_group *bond_grp) +{ + struct hnae3_handle *handle; + struct net_device *net_dev; + int i; + + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { + net_dev = bond_grp->bond_func_info[i].net_dev; + handle = bond_grp->bond_func_info[i].handle; + if (net_dev && handle && + handle->rinfo.reset_state != HNS_ROCE_STATE_NON_RST && + handle->rinfo.reset_state != HNS_ROCE_STATE_RST_INITED) + return true; + } + + return false; +} + +static void hns_roce_do_bond_work(struct work_struct *work) { struct delayed_work *delayed_work = to_delayed_work(work); struct hns_roce_bond_group *bond_grp = @@ -422,15 +492,19 @@ void hns_roce_do_bond_work(struct work_struct *work) bond_work); int status;
+ if (is_bond_slave_in_reset(bond_grp)) + goto queue_work; + status = mutex_trylock(&roce_bond_mutex); - if (!status) { - /* delay 1 sec */ - hns_roce_queue_bond_work(bond_grp, HZ); - return; - } + if (!status) + goto queue_work;
hns_roce_do_bond(bond_grp); mutex_unlock(&roce_bond_mutex); + return; + +queue_work: + hns_roce_queue_bond_work(bond_grp, HZ); }
int hns_roce_bond_init(struct hns_roce_dev *hr_dev) @@ -452,7 +526,6 @@ int hns_roce_bond_init(struct hns_roce_dev *hr_dev) ret); return ret; } - bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; }
hr_dev->bond_nb.notifier_call = hns_roce_bond_event; diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.h b/drivers/infiniband/hw/hns/hns_roce_bond.h index c9de9315d..e75fe75f7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_bond.h +++ b/drivers/infiniband/hw/hns/hns_roce_bond.h @@ -87,5 +87,6 @@ bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev); struct net_device *hns_roce_get_bond_netdev(struct hns_roce_dev *hr_dev); struct hns_roce_bond_group *hns_roce_get_bond_grp(struct net_device *net_dev, u8 bus_num); +bool is_bond_slave_in_reset(struct hns_roce_bond_group *bond_grp);
#endif diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 2153590d0..988b4aeda 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -7137,6 +7137,9 @@ struct hns_roce_dev if (!handle || !handle->client) return NULL;
+ if (is_bond_slave_in_reset(bond_grp)) + return NULL; + ret = hns_roce_hw_v2_init_instance(handle); if (ret) return NULL; @@ -7144,19 +7147,24 @@ struct hns_roce_dev return handle->priv; }
-void hns_roce_bond_uninit_client(struct hns_roce_bond_group *bond_grp, - int func_idx) +int hns_roce_bond_uninit_client(struct hns_roce_bond_group *bond_grp, + int func_idx) { struct hnae3_handle *handle = bond_grp->bond_func_info[func_idx].handle;
if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) - return; + return -EPERM; + + if (is_bond_slave_in_reset(bond_grp)) + return -EBUSY;
handle->rinfo.instance_state = HNS_ROCE_STATE_BOND_UNINIT;
__hns_roce_hw_v2_uninit_instance(handle, false, false);
handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT; + + return 0; } static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle) { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 074e1f290..dd64e0b95 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -1586,8 +1586,8 @@ struct hns_roce_bond_info { struct hns_roce_dev *hns_roce_bond_init_client(struct hns_roce_bond_group *bond_grp, int func_idx); -void hns_roce_bond_uninit_client(struct hns_roce_bond_group *bond_grp, - int func_idx); +int hns_roce_bond_uninit_client(struct hns_roce_bond_group *bond_grp, + int func_idx); int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); int hns_roce_cmd_bond(struct hns_roce_bond_group *bond_grp, enum hns_roce_bond_cmd_type bond_type);