From: Junxian Huang huangjunxian6@hisilicon.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I87LSW
--------------------------------------------------------------------------
In the concurrency process between setting bond and reset, when the reset process is finished, the driver detects that bond resource has already been allocated, thus entering the bond recover process, where the bond state is set to HNS_ROCE_BOND_IS_BONDED. But at this point the set bond process hasn't been executed yet(i.e. slaves haven't been uninited). This wrong bond state leads to the abnormal reset result that 2 slaves are both registered as bond device.
Thus delete the bond state setting in bond recover process. Besides, to fix other potential concurrency errors between bond and reset, some improvements are also added:
1. For the situation that reset occurs before bond work, add a reset check at the beginning of bond work. If there is an ongoing reset process, re-queue the bond work until the reset is finished.
2. For the situation that reset occurs during bond work, add reset checks to bond init/uninit process, treating this situation as an abnormal case.
Fixes: b0f80ad22f96 ("RDMA/hns: Support reset recovery for RoCE bonding") Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com --- drivers/infiniband/hw/hns/hns_roce_bond.c | 153 +++++++++++++++------ drivers/infiniband/hw/hns/hns_roce_bond.h | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 14 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 4 +- 4 files changed, 127 insertions(+), 45 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.c b/drivers/infiniband/hw/hns/hns_roce_bond.c index 2f92ed730b78..7ee059a84d07 100644 --- a/drivers/infiniband/hw/hns/hns_roce_bond.c +++ b/drivers/infiniband/hw/hns/hns_roce_bond.c @@ -212,8 +212,11 @@ static void hns_roce_set_bond(struct hns_roce_bond_group *bond_grp)
for (i = ROCE_BOND_FUNC_MAX - 1; i >= 0; i--) { net_dev = bond_grp->bond_func_info[i].net_dev; - if (net_dev) - hns_roce_bond_uninit_client(bond_grp, i); + if (net_dev) { + ret = hns_roce_bond_uninit_client(bond_grp, i); + if (ret) + goto set_err; + } }
bond_grp->bond_state = HNS_ROCE_BOND_REGISTERING; @@ -235,15 +238,19 @@ static void hns_roce_set_bond(struct hns_roce_bond_group *bond_grp)
ret = bond_grp->main_hr_dev ? hns_roce_cmd_bond(bond_grp, HNS_ROCE_SET_BOND) : -EIO; + if (ret) + goto set_err;
bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; complete(&bond_grp->bond_work_done); + ibdev_info(&bond_grp->main_hr_dev->ib_dev, "RoCE set bond finished!\n");
- if (ret) - BOND_ERR_LOG("failed to set RoCE bond, ret = %d.\n", ret); - else - ibdev_info(&bond_grp->main_hr_dev->ib_dev, - "RoCE set bond finished!\n"); + return; + +set_err: + bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED; + BOND_ERR_LOG("failed to set RoCE bond, ret = %d.\n", ret); + hns_roce_cleanup_bond(bond_grp); }
static void hns_roce_clear_bond(struct hns_roce_bond_group *bond_grp) @@ -259,7 +266,11 @@ static void hns_roce_clear_bond(struct hns_roce_bond_group *bond_grp) bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED; bond_grp->main_hr_dev = NULL;
- hns_roce_bond_uninit_client(bond_grp, main_func_idx); + ret = hns_roce_bond_uninit_client(bond_grp, main_func_idx); + if (ret) { + BOND_ERR_LOG("failed to uninit bond, ret = %d.\n", ret); + return; + }
for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { net_dev = bond_grp->bond_func_info[i].net_dev; @@ -273,8 +284,7 @@ static void hns_roce_clear_bond(struct hns_roce_bond_group *bond_grp) out: ret = hns_roce_cleanup_bond(bond_grp); if (!ret) - ibdev_info(&bond_grp->main_hr_dev->ib_dev, - "RoCE clear bond finished!\n"); + ibdev_info(&hr_dev->ib_dev, "RoCE clear bond finished!\n"); }
static void hns_roce_slave_changestate(struct hns_roce_bond_group *bond_grp) @@ -304,8 +314,15 @@ static void hns_roce_slave_inc(struct hns_roce_bond_group *bond_grp) int ret;
while (inc_slave_map > 0) { - if (inc_slave_map & 1) - hns_roce_bond_uninit_client(bond_grp, inc_func_idx); + if (inc_slave_map & 1) { + ret = hns_roce_bond_uninit_client(bond_grp, inc_func_idx); + if (ret) { + BOND_ERR_LOG("failed to uninit slave %u, ret = %d.\n", + inc_func_idx, ret); + bond_grp->bond_func_info[inc_func_idx].net_dev = NULL; + bond_grp->slave_map &= ~(1U << inc_func_idx); + } + } inc_slave_map >>= 1; inc_func_idx++; } @@ -326,36 +343,67 @@ static void hns_roce_slave_inc(struct hns_roce_bond_group *bond_grp) "RoCE slave increase finished!\n"); }
+static int switch_main_dev(struct hns_roce_bond_group *bond_grp, + u32 *dec_slave_map, u8 main_func_idx) +{ + struct hns_roce_dev *hr_dev; + struct net_device *net_dev; + int ret; + int i; + + bond_grp->main_hr_dev = NULL; + ret = hns_roce_bond_uninit_client(bond_grp, main_func_idx); + if (ret) { + BOND_ERR_LOG("failed to uninit main dev %u, ret = %d.\n", + main_func_idx, ret); + *dec_slave_map &= ~(1U << main_func_idx); + bond_grp->slave_map |= (1U << main_func_idx); + return ret; + } + + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { + net_dev = bond_grp->bond_func_info[i].net_dev; + if (!(*dec_slave_map & (1 << i)) && net_dev) { + bond_grp->bond_state = HNS_ROCE_BOND_REGISTERING; + hr_dev = hns_roce_bond_init_client(bond_grp, i); + if (hr_dev) { + bond_grp->main_hr_dev = hr_dev; + break; + } + } + } + + if (!bond_grp->main_hr_dev) + return -ENODEV; + + return 0; +} + static void hns_roce_slave_dec(struct hns_roce_bond_group *bond_grp) { u8 main_func_idx = PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn); u32 dec_slave_map = bond_grp->slave_map_diff; - struct hns_roce_dev *hr_dev; struct net_device *net_dev; u8 dec_func_idx = 0; int ret; int i;
if (dec_slave_map & (1 << main_func_idx)) { - bond_grp->main_hr_dev = NULL; - hns_roce_bond_uninit_client(bond_grp, main_func_idx); - for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { - net_dev = bond_grp->bond_func_info[i].net_dev; - if (!(dec_slave_map & (1 << i)) && net_dev) { - bond_grp->bond_state = HNS_ROCE_BOND_REGISTERING; - hr_dev = hns_roce_bond_init_client(bond_grp, i); - if (hr_dev) { - bond_grp->main_hr_dev = hr_dev; - break; - } - } - } + ret = switch_main_dev(bond_grp, &dec_slave_map, main_func_idx); + if (ret == -ENODEV) + goto dec_err; }
while (dec_slave_map > 0) { if (dec_slave_map & 1) { + net_dev = bond_grp->bond_func_info[dec_func_idx].net_dev; bond_grp->bond_func_info[dec_func_idx].net_dev = NULL; - hns_roce_bond_init_client(bond_grp, dec_func_idx); + if (!hns_roce_bond_init_client(bond_grp, dec_func_idx)) { + BOND_ERR_LOG("failed to re-init slave %u.\n", + dec_func_idx); + bond_grp->slave_map |= (1U << dec_func_idx); + bond_grp->bond_func_info[dec_func_idx].net_dev = net_dev; + } } dec_slave_map >>= 1; dec_func_idx++; @@ -366,16 +414,20 @@ static void hns_roce_slave_dec(struct hns_roce_bond_group *bond_grp)
ret = bond_grp->main_hr_dev ? hns_roce_cmd_bond(bond_grp, HNS_ROCE_CHANGE_BOND) : -EIO; + if (ret) + goto dec_err;
bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; complete(&bond_grp->bond_work_done); + ibdev_info(&bond_grp->main_hr_dev->ib_dev, + "RoCE slave decrease finished!\n");
- if (ret) - BOND_ERR_LOG("failed to decrease RoCE bond slave, ret = %d.\n", - ret); - else - ibdev_info(&bond_grp->main_hr_dev->ib_dev, - "RoCE slave decrease finished!\n"); + return; + +dec_err: + bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED; + BOND_ERR_LOG("failed to decrease RoCE bond slave, ret = %d.\n", ret); + hns_roce_cleanup_bond(bond_grp); }
static void hns_roce_do_bond(struct hns_roce_bond_group *bond_grp) @@ -415,7 +467,25 @@ static void hns_roce_do_bond(struct hns_roce_bond_group *bond_grp) } }
-void hns_roce_do_bond_work(struct work_struct *work) +bool is_bond_slave_in_reset(struct hns_roce_bond_group *bond_grp) +{ + struct hnae3_handle *handle; + struct net_device *net_dev; + int i; + + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { + net_dev = bond_grp->bond_func_info[i].net_dev; + handle = bond_grp->bond_func_info[i].handle; + if (net_dev && handle && + handle->rinfo.reset_state != HNS_ROCE_STATE_NON_RST && + handle->rinfo.reset_state != HNS_ROCE_STATE_RST_INITED) + return true; + } + + return false; +} + +static void hns_roce_do_bond_work(struct work_struct *work) { struct delayed_work *delayed_work = to_delayed_work(work); struct hns_roce_bond_group *bond_grp = @@ -423,15 +493,19 @@ void hns_roce_do_bond_work(struct work_struct *work) bond_work); int status;
+ if (is_bond_slave_in_reset(bond_grp)) + goto queue_work; + status = mutex_trylock(&roce_bond_mutex); - if (!status) { - /* delay 1 sec */ - hns_roce_queue_bond_work(bond_grp, HZ); - return; - } + if (!status) + goto queue_work;
hns_roce_do_bond(bond_grp); mutex_unlock(&roce_bond_mutex); + return; + +queue_work: + hns_roce_queue_bond_work(bond_grp, HZ); }
int hns_roce_bond_init(struct hns_roce_dev *hr_dev) @@ -453,7 +527,6 @@ int hns_roce_bond_init(struct hns_roce_dev *hr_dev) ret); return ret; } - bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; }
hr_dev->bond_nb.notifier_call = hns_roce_bond_event; diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.h b/drivers/infiniband/hw/hns/hns_roce_bond.h index c9de9315d0da..e75fe75f7f4f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_bond.h +++ b/drivers/infiniband/hw/hns/hns_roce_bond.h @@ -87,5 +87,6 @@ bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev); struct net_device *hns_roce_get_bond_netdev(struct hns_roce_dev *hr_dev); struct hns_roce_bond_group *hns_roce_get_bond_grp(struct net_device *net_dev, u8 bus_num); +bool is_bond_slave_in_reset(struct hns_roce_bond_group *bond_grp);
#endif diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 88dca31867a6..5a7817f6d25a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -7401,6 +7401,9 @@ struct hns_roce_dev if (!handle || !handle->client) return NULL;
+ if (is_bond_slave_in_reset(bond_grp)) + return NULL; + ret = hns_roce_hw_v2_init_instance(handle); if (ret) return NULL; @@ -7408,19 +7411,24 @@ struct hns_roce_dev return handle->priv; }
-void hns_roce_bond_uninit_client(struct hns_roce_bond_group *bond_grp, - int func_idx) +int hns_roce_bond_uninit_client(struct hns_roce_bond_group *bond_grp, + int func_idx) { struct hnae3_handle *handle = bond_grp->bond_func_info[func_idx].handle;
if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) - return; + return -EPERM; + + if (is_bond_slave_in_reset(bond_grp)) + return -EBUSY;
handle->rinfo.instance_state = HNS_ROCE_STATE_BOND_UNINIT;
__hns_roce_hw_v2_uninit_instance(handle, false, false);
handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT; + + return 0; }
static void hns_roce_v2_reset_notify_user(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index f4cb1ed55b3f..cbcd0f6531a4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -1590,8 +1590,8 @@ struct hns_roce_bond_info { struct hns_roce_dev *hns_roce_bond_init_client(struct hns_roce_bond_group *bond_grp, int func_idx); -void hns_roce_bond_uninit_client(struct hns_roce_bond_group *bond_grp, - int func_idx); +int hns_roce_bond_uninit_client(struct hns_roce_bond_group *bond_grp, + int func_idx); int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,