data:image/s3,"s3://crabby-images/c8afb/c8afb8c19b264a8cf434df6c1dd1bdf43b08080d" alt=""
From: Junxian Huang <huangjunxian6@hisilicon.com> driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IBD4ID ---------------------------------------------------------------------- When the roce driver is removed during reset, the reset flow of roce may not be fully completed. This may lead to the reset_state of the roce handler stored in the nic driver remaining in an intermediate state, such as HNS_ROCE_STATE_RST_DOWN or HNS_ROCE_STATE_RST_UNINIT. The reset_state won't be cleared even if the roce driver is re-initialized. This causes roce bonding, which currently relies on reset_state, to fail in this case. Replace the reset detection for bonding with nic APIs (.ae_dev_resetting() and .get_hw_reset_stat()), just like the reset detection elsewhere in the roce driver. Fixes: b927e3066992 ("RDMA/hns: Fix the concurrency error between bond and reset.") Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com> Signed-off-by: Xinghai Cen <cenxinghai@h-partners.com> --- drivers/infiniband/hw/hns/hns_roce_bond.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.c b/drivers/infiniband/hw/hns/hns_roce_bond.c index a4ac07f8fc96..5799db2c09ba 100644 --- a/drivers/infiniband/hw/hns/hns_roce_bond.c +++ b/drivers/infiniband/hw/hns/hns_roce_bond.c @@ -543,6 +543,7 @@ static void hns_roce_do_bond(struct hns_roce_bond_group *bond_grp) bool is_bond_slave_in_reset(struct hns_roce_bond_group *bond_grp) { + const struct hnae3_ae_ops *ops; struct hnae3_handle *handle; struct net_device *net_dev; int i; @@ -550,9 +551,11 @@ bool is_bond_slave_in_reset(struct hns_roce_bond_group *bond_grp) for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { net_dev = bond_grp->bond_func_info[i].net_dev; handle = bond_grp->bond_func_info[i].handle; - if (net_dev && handle && - handle->rinfo.reset_state != HNS_ROCE_STATE_NON_RST && - handle->rinfo.reset_state != HNS_ROCE_STATE_RST_INITED) + if (!net_dev || !handle) + continue; + ops = 
handle->ae_algo->ops; + if (ops->ae_dev_resetting(handle) || + ops->get_hw_reset_stat(handle)) return true; } -- 2.33.0