From: Huazhong Tan tanhuazhong@huawei.com
driver inclusion category: bugfix bugzilla: NA CVE: NA
Currently, the actual work of PF FLR is handled in the reset task, which is asynchronous. So in some case, if the preparing and rebuilding are not done, then the PF FLR will trigger some problems, for example, makes hardware go into chaos.
So this patch separates the process of PF FLR from reset task, and adds a semaphore to serialize this reset and others.
When FLR's preparing fails, if there has other higher level reset pending or failing times less than the HCLGE_FLR_RETRY_CNT, this preparing should be retried, otherwise PF and its VF may get into wrong state.
BTW, while the hardware reports misc interrupt during pcie_flr(), the driver can not receive this interrupt anymore, so disable it when hclge_flr_prepare() return, and re-enable it when enter hclge_flr_done().
Signed-off-by: Huazhong Tan tanhuazhong@huawei.com Reviewed-by: Yunsheng Lin linyunsheng@huawei.com Reviewed-by: Zhong Zhaohui zhongzhaohui@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 85 +++++++++++----------- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 1 + 2 files changed, 43 insertions(+), 43 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 48517cd..d0c9705 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -3289,8 +3289,6 @@ static int hclge_reset_wait(struct hclge_dev *hdev) reg = HCLGE_FUN_RST_ING; reg_bit = HCLGE_FUN_RST_ING_B; break; - case HNAE3_FLR_RESET: - break; default: dev_err(&hdev->pdev->dev, "Wait for unsupported reset type: %d\n", @@ -3298,20 +3296,6 @@ static int hclge_reset_wait(struct hclge_dev *hdev) return -EINVAL; }
- if (hdev->reset_type == HNAE3_FLR_RESET) { - while (!test_bit(HNAE3_FLR_DONE, &hdev->flr_state) && - cnt++ < HCLGE_RESET_WAIT_CNT) - msleep(HCLGE_RESET_WATI_MS); - - if (!test_bit(HNAE3_FLR_DONE, &hdev->flr_state)) { - dev_err(&hdev->pdev->dev, - "flr wait timeout: %u\n", cnt); - return -EBUSY; - } - - return 0; - } - do { msleep(HCLGE_RESET_WATI_MS); val = hclge_read_dev(&hdev->hw, reg); @@ -3470,12 +3454,6 @@ static void hclge_do_reset(struct hclge_dev *hdev) set_bit(HNAE3_FUNC_RESET, &hdev->reset_pending); hclge_reset_task_schedule(hdev); break; - case HNAE3_FLR_RESET: - dev_info(&pdev->dev, "FLR requested\n"); - /* schedule again to check later */ - set_bit(HNAE3_FLR_RESET, &hdev->reset_pending); - hclge_reset_task_schedule(hdev); - break; default: dev_warn(&pdev->dev, "Unsupported reset type: %d\n", hdev->reset_type); @@ -3609,10 +3587,6 @@ static int hclge_reset_prepare_wait(struct hclge_dev *hdev) ret = hclge_func_reset_sync_vf(hdev); if (ret) return ret; - - set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); - set_bit(HNAE3_FLR_DOWN, &hdev->flr_state); - hdev->rst_stats.flr_rst_cnt++; break; case HNAE3_IMP_RESET: if (handle && handle->ae_algo->ops->handle_imp_error) @@ -4009,12 +3983,13 @@ static void hclge_reset_service_task(struct hclge_dev *hdev) return; }
- if (test_and_set_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) - return; + down(&hdev->reset_sem); + set_bit(HCLGE_STATE_RST_HANDLING, &hdev->state);
hclge_reset_subtask(hdev);
clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); + up(&hdev->reset_sem); }
static void hclge_update_vport_alive(struct hclge_dev *hdev) @@ -9947,30 +9922,53 @@ static void hclge_state_uninit(struct hclge_dev *hdev)
static void hclge_flr_prepare(struct hnae3_ae_dev *ae_dev) { -#define HCLGE_FLR_WAIT_MS 100 -#define HCLGE_FLR_WAIT_CNT 50 - struct hclge_dev *hdev = ae_dev->priv; - int cnt = 0; +#define HCLGE_FLR_RETRY_WAIT_MS 500 +#define HCLGE_FLR_RETRY_CNT 5
- clear_bit(HNAE3_FLR_DOWN, &hdev->flr_state); - clear_bit(HNAE3_FLR_DONE, &hdev->flr_state); - set_bit(HNAE3_FLR_RESET, &hdev->default_reset_request); - hclge_reset_event(hdev->pdev, NULL); + struct hclge_dev *hdev = ae_dev->priv; + int retry_cnt = 0; + int ret;
- while (!test_bit(HNAE3_FLR_DOWN, &hdev->flr_state) && - cnt++ < HCLGE_FLR_WAIT_CNT) - msleep(HCLGE_FLR_WAIT_MS); +retry: + down(&hdev->reset_sem); + set_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); + hdev->reset_type = HNAE3_FLR_RESET; + ret = hclge_reset_prepare(hdev); + if (ret) { + dev_err(&hdev->pdev->dev, "fail to prepare FLR, ret=%d\n", + ret); + if (hdev->reset_pending || + retry_cnt++ < HCLGE_FLR_RETRY_CNT) { + dev_err(&hdev->pdev->dev, + "reset_pending:0x%lx, retry_cnt:%d\n", + hdev->reset_pending, retry_cnt); + clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); + up(&hdev->reset_sem); + msleep(HCLGE_FLR_RETRY_WAIT_MS); + goto retry; + } + }
- if (!test_bit(HNAE3_FLR_DOWN, &hdev->flr_state)) - dev_err(&hdev->pdev->dev, - "flr wait down timeout: %d\n", cnt); + /* disable misc vector before FLR done */ + hclge_enable_vector(&hdev->misc_vector, false); + set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); + hdev->rst_stats.flr_rst_cnt++; }
static void hclge_flr_done(struct hnae3_ae_dev *ae_dev) { struct hclge_dev *hdev = ae_dev->priv; + int ret; + + hclge_enable_vector(&hdev->misc_vector, true);
- set_bit(HNAE3_FLR_DONE, &hdev->flr_state); + ret = hclge_reset_rebuild(hdev); + if (ret) + dev_err(&hdev->pdev->dev, "fail to rebuild, ret=%d\n", ret); + + hdev->reset_type = HNAE3_NONE_RESET; + clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); + up(&hdev->reset_sem); }
static void hclge_clear_resetting_state(struct hclge_dev *hdev) @@ -10013,6 +10011,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
mutex_init(&hdev->vport_lock); spin_lock_init(&hdev->fd_rule_lock); + sema_init(&hdev->reset_sem, 1);
ret = hclge_pci_init(hdev); if (ret) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index a82c6fc..b64f6c9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -730,6 +730,7 @@ struct hclge_dev { unsigned long reset_request; /* reset has been requested */ unsigned long reset_pending; /* client rst is pending to be served */ struct hclge_rst_stats rst_stats; + struct semaphore reset_sem; /* protect reset process */ u32 reset_fail_cnt; u32 fw_version; u16 num_vmdq_vport; /* Num vmdq vport this PF has set up */