From: Yonglong Liu liuyonglong@huawei.com
driver inclusion category: bugfix bugzilla: NA CVE: NA
----------------------------
When fully configure VLANs for a VF, then unload the VF while triggering a reset to PF, will cause a kernel crash because the irq is already uninit.
[ 293.177579] ------------[ cut here ]------------ [ 293.183502] kernel BUG at drivers/pci/msi.c:352! [ 293.189547] Internal error: Oops - BUG: 0 [#1] SMP [ 293.195910] Modules linked in: 8021q garp mrp hclgevf devlink xt_CHECKSUM ipt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 ip6table_mangle ip6table_nat nf_nat_ipv6 iptable_mangle iptable_nat nf_nat_ipv4 nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 libcrc32c ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter tun bridge stp llc rfkill vfat fat ib_isert iscsi_target_mod ib_srpt target_core_mod ib_srp scsi_transport_srp ib_ipoib ib_umad rpcrdma sunrpc rdma_ucm ib_uverbs ib_iser rdma_cm iw_cm ib_cm libiscsi scsi_transport_iscsi ipmi_ssif aes_ce_blk crypto_simd cryptd hns_roce_hw_v2 aes_ce_cipher ghash_ce hns_roce sha2_ce sha256_arm64 ses sha1_ce ib_core enclosure sbsa_gwdt uio_pdrv_genirq uio sg ipmi_si ipmi_devintf ipmi_msghandler spi_dw_mmio sch_fq_codel ip_tables ext4 mbcache [ 293.302197] jbd2 sd_mod realtek hisi_sas_v3_hw hclge hisi_sas_main libsas ahci scsi_transport_sas libahci ixgbe hns3 libata hnae3 host_edma_drv mdio dm_mirror dm_region_hash dm_log dm_mod [ 293.334208] Process kworker/0:4 (pid: 684, stack limit = 0x000000009f218975) [ 293.346841] CPU: 0 PID: 684 Comm: kworker/0:4 Kdump: loaded Not tainted 4.19.90-2106.3.0.0095.oe1.aarch64 #1 [ 293.368467] Hardware name: Huawei TaiShan 2280 V2/BC82AMDC, BIOS 2280-V2 CS V5.B170.01 06/30/2021 [ 293.390124] Workqueue: hclgevf hclgevf_service_task [hclgevf] [ 293.402627] pstate: 80c00009 (Nzcv daif +PAN +UAO) [ 293.414324] pc : free_msi_irqs+0x19c/0x1b8 [ 293.425429] lr : free_msi_irqs+0x18c/0x1b8 [ 293.436545] sp : ffff00002716fbb0 [ 293.446950] x29: ffff00002716fbb0 x28: 0000000000000000 [ 293.459519] x27: 0000000000000000 x26: ffff45b91ea16b00 [ 293.472183] x25: 0000000000000000 x24: ffffa587b08f4700 [ 293.484717] x23: ffffc591ac30e000 x22: ffffa587b08f8428 [ 293.497190] x21: ffffc591ac30e300 x20: 0000000000000000 [ 293.509594] x19: ffffa58a062a8300 x18: 0000000000000000 [ 293.521949] x17: 0000000000000000 x16: ffff45b91dcc3f48 [ 293.534013] x15: 0000000000000000 x14: 0000000000000000 [ 293.545883] x13: 0000000000000040 x12: 0000000000000228 [ 293.557508] x11: 0000000000000020 x10: 0000000000000040 [ 293.568889] x9 : ffff45b91ea1e190 x8 : ffffc591802d0000 [ 293.580123] x7 : ffffc591802d0148 x6 : 0000000000000120 [ 293.591190] x5 : ffffc591802d0000 x4 : 0000000000000000 [ 293.602015] x3 : 0000000000000000 x2 : 0000000000000000 [ 293.612624] x1 : 00000000000004a4 x0 : ffffa58a1e0c6b80 [ 293.623028] Call trace: [ 293.630340] free_msi_irqs+0x19c/0x1b8 [ 293.638849] pci_disable_msix+0x118/0x140 [ 293.647452] pci_free_irq_vectors+0x20/0x38 [ 293.656081] hclgevf_uninit_msi+0x44/0x58 [hclgevf] [ 293.665309] hclgevf_reset_rebuild+0x1ac/0x2e0 [hclgevf] [ 293.674866] hclgevf_reset+0x358/0x400 [hclgevf] [ 293.683545] hclgevf_reset_service_task+0xd0/0x1b0 [hclgevf] [ 293.693325] hclgevf_service_task+0x4c/0x2e8 [hclgevf] [ 293.702307] process_one_work+0x1b0/0x448 [ 293.710034] worker_thread+0x54/0x468 [ 293.717331] kthread+0x134/0x138 [ 293.724114] ret_from_fork+0x10/0x18 [ 293.731324] Code: f940b000 b4ffff00 a903e7b8 f90017b6 (d4210000)
This patch fixes the problem by waiting for the VF reset done while unloading the VF.
Signed-off-by: Yonglong Liu liuyonglong@huawei.com Reviewed-by: Junxin Chen chenjunxin1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 89ea550b07340..4122eaf2619b5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2896,6 +2896,8 @@ static void hclgevf_uninit_client_instance(struct hnae3_client *client,
/* un-init roce, if it exists */ if (hdev->roce_client) { + while (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) + msleep(100); hdev->roce_client->ops->uninit_instance(&hdev->roce, 0); hdev->roce_client = NULL; hdev->roce.client = NULL; @@ -2905,6 +2907,8 @@ static void hclgevf_uninit_client_instance(struct hnae3_client *client, if (client->ops->uninit_instance && hdev->nic_client && client->type != HNAE3_CLIENT_ROCE) { clear_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state); + while (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) + msleep(100);
client->ops->uninit_instance(&hdev->nic, 0); hdev->nic_client = NULL;