From: Lin Ma linma@zju.edu.cn
stable inclusion from stable-v5.10.138 commit 0f516dcd1456b18b56a7de0c1f67b8a4aa54c2ef category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I60QFD
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 6faee3d4ee8be0f0367d0c3d826afb3571b7a5e0 upstream.
The commit c23d92b80e0b ("igb: Teardown SR-IOV before unregister_netdev()") places the unregister_netdev() call after the igb_disable_sriov() call to avoid functionality issue.
However, it introduces several race conditions when detaching a device. For example, when .remove() is called, the below interleaving leads to use-after-free.
(FREE from device detaching) | (USE from netdev core) igb_remove | igb_ndo_get_vf_config igb_disable_sriov | vf >= adapter->vfs_allocated_count? kfree(adapter->vf_data) | adapter->vfs_allocated_count = 0 | | memcpy(... adapter->vf_data[vf]
Moreover, the igb_disable_sriov() also suffers from data race with the requests from VF driver.
(FREE from device detaching) | (USE from requests) igb_remove | igb_msix_other igb_disable_sriov | igb_msg_task kfree(adapter->vf_data) | vf < adapter->vfs_allocated_count adapter->vfs_allocated_count = 0 |
To this end, this commit first eliminates the data races from netdev core by using rtnl_lock (similar to commit 719479230893 ("dpaa2-eth: add MAC/PHY support through phylink")). And then adds a spinlock to eliminate races from driver requests. (similar to commit 1e53834ce541 ("ixgbe: Add locking to prevent panic when setting sriov_numvfs to zero")
Fixes: c23d92b80e0b ("igb: Teardown SR-IOV before unregister_netdev()") Signed-off-by: Lin Ma linma@zju.edu.cn Tested-by: Konrad Jankowski konrad0.jankowski@intel.com Signed-off-by: Tony Nguyen anthony.l.nguyen@intel.com Link: https://lore.kernel.org/r/20220817184921.735244-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Reviewed-by: Wei Li liwei391@huawei.com --- drivers/net/ethernet/intel/igb/igb.h | 2 ++ drivers/net/ethernet/intel/igb/igb_main.c | 12 +++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 7bda8c5edea5..e6d2800a8abc 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -664,6 +664,8 @@ struct igb_adapter { struct igb_mac_addr *mac_table; struct vf_mac_filter vf_macs; struct vf_mac_filter *vf_mac_list; + /* lock for VF resources */ + spinlock_t vfs_lock; };
/* flags controlling PTP/1588 function */ diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index d768846112ca..7b56e3c29893 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3638,6 +3638,7 @@ static int igb_disable_sriov(struct pci_dev *pdev) struct net_device *netdev = pci_get_drvdata(pdev); struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; + unsigned long flags;
/* reclaim resources allocated to VFs */ if (adapter->vf_data) { @@ -3650,12 +3651,13 @@ static int igb_disable_sriov(struct pci_dev *pdev) pci_disable_sriov(pdev); msleep(500); } - + spin_lock_irqsave(&adapter->vfs_lock, flags); kfree(adapter->vf_mac_list); adapter->vf_mac_list = NULL; kfree(adapter->vf_data); adapter->vf_data = NULL; adapter->vfs_allocated_count = 0; + spin_unlock_irqrestore(&adapter->vfs_lock, flags); wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ); wrfl(); msleep(100); @@ -3815,7 +3817,9 @@ static void igb_remove(struct pci_dev *pdev) igb_release_hw_control(adapter);
#ifdef CONFIG_PCI_IOV + rtnl_lock(); igb_disable_sriov(pdev); + rtnl_unlock(); #endif
unregister_netdev(netdev); @@ -3975,6 +3979,9 @@ static int igb_sw_init(struct igb_adapter *adapter)
spin_lock_init(&adapter->nfc_lock); spin_lock_init(&adapter->stats64_lock); + + /* init spinlock to avoid concurrency of VF resources */ + spin_lock_init(&adapter->vfs_lock); #ifdef CONFIG_PCI_IOV switch (hw->mac.type) { case e1000_82576: @@ -7840,8 +7847,10 @@ static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf) static void igb_msg_task(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; + unsigned long flags; u32 vf;
+ spin_lock_irqsave(&adapter->vfs_lock, flags); for (vf = 0; vf < adapter->vfs_allocated_count; vf++) { /* process any reset requests */ if (!igb_check_for_rst(hw, vf)) @@ -7855,6 +7864,7 @@ static void igb_msg_task(struct igb_adapter *adapter) if (!igb_check_for_ack(hw, vf)) igb_rcv_ack_from_vf(adapter, vf); } + spin_unlock_irqrestore(&adapter->vfs_lock, flags); }
/**