From: Slawomir Mrozowicz slawomirx.mrozowicz@intel.com
mainline inclusion from mainline-v5.18-rc1 commit 366fd1000995d4cf64e1a61a0d78a051550b9841 category: bugfix bugzilla: 186597, https://gitee.com/src-openeuler/kernel/issues/I532H9 CVE: CVE-2021-33061
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Add support for ndo_set_vf_link_state the Network Device Option that allows the PF driver to control the virtual link state of the VF devices. Without this change a VF cannot be disabled/enabled by the administrator. In the implementation the auto state takes over PF link state to VF link setting, the enable state is not supported, the disable state shut off the VF link regardless of the PF setting.
Signed-off-by: Slawomir Mrozowicz slawomirx.mrozowicz@intel.com Tested-by: Konrad Jankowski konrad0.jankowski@intel.com Signed-off-by: Tony Nguyen anthony.l.nguyen@intel.com Signed-off-by: Ziyang Xuan william.xuanziyang@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Reviewed-by: Xiu Jianfeng xiujianfeng@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 2 + drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 11 +- drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h | 2 + .../net/ethernet/intel/ixgbe/ixgbe_sriov.c | 207 ++++++++++++++---- .../net/ethernet/intel/ixgbe/ixgbe_sriov.h | 4 +- 5 files changed, 182 insertions(+), 44 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index de0fc6ecf491..21bdae9eac75 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -177,6 +177,8 @@ struct vf_data_storage { u16 pf_vlan; /* When set, guest VLAN config not allowed. */ u16 pf_qos; u16 tx_rate; + int link_enable; + int link_state; u8 spoofchk_enabled; bool rss_query_enabled; u8 trusted; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 0109b47b2fe6..c0a56308dfd6 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -5683,6 +5683,9 @@ static void ixgbe_up_complete(struct ixgbe_adapter *adapter) ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); ctrl_ext |= IXGBE_CTRL_EXT_PFRSTD; IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); + + /* update setting rx tx for all active vfs */ + ixgbe_set_all_vfs(adapter); }
void ixgbe_reinit_locked(struct ixgbe_adapter *adapter) @@ -6140,11 +6143,8 @@ void ixgbe_down(struct ixgbe_adapter *adapter) for (i = 0 ; i < adapter->num_vfs; i++) adapter->vfinfo[i].clear_to_send = false;
- /* ping all the active vfs to let them know we are going down */ - ixgbe_ping_all_vfs(adapter); - - /* Disable all VFTE/VFRE TX/RX */ - ixgbe_disable_tx_rx(adapter); + /* update setting rx tx for all active vfs */ + ixgbe_set_all_vfs(adapter); }
/* disable transmits in the hardware now that interrupts are off */ @@ -10255,6 +10255,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_set_vf_vlan = ixgbe_ndo_set_vf_vlan, .ndo_set_vf_rate = ixgbe_ndo_set_vf_bw, .ndo_set_vf_spoofchk = ixgbe_ndo_set_vf_spoofchk, + .ndo_set_vf_link_state = ixgbe_ndo_set_vf_link_state, .ndo_set_vf_rss_query_en = ixgbe_ndo_set_vf_rss_query_en, .ndo_set_vf_trust = ixgbe_ndo_set_vf_trust, .ndo_get_vf_config = ixgbe_ndo_get_vf_config, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h index a148534d7256..8f4316b19278 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h @@ -85,6 +85,8 @@ enum ixgbe_pfvf_api_rev { #define IXGBE_VF_IPSEC_ADD 0x0d #define IXGBE_VF_IPSEC_DEL 0x0e
+#define IXGBE_VF_GET_LINK_STATE 0x10 /* get vf link state */ + /* length of permanent address message returned from PF */ #define IXGBE_VF_PERMADDR_MSG_LEN 4 /* word in permanent address message with the current multicast type */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 214a38de3f41..7f11c0a8e7a9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -96,6 +96,7 @@ static int __ixgbe_enable_sriov(struct ixgbe_adapter *adapter, for (i = 0; i < num_vfs; i++) { /* enable spoof checking for all VFs */ adapter->vfinfo[i].spoofchk_enabled = true; + adapter->vfinfo[i].link_enable = true;
/* We support VF RSS querying only for 82599 and x540 * devices at the moment. These devices share RSS @@ -820,6 +821,57 @@ static inline void ixgbe_write_qde(struct ixgbe_adapter *adapter, u32 vf, } }
+/** + * ixgbe_set_vf_rx_tx - Set VF rx tx + * @adapter: Pointer to adapter struct + * @vf: VF identifier + * + * Set or reset correct transmit and receive for vf + **/ +static void ixgbe_set_vf_rx_tx(struct ixgbe_adapter *adapter, int vf) +{ + u32 reg_cur_tx, reg_cur_rx, reg_req_tx, reg_req_rx; + struct ixgbe_hw *hw = &adapter->hw; + u32 reg_offset, vf_shift; + + vf_shift = vf % 32; + reg_offset = vf / 32; + + reg_cur_tx = IXGBE_READ_REG(hw, IXGBE_VFTE(reg_offset)); + reg_cur_rx = IXGBE_READ_REG(hw, IXGBE_VFRE(reg_offset)); + + if (adapter->vfinfo[vf].link_enable) { + reg_req_tx = reg_cur_tx | 1 << vf_shift; + reg_req_rx = reg_cur_rx | 1 << vf_shift; + } else { + reg_req_tx = reg_cur_tx & ~(1 << vf_shift); + reg_req_rx = reg_cur_rx & ~(1 << vf_shift); + } + + /* The 82599 cannot support a mix of jumbo and non-jumbo PF/VFs. + * For more info take a look at ixgbe_set_vf_lpe + */ + if (adapter->hw.mac.type == ixgbe_mac_82599EB) { + struct net_device *dev = adapter->netdev; + int pf_max_frame = dev->mtu + ETH_HLEN; + +#if IS_ENABLED(CONFIG_FCOE) + if (dev->features & NETIF_F_FCOE_MTU) + pf_max_frame = max_t(int, pf_max_frame, + IXGBE_FCOE_JUMBO_FRAME_SIZE); +#endif /* CONFIG_FCOE */ + + if (pf_max_frame > ETH_FRAME_LEN) + reg_req_rx = reg_cur_rx & ~(1 << vf_shift); + } + + /* Enable/Disable particular VF */ + if (reg_cur_tx != reg_req_tx) + IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset), reg_req_tx); + if (reg_cur_rx != reg_req_rx) + IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset), reg_req_rx); +} + static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf) { struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ]; @@ -845,11 +897,6 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf) vf_shift = vf % 32; reg_offset = vf / 32;
- /* enable transmit for vf */ - reg = IXGBE_READ_REG(hw, IXGBE_VFTE(reg_offset)); - reg |= BIT(vf_shift); - IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset), reg); - /* force drop enable for all VF Rx queues */ reg = IXGBE_QDE_ENABLE; if (adapter->vfinfo[vf].pf_vlan) @@ -857,27 +904,7 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf)
ixgbe_write_qde(adapter, vf, reg);
- /* enable receive for vf */ - reg = IXGBE_READ_REG(hw, IXGBE_VFRE(reg_offset)); - reg |= BIT(vf_shift); - /* - * The 82599 cannot support a mix of jumbo and non-jumbo PF/VFs. - * For more info take a look at ixgbe_set_vf_lpe - */ - if (adapter->hw.mac.type == ixgbe_mac_82599EB) { - struct net_device *dev = adapter->netdev; - int pf_max_frame = dev->mtu + ETH_HLEN; - -#ifdef CONFIG_FCOE - if (dev->features & NETIF_F_FCOE_MTU) - pf_max_frame = max_t(int, pf_max_frame, - IXGBE_FCOE_JUMBO_FRAME_SIZE); - -#endif /* CONFIG_FCOE */ - if (pf_max_frame > ETH_FRAME_LEN) - reg &= ~BIT(vf_shift); - } - IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset), reg); + ixgbe_set_vf_rx_tx(adapter, vf);
/* enable VF mailbox for further messages */ adapter->vfinfo[vf].clear_to_send = true; @@ -1202,6 +1229,26 @@ static int ixgbe_update_vf_xcast_mode(struct ixgbe_adapter *adapter, return 0; }
+static int ixgbe_get_vf_link_state(struct ixgbe_adapter *adapter, + u32 *msgbuf, u32 vf) +{ + u32 *link_state = &msgbuf[1]; + + /* verify the PF is supporting the correct API */ + switch (adapter->vfinfo[vf].vf_api) { + case ixgbe_mbox_api_12: + case ixgbe_mbox_api_13: + case ixgbe_mbox_api_14: + break; + default: + return -EOPNOTSUPP; + } + + *link_state = adapter->vfinfo[vf].link_enable; + + return 0; +} + static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf) { u32 mbx_size = IXGBE_VFMAILBOX_SIZE; @@ -1267,6 +1314,9 @@ static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf) case IXGBE_VF_UPDATE_XCAST_MODE: retval = ixgbe_update_vf_xcast_mode(adapter, msgbuf, vf); break; + case IXGBE_VF_GET_LINK_STATE: + retval = ixgbe_get_vf_link_state(adapter, msgbuf, vf); + break; case IXGBE_VF_IPSEC_ADD: retval = ixgbe_ipsec_vf_add_sa(adapter, msgbuf, vf); break; @@ -1322,18 +1372,6 @@ void ixgbe_msg_task(struct ixgbe_adapter *adapter) } }
-void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - - /* disable transmit and receive for all vfs */ - IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), 0); - IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), 0); - - IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), 0); - IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), 0); -} - static inline void ixgbe_ping_vf(struct ixgbe_adapter *adapter, int vf) { struct ixgbe_hw *hw = &adapter->hw; @@ -1359,6 +1397,21 @@ void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter) } }
+/** + * ixgbe_set_all_vfs - update vfs queues + * @adapter: Pointer to adapter struct + * + * Update setting transmit and receive queues for all vfs + **/ +void ixgbe_set_all_vfs(struct ixgbe_adapter *adapter) +{ + int i; + + for (i = 0 ; i < adapter->num_vfs; i++) + ixgbe_set_vf_link_state(adapter, i, + adapter->vfinfo[i].link_state); +} + int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) { struct ixgbe_adapter *adapter = netdev_priv(netdev); @@ -1656,6 +1709,84 @@ int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting) return 0; }
+/** + * ixgbe_set_vf_link_state - Set link state + * @adapter: Pointer to adapter struct + * @vf: VF identifier + * @state: required link state + * + * Set a link force state on/off a single vf + **/ +void ixgbe_set_vf_link_state(struct ixgbe_adapter *adapter, int vf, int state) +{ + adapter->vfinfo[vf].link_state = state; + + switch (state) { + case IFLA_VF_LINK_STATE_AUTO: + if (test_bit(__IXGBE_DOWN, &adapter->state)) + adapter->vfinfo[vf].link_enable = false; + else + adapter->vfinfo[vf].link_enable = true; + break; + case IFLA_VF_LINK_STATE_ENABLE: + adapter->vfinfo[vf].link_enable = true; + break; + case IFLA_VF_LINK_STATE_DISABLE: + adapter->vfinfo[vf].link_enable = false; + break; + } + + ixgbe_set_vf_rx_tx(adapter, vf); + + /* restart the VF */ + adapter->vfinfo[vf].clear_to_send = false; + ixgbe_ping_vf(adapter, vf); +} + +/** + * ixgbe_ndo_set_vf_link_state - Set link state + * @netdev: network interface device structure + * @vf: VF identifier + * @state: required link state + * + * Set the link state of a specified VF, regardless of physical link state + **/ +int ixgbe_ndo_set_vf_link_state(struct net_device *netdev, int vf, int state) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + int ret = 0; + + if (vf < 0 || vf >= adapter->num_vfs) { + dev_err(&adapter->pdev->dev, + "NDO set VF link - invalid VF identifier %d\n", vf); + return -EINVAL; + } + + switch (state) { + case IFLA_VF_LINK_STATE_ENABLE: + dev_info(&adapter->pdev->dev, + "NDO set VF %d link state %d - not supported\n", + vf, state); + break; + case IFLA_VF_LINK_STATE_DISABLE: + dev_info(&adapter->pdev->dev, + "NDO set VF %d link state disable\n", vf); + ixgbe_set_vf_link_state(adapter, vf, state); + break; + case IFLA_VF_LINK_STATE_AUTO: + dev_info(&adapter->pdev->dev, + "NDO set VF %d link state auto\n", vf); + ixgbe_set_vf_link_state(adapter, vf, state); + break; + default: + dev_err(&adapter->pdev->dev, + "NDO set VF %d - invalid link state %d\n", vf, state); + ret = -EINVAL; + } + + return ret; +} + int ixgbe_ndo_set_vf_rss_query_en(struct net_device *netdev, int vf, bool setting) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h index 3ec21923c89c..0690ecb8dfa3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h @@ -17,8 +17,8 @@ void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter); #endif void ixgbe_msg_task(struct ixgbe_adapter *adapter); int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask); -void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter); void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter); +void ixgbe_set_all_vfs(struct ixgbe_adapter *adapter); int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int queue, u8 *mac); int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int queue, u16 vlan, u8 qos, __be16 vlan_proto); @@ -31,7 +31,9 @@ int ixgbe_ndo_set_vf_rss_query_en(struct net_device *netdev, int vf, int ixgbe_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting); int ixgbe_ndo_get_vf_config(struct net_device *netdev, int vf, struct ifla_vf_info *ivi); +int ixgbe_ndo_set_vf_link_state(struct net_device *netdev, int vf, int state); void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter); +void ixgbe_set_vf_link_state(struct ixgbe_adapter *adapter, int vf, int state); int ixgbe_disable_sriov(struct ixgbe_adapter *adapter); #ifdef CONFIG_PCI_IOV void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, unsigned int max_vfs);
From: Slawomir Mrozowicz slawomirx.mrozowicz@intel.com
mainline inclusion from mainline-v5.18-rc1 commit 008ca35f6e87be1d60b6af3d1ae247c6d5c2531d category: bugfix bugzilla: 186597, https://gitee.com/src-openeuler/kernel/issues/I532H9 CVE: CVE-2021-33061
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
The 82599 PF driver disable VF driver after a special MDD event occurs. Adds the option for administrators to control whether VFs are automatically disabled after several MDD events. The automatically disabling is now the default mode for 82599 PF driver, as it is more reliable.
This addresses CVE-2021-33061.
Signed-off-by: Slawomir Mrozowicz slawomirx.mrozowicz@intel.com Tested-by: Konrad Jankowski konrad0.jankowski@intel.com Signed-off-by: Tony Nguyen anthony.l.nguyen@intel.com Signed-off-by: Ziyang Xuan william.xuanziyang@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Reviewed-by: Xiu Jianfeng xiujianfeng@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 4 +++ .../net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 21 ++++++++++++++ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 28 ++++++++++++++++++- 3 files changed, 52 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 21bdae9eac75..eaa992e7c591 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -184,6 +184,7 @@ struct vf_data_storage { u8 trusted; int xcast_mode; unsigned int vf_api; + u8 primary_abort_count; };
enum ixgbevf_xcast_modes { @@ -554,6 +555,8 @@ struct ixgbe_mac_addr { #define IXGBE_TRY_LINK_TIMEOUT (4 * HZ) #define IXGBE_SFP_POLL_JIFFIES (2 * HZ) /* SFP poll every 2 seconds */
+#define IXGBE_PRIMARY_ABORT_LIMIT 5 + /* board specific private data structure */ struct ixgbe_adapter { unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; @@ -612,6 +615,7 @@ struct ixgbe_adapter { #define IXGBE_FLAG2_RX_LEGACY BIT(16) #define IXGBE_FLAG2_IPSEC_ENABLED BIT(17) #define IXGBE_FLAG2_VF_IPSEC_ENABLED BIT(18) +#define IXGBE_FLAG2_AUTO_DISABLE_VF BIT(19)
/* Tx fast path data */ int num_tx_queues; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 089e1b64b59a..294c59ab35c5 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -138,6 +138,8 @@ static const char ixgbe_priv_flags_strings[][ETH_GSTRING_LEN] = { "legacy-rx", #define IXGBE_PRIV_FLAGS_VF_IPSEC_EN BIT(1) "vf-ipsec", +#define IXGBE_PRIV_FLAGS_AUTO_DISABLE_VF BIT(2) + "mdd-disable-vf", };
#define IXGBE_PRIV_FLAGS_STR_LEN ARRAY_SIZE(ixgbe_priv_flags_strings) @@ -3523,6 +3525,9 @@ static u32 ixgbe_get_priv_flags(struct net_device *netdev) if (adapter->flags2 & IXGBE_FLAG2_VF_IPSEC_ENABLED) priv_flags |= IXGBE_PRIV_FLAGS_VF_IPSEC_EN;
+ if (adapter->flags2 & IXGBE_FLAG2_AUTO_DISABLE_VF) + priv_flags |= IXGBE_PRIV_FLAGS_AUTO_DISABLE_VF; + return priv_flags; }
@@ -3530,6 +3535,7 @@ static int ixgbe_set_priv_flags(struct net_device *netdev, u32 priv_flags) { struct ixgbe_adapter *adapter = netdev_priv(netdev); unsigned int flags2 = adapter->flags2; + unsigned int i;
flags2 &= ~IXGBE_FLAG2_RX_LEGACY; if (priv_flags & IXGBE_PRIV_FLAGS_LEGACY_RX) @@ -3539,6 +3545,21 @@ static int ixgbe_set_priv_flags(struct net_device *netdev, u32 priv_flags) if (priv_flags & IXGBE_PRIV_FLAGS_VF_IPSEC_EN) flags2 |= IXGBE_FLAG2_VF_IPSEC_ENABLED;
+ flags2 &= ~IXGBE_FLAG2_AUTO_DISABLE_VF; + if (priv_flags & IXGBE_PRIV_FLAGS_AUTO_DISABLE_VF) { + if (adapter->hw.mac.type == ixgbe_mac_82599EB) { + /* Reset primary abort counter */ + for (i = 0; i < adapter->num_vfs; i++) + adapter->vfinfo[i].primary_abort_count = 0; + + flags2 |= IXGBE_FLAG2_AUTO_DISABLE_VF; + } else { + e_info(probe, + "Cannot set private flags: Operation not supported\n"); + return -EOPNOTSUPP; + } + } + if (flags2 != adapter->flags2) { adapter->flags2 = flags2;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index c0a56308dfd6..74ee496e4031 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7602,6 +7602,27 @@ static void ixgbe_watchdog_flush_tx(struct ixgbe_adapter *adapter) }
#ifdef CONFIG_PCI_IOV +static void ixgbe_bad_vf_abort(struct ixgbe_adapter *adapter, u32 vf) +{ + struct ixgbe_hw *hw = &adapter->hw; + + if (adapter->hw.mac.type == ixgbe_mac_82599EB && + adapter->flags2 & IXGBE_FLAG2_AUTO_DISABLE_VF) { + adapter->vfinfo[vf].primary_abort_count++; + if (adapter->vfinfo[vf].primary_abort_count == + IXGBE_PRIMARY_ABORT_LIMIT) { + ixgbe_set_vf_link_state(adapter, vf, + IFLA_VF_LINK_STATE_DISABLE); + adapter->vfinfo[vf].primary_abort_count = 0; + + e_info(drv, + "Malicious Driver Detection event detected on PF %d VF %d MAC: %pM mdd-disable-vf=on", + hw->bus.func, vf, + adapter->vfinfo[vf].vf_mac_addresses); + } + } +} + static void ixgbe_check_for_bad_vf(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; @@ -7633,8 +7654,10 @@ static void ixgbe_check_for_bad_vf(struct ixgbe_adapter *adapter) continue; pci_read_config_word(vfdev, PCI_STATUS, &status_reg); if (status_reg != IXGBE_FAILED_READ_CFG_WORD && - status_reg & PCI_STATUS_REC_MASTER_ABORT) + status_reg & PCI_STATUS_REC_MASTER_ABORT) { + ixgbe_bad_vf_abort(adapter, vf); pcie_flr(vfdev); + } } }
@@ -10724,6 +10747,9 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto err_sw_init;
+ if (adapter->hw.mac.type == ixgbe_mac_82599EB) + adapter->flags2 |= IXGBE_FLAG2_AUTO_DISABLE_VF; + switch (adapter->hw.mac.type) { case ixgbe_mac_X550: case ixgbe_mac_X550EM_x:
From: Slawomir Mrozowicz slawomirx.mrozowicz@intel.com
mainline inclusion from mainline-v5.18-rc1 commit 443ebdd68b443ea0798c883e8aabf10d75268e92 category: bugfix bugzilla: 186597, https://gitee.com/src-openeuler/kernel/issues/I532H9 CVE: CVE-2021-33061
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Add possibility to disable link state if it is administratively disabled in PF.
It is part of the general functionality that allows the PF driver to control the state of the virtual link VF devices.
Signed-off-by: Slawomir Mrozowicz slawomirx.mrozowicz@intel.com Tested-by: Konrad Jankowski konrad0.jankowski@intel.com Signed-off-by: Tony Nguyen anthony.l.nguyen@intel.com
Conflicts: drivers/net/ethernet/intel/ixgbevf/vf.c
Signed-off-by: Ziyang Xuan william.xuanziyang@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Reviewed-by: Xiu Jianfeng xiujianfeng@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 2 + .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 11 ++++- drivers/net/ethernet/intel/ixgbevf/mbx.h | 2 + drivers/net/ethernet/intel/ixgbevf/vf.c | 42 +++++++++++++++++++ drivers/net/ethernet/intel/ixgbevf/vf.h | 1 + 5 files changed, 57 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index a0e325774819..89bfe4eb92f8 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -387,6 +387,8 @@ struct ixgbevf_adapter { u32 *rss_key; u8 rss_indir_tbl[IXGBEVF_X550_VFRETA_SIZE]; u32 flags; + bool link_state; + #define IXGBEVF_FLAGS_LEGACY_RX BIT(1)
#ifdef CONFIG_XFRM diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index a70e4601eeb8..fda091a622b2 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -2293,7 +2293,9 @@ static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter) static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter) { struct net_device *netdev = adapter->netdev; + struct pci_dev *pdev = adapter->pdev; struct ixgbe_hw *hw = &adapter->hw; + bool state;
ixgbevf_configure_msix(adapter);
@@ -2306,6 +2308,11 @@ static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter)
spin_unlock_bh(&adapter->mbx_lock);
+ state = adapter->link_state; + hw->mac.ops.get_link_state(hw, &adapter->link_state); + if (state && state != adapter->link_state) + dev_info(&pdev->dev, "VF is administratively disabled\n"); + smp_mb__before_atomic(); clear_bit(__IXGBEVF_DOWN, &adapter->state); ixgbevf_napi_enable_all(adapter); @@ -3074,6 +3081,8 @@ static int ixgbevf_sw_init(struct ixgbevf_adapter *adapter) adapter->tx_ring_count = IXGBEVF_DEFAULT_TXD; adapter->rx_ring_count = IXGBEVF_DEFAULT_RXD;
+ adapter->link_state = true; + set_bit(__IXGBEVF_DOWN, &adapter->state); return 0;
@@ -3306,7 +3315,7 @@ static void ixgbevf_watchdog_subtask(struct ixgbevf_adapter *adapter)
ixgbevf_watchdog_update_link(adapter);
- if (adapter->link_up) + if (adapter->link_up && adapter->link_state) ixgbevf_watchdog_link_is_up(adapter); else ixgbevf_watchdog_link_is_down(adapter); diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.h b/drivers/net/ethernet/intel/ixgbevf/mbx.h index 853796c8ef0e..403f4d9445b2 100644 --- a/drivers/net/ethernet/intel/ixgbevf/mbx.h +++ b/drivers/net/ethernet/intel/ixgbevf/mbx.h @@ -97,6 +97,8 @@ enum ixgbe_pfvf_api_rev { #define IXGBE_VF_IPSEC_ADD 0x0d #define IXGBE_VF_IPSEC_DEL 0x0e
+#define IXGBE_VF_GET_LINK_STATE 0x10 /* get vf link state */ + /* length of permanent address message returned from PF */ #define IXGBE_VF_PERMADDR_MSG_LEN 4 /* word in permanent address message with the current multicast type */ diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index bfe6dfcec4ab..86d5521e69f8 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -573,6 +573,46 @@ static s32 ixgbevf_hv_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode) return -EOPNOTSUPP; }
+/** + * ixgbevf_get_link_state_vf - Get VF link state from PF + * @hw: pointer to the HW structure + * @link_state: link state storage + * + * Returns state of the operation error or success. + */ +static s32 ixgbevf_get_link_state_vf(struct ixgbe_hw *hw, bool *link_state) +{ + u32 msgbuf[2]; + s32 ret_val; + s32 err; + + msgbuf[0] = IXGBE_VF_GET_LINK_STATE; + msgbuf[1] = 0x0; + + err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2); + + if (err || (msgbuf[0] & IXGBE_VT_MSGTYPE_NACK)) { + ret_val = IXGBE_ERR_MBX; + } else { + ret_val = 0; + *link_state = msgbuf[1]; + } + + return ret_val; +} + +/** + * ixgbevf_hv_get_link_state_vf - * Hyper-V variant - just a stub. + * @hw: unused + * @link_state: unused + * + * Hyper-V variant; there is no mailbox communication. + */ +static s32 ixgbevf_hv_get_link_state_vf(struct ixgbe_hw *hw, bool *link_state) +{ + return -EOPNOTSUPP; +} + /** * ixgbevf_set_vfta_vf - Set/Unset VLAN filter table address * @hw: pointer to the HW structure @@ -950,6 +990,7 @@ static const struct ixgbe_mac_operations ixgbevf_mac_ops = { .set_rar = ixgbevf_set_rar_vf, .update_mc_addr_list = ixgbevf_update_mc_addr_list_vf, .update_xcast_mode = ixgbevf_update_xcast_mode, + .get_link_state = ixgbevf_get_link_state_vf, .set_uc_addr = ixgbevf_set_uc_addr_vf, .set_vfta = ixgbevf_set_vfta_vf, .set_rlpml = ixgbevf_set_rlpml_vf, @@ -967,6 +1008,7 @@ static const struct ixgbe_mac_operations ixgbevf_hv_mac_ops = { .set_rar = ixgbevf_hv_set_rar_vf, .update_mc_addr_list = ixgbevf_hv_update_mc_addr_list_vf, .update_xcast_mode = ixgbevf_hv_update_xcast_mode, + .get_link_state = ixgbevf_hv_get_link_state_vf, .set_uc_addr = ixgbevf_hv_set_uc_addr_vf, .set_vfta = ixgbevf_hv_set_vfta_vf, .set_rlpml = ixgbevf_hv_set_rlpml_vf, diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.h b/drivers/net/ethernet/intel/ixgbevf/vf.h index d1e9e306653b..45d9269218db 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.h +++ b/drivers/net/ethernet/intel/ixgbevf/vf.h @@ -42,6 +42,7 @@ struct ixgbe_mac_operations { s32 (*init_rx_addrs)(struct ixgbe_hw *); s32 (*update_mc_addr_list)(struct ixgbe_hw *, struct net_device *); s32 (*update_xcast_mode)(struct ixgbe_hw *, int); + s32 (*get_link_state)(struct ixgbe_hw *hw, bool *link_state); s32 (*enable_mc)(struct ixgbe_hw *); s32 (*disable_mc)(struct ixgbe_hw *); s32 (*clear_vfta)(struct ixgbe_hw *);
From: Jordy Zomer jordy@pwning.systems
mainline inclusion from mainline-v5.17-rc1 commit 4fbcc1a4cb20fe26ad0225679c536c80f1648221 bugzilla: https://gitee.com/src-openeuler/kernel/issues/I4XOH9 CVE: CVE-2022-26490 backport: openEuler-22.03-LTS
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
It appears that there are some buffer overflows in EVT_TRANSACTION. This happens because the length parameters that are passed to memcpy come directly from skb->data and are not guarded in any way.
Signed-off-by: Jordy Zomer jordy@pwning.systems Reviewed-by: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Huang Guobin huangguobin4@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/nfc/st21nfca/se.c | 10 ++++++++++ 1 file changed, 10 insertions(+)
diff --git a/drivers/nfc/st21nfca/se.c b/drivers/nfc/st21nfca/se.c index c8bdf078d111..0841e0e370a0 100644 --- a/drivers/nfc/st21nfca/se.c +++ b/drivers/nfc/st21nfca/se.c @@ -320,6 +320,11 @@ int st21nfca_connectivity_event_received(struct nfc_hci_dev *hdev, u8 host, return -ENOMEM;
transaction->aid_len = skb->data[1]; + + /* Checking if the length of the AID is valid */ + if (transaction->aid_len > sizeof(transaction->aid)) + return -EINVAL; + memcpy(transaction->aid, &skb->data[2], transaction->aid_len);
@@ -329,6 +334,11 @@ int st21nfca_connectivity_event_received(struct nfc_hci_dev *hdev, u8 host, return -EPROTO;
transaction->params_len = skb->data[transaction->aid_len + 3]; + + /* Total size is allocated (skb->len - 2) minus fixed array members */ + if (transaction->params_len > ((skb->len - 2) - sizeof(struct nfc_evt_transaction))) + return -EINVAL; + memcpy(transaction->params, skb->data + transaction->aid_len + 4, transaction->params_len);
From: Lakshmi Ramasubramanian nramas@linux.microsoft.com
mainline inclusion from mainline-v5.13-rc1 commit 031cc263c037a95e5d1249cbd3d55b77021f1eb8 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I53YJF
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Uninitialized local variable "elf_info" would be passed to kexec_free_elf_info() if kexec_build_elf_info() returns an error in elf64_load().
If kexec_build_elf_info() returns an error, return the error immediately.
Signed-off-by: Lakshmi Ramasubramanian nramas@linux.microsoft.com Reported-by: Dan Carpenter dan.carpenter@oracle.com Reviewed-by: Michael Ellerman mpe@ellerman.id.au Signed-off-by: Rob Herring robh@kernel.org Link: https://lore.kernel.org/r/20210421163610.23775-2-nramas@linux.microsoft.com Signed-off-by: Lin Yujun linyujun809@huawei.com Reviewed-by: Zhang Jianhua chris.zjh@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/powerpc/kexec/elf_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c index 0492ca6003f3..ed284be2e0f7 100644 --- a/arch/powerpc/kexec/elf_64.c +++ b/arch/powerpc/kexec/elf_64.c @@ -45,7 +45,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info); if (ret) - goto out; + return ERR_PTR(ret);
if (image->type == KEXEC_TYPE_CRASH) { /* min & max buffer values for kdump case */
From: Lakshmi Ramasubramanian nramas@linux.microsoft.com
mainline inclusion from mainline-v5.13-rc1 commit a45dd984dea9baa22b15fb692fe870ab5670a4a0 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I53YU3
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
There are a few "goto out;" statements before the local variable "fdt" is initialized through the call to of_kexec_alloc_and_setup_fdt() in elf64_load(). This will result in an uninitialized "fdt" being passed to kvfree() in this function if there is an error before the call to of_kexec_alloc_and_setup_fdt().
If there is any error after fdt is allocated, but before it is saved in the arch specific kimage struct, free the fdt.
Fixes: 3c985d31ad66 ("powerpc: Use common of_kexec_alloc_and_setup_fdt()") Reported-by: kernel test robot lkp@intel.com Reported-by: Dan Carpenter dan.carpenter@oracle.com Signed-off-by: Michael Ellerman mpe@ellerman.id.au Signed-off-by: Lakshmi Ramasubramanian nramas@linux.microsoft.com Signed-off-by: Rob Herring robh@kernel.org Link: https://lore.kernel.org/r/20210421163610.23775-1-nramas@linux.microsoft.com Signed-off-by: Lin Yujun linyujun809@huawei.com Reviewed-by: Zhang Jianhua chris.zjh@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/powerpc/kexec/elf_64.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c index ed284be2e0f7..ee292bd2edf2 100644 --- a/arch/powerpc/kexec/elf_64.c +++ b/arch/powerpc/kexec/elf_64.c @@ -114,7 +114,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, ret = setup_new_fdt_ppc64(image, fdt, initrd_load_addr, initrd_len, cmdline); if (ret) - goto out; + goto out_free_fdt;
fdt_pack(fdt);
@@ -125,7 +125,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; ret = kexec_add_buffer(&kbuf); if (ret) - goto out; + goto out_free_fdt;
/* FDT will be freed in arch_kimage_file_post_load_cleanup */ image->arch.fdt = fdt; @@ -140,18 +140,14 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, if (ret) pr_err("Error setting up the purgatory.\n");
+ goto out; + +out_free_fdt: + kvfree(fdt); out: kfree(modified_cmdline); kexec_free_elf_info(&elf_info);
- /* - * Once FDT buffer has been successfully passed to kexec_add_buffer(), - * the FDT buffer address is saved in image->arch.fdt. In that case, - * the memory cannot be freed here in case of any other error. - */ - if (ret && !image->arch.fdt) - kvfree(fdt); - return ret ? ERR_PTR(ret) : NULL; }
From: liangtian liangtian13@huawei.com
virt inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I53PTV?from=project-issue CVE: NA
-----------------------------------------------------
Since the reset function is in kvm_intel module instead of kvm module, the attribute weak function in kvm_main.c could not be found, which would cause st_max in X86 never be refreshed. The solution is to define the reset function in x86.c under the kvm module.
Signed-off-by: liangtian liangtian13@huawei.com Reviewed-by: Keqian Zhu zhukeqian1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/x86/kvm/vmx/vmx.c | 5 ----- arch/x86/kvm/x86.c | 5 +++++ 2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 26046a755f4a..ea2ed880ecf5 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -367,11 +367,6 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var); static __always_inline void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type);
-void kvm_arch_vcpu_stat_reset(struct kvm_vcpu_stat *vcpu_stat) -{ - vcpu_stat->st_max = 0; -} - void vmx_vmexit(void);
#define vmx_insn_failed(fmt...) \ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 556c52407202..3df001f114be 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11472,6 +11472,11 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva) } EXPORT_SYMBOL_GPL(kvm_handle_invpcid);
+void kvm_arch_vcpu_stat_reset(struct kvm_vcpu_stat *vcpu_stat) +{ + vcpu_stat->st_max = 0; +} + EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
From: Zhen Lei thunder.leizhen@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I545H8 CVE: NA
-------------------------------------------------------------------------
To be consistent with the style of other ARCHs such as x86, the kexec commit b5a34a20984c ("arm64: support more than one crash kernel regions") requires all crash regions to be named "Crash kernel". Update the name of crashk_low_res, so that we can directly use the latest kexec tool without having to maintain a private version.
Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm64/kernel/setup.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-)
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index dcbefb4a9736..f6e56847a4e9 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -273,16 +273,9 @@ static void __init request_standard_resources(void) request_memmap_resources(res);
#ifdef CONFIG_KEXEC_CORE - /* - * Userspace will find "Crash kernel" or "Crash kernel (low)" - * region in /proc/iomem. - * In order to distinct from the high region and make no effect - * to the use of existing kexec-tools, rename the low region as - * "Crash kernel (low)". - */ + /* Userspace will find "Crash kernel" region in /proc/iomem. */ if (crashk_low_res.end && crashk_low_res.start >= res->start && crashk_low_res.end <= res->end) { - crashk_low_res.name = "Crash kernel (low)"; request_resource(res, &crashk_low_res); } if (crashk_res.end && crashk_res.start >= res->start &&
From: Zhen Lei thunder.leizhen@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I545H8 CVE: NA
-------------------------------------------------------------------------
If the crashkernel reservation is deferred, such boundaries are not known when the linear mapping is created. But its upper limit is fixed, cannot above 4G. Therefore, unless otherwise required, block mapping should be used for memory above 4G to improve performance.
Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm64/mm/mmu.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 41c48689270e..11755c89c66b 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -489,10 +489,10 @@ static void __init map_mem(pgd_t *pgdp) phys_addr_t kernel_start = __pa_symbol(_text); phys_addr_t kernel_end = __pa_symbol(__init_begin); phys_addr_t start, end; - int flags = 0; + int flags = 0, eflags = 0; u64 i;
- if (rodata_full || crash_mem_map || debug_pagealloc_enabled()) + if (rodata_full || debug_pagealloc_enabled()) flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
/* @@ -503,17 +503,40 @@ static void __init map_mem(pgd_t *pgdp) */ memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
+#ifdef CONFIG_KEXEC_CORE + if (crash_mem_map) + eflags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; +#endif + /* map all the memory banks */ for_each_mem_range(i, &start, &end) { if (start >= end) break; + +#ifdef CONFIG_KEXEC_CORE + if (eflags && (end >= SZ_4G)) { + /* + * The memory block cross the 4G boundary. + * Forcibly use page-level mappings for memory under 4G. + */ + if (start < SZ_4G) { + __map_memblock(pgdp, start, SZ_4G - 1, + pgprot_tagged(PAGE_KERNEL), flags | eflags); + start = SZ_4G; + } + + /* Page-level mappings is not mandatory for memory above 4G */ + eflags = 0; + } +#endif + /* * The linear map must allow allocation tags reading/writing * if MTE is present. Otherwise, it has the same attributes as * PAGE_KERNEL. */ __map_memblock(pgdp, start, end, pgprot_tagged(PAGE_KERNEL), - flags); + flags | eflags); }
/*
From: Zhen Lei thunder.leizhen@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I545H8 CVE: NA
-------------------------------------------------------------------------
If the crashkernel has both high memory above 4G and low memory under 4G, kexec always loads the content such as Imge and dtb to the high memory instead of the low memory. This means that only high memory requires write protection based on page-level mapping. The allocation of high memory does not depend on the DMA boundary. So we can reserve the high memory first even if the crashkernel reservation is deferred.
Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm64/include/asm/kexec.h | 1 + arch/arm64/mm/init.c | 2 + arch/arm64/mm/mmu.c | 15 ++++++ kernel/crash_core.c | 92 ++++++++++++++++++++++++++++++++-- 4 files changed, 106 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index e19c0af3b53d..4024431ee001 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -99,6 +99,7 @@ static inline void crash_post_resume(void) {}
#ifdef CONFIG_KEXEC_CORE extern void __init reserve_crashkernel(void); +extern void __init reserve_crashkernel_high(void); #endif void machine_kexec_mask_interrupts(void);
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 2f3910beb4cf..0c43739bc6c5 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -441,6 +441,8 @@ void __init arm64_memblock_init(void)
early_init_fdt_scan_reserved_mem();
+ reserve_crashkernel_high(); + reserve_elfcorehdr();
high_memory = __va(memblock_end_of_DRAM() - 1) + 1; diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 11755c89c66b..c1103f76dbf6 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -506,6 +506,10 @@ static void __init map_mem(pgd_t *pgdp) #ifdef CONFIG_KEXEC_CORE if (crash_mem_map) eflags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + + if (crashk_res.end) + memblock_mark_nomap(crashk_res.start, + resource_size(&crashk_res)); #endif
/* map all the memory banks */ @@ -552,6 +556,17 @@ static void __init map_mem(pgd_t *pgdp) __map_memblock(pgdp, kernel_start, kernel_end, PAGE_KERNEL, NO_CONT_MAPPINGS); memblock_clear_nomap(kernel_start, kernel_end - kernel_start); + +#ifdef CONFIG_KEXEC_CORE + if (crashk_res.end) { + __map_memblock(pgdp, crashk_res.start, + crashk_res.end + 1, + PAGE_KERNEL, + NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS); + memblock_clear_nomap(crashk_res.start, + resource_size(&crashk_res)); + } +#endif }
void mark_rodata_ro(void) diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 88d93da963e8..5bd5fb6f7291 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -321,6 +321,9 @@ int __init parse_crashkernel_low(char *cmdline, */
#ifdef CONFIG_ARCH_WANT_RESERVE_CRASH_KERNEL +static bool crash_high_mem_reserved __initdata; +static struct resource crashk_res_high; + static int __init reserve_crashkernel_low(void) { #ifdef CONFIG_64BIT @@ -374,6 +377,66 @@ static int __init reserve_crashkernel_low(void) return 0; }
+void __init reserve_crashkernel_high(void) +{ + unsigned long long crash_base, crash_size; + char *cmdline = boot_command_line; + int ret; + + if (!IS_ENABLED(CONFIG_KEXEC_CORE)) + return; + + /* crashkernel=X[@offset] */ + ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), + &crash_size, &crash_base); + if (ret || !crash_size) { + ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base); + if (ret || !crash_size) + return; + } + + crash_size = PAGE_ALIGN(crash_size); + + /* + * For the case crashkernel=X, may fall back to reserve memory above + * 4G, make reservations here in advance. It will be released later if + * the region is successfully reserved under 4G. + */ + if (!crash_base) { + crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, + crash_base, CRASH_ADDR_HIGH_MAX); + if (!crash_base) + return; + + crash_high_mem_reserved = true; + } + + /* Mark the memory range that requires page-level mappings */ + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; +} + +static void __init hand_over_reserved_high_mem(void) +{ + crashk_res_high.start = crashk_res.start; + crashk_res_high.end = crashk_res.end; + + crashk_res.start = 0; + crashk_res.end = 0; +} + +static void __init take_reserved_high_mem(unsigned long long *crash_base, + unsigned long long *crash_size) +{ + *crash_base = crashk_res_high.start; + *crash_size = resource_size(&crashk_res_high); +} + +static void __init free_reserved_high_mem(void) +{ + memblock_free(crashk_res_high.start, resource_size(&crashk_res_high)); +} + /* * reserve_crashkernel() - reserves memory for crash kernel * @@ -389,6 +452,8 @@ void __init reserve_crashkernel(void)
total_mem = memblock_phys_mem_size();
+ hand_over_reserved_high_mem(); + /* crashkernel=XM */ ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base); if (ret != 0 || crash_size <= 0) { @@ -398,6 +463,11 @@ void __init reserve_crashkernel(void) if (ret != 0 || crash_size <= 0) return; high = true; + + if (crash_high_mem_reserved) { + take_reserved_high_mem(&crash_base, &crash_size); + goto reserve_low; + } }
/* 0 means: find the address automatically */ @@ -411,10 +481,15 @@ void __init reserve_crashkernel(void) * So try low memory first and fall back to high memory * unless "crashkernel=size[KMG],high" is specified. */ - if (!high) + if (!high) { crash_base = memblock_find_in_range(CRASH_ALIGN, CRASH_ADDR_LOW_MAX, crash_size, CRASH_ALIGN); + if (!crash_base && crash_high_mem_reserved) { + take_reserved_high_mem(&crash_base, &crash_size); + goto reserve_low; + } + } if (!crash_base) crash_base = memblock_find_in_range(CRASH_ALIGN, CRASH_ADDR_HIGH_MAX, crash_size, @@ -447,9 +522,18 @@ void __init reserve_crashkernel(void) return; }
- if (crash_base >= CRASH_ADDR_LOW_MAX && reserve_crashkernel_low()) { - memblock_free(crash_base, crash_size); - return; + if ((crash_base >= CRASH_ADDR_LOW_MAX) || high) { +reserve_low: + if (reserve_crashkernel_low()) { + memblock_free(crash_base, crash_size); + return; + } + } else if (crash_high_mem_reserved) { + /* + * The crash memory is successfully allocated under 4G, and the + * previously reserved high memory is no longer required. + */ + free_reserved_high_mem(); }
pr_info("Reserving %ldMB of memory at %ldMB for crashkernel (System RAM: %ldMB)\n",
From: Zhen Lei thunder.leizhen@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I545H8 CVE: NA
-------------------------------------------------------------------------
For the case crashkernel=X@offset and crashkernel=X,high, we've explicitly used 'crashk_res' to mark the scope of the page-level mapping required, so NO_BLOCK_MAPPINGS should not be required for other areas. Otherwise, system performance will be affected. In fact, only the case crashkernel=X requires page-level mapping for all low memory under 4G because it attempts high memory after it fails to request low memory first, and we cannot predict its final location.
Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm64/include/asm/kexec.h | 2 ++ arch/arm64/mm/mmu.c | 17 +---------------- kernel/crash_core.c | 3 +++ 3 files changed, 6 insertions(+), 16 deletions(-)
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 4024431ee001..d797dbab3aad 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -97,6 +97,8 @@ static inline void crash_prepare_suspend(void) {} static inline void crash_post_resume(void) {} #endif
+extern bool crash_low_mem_page_map; + #ifdef CONFIG_KEXEC_CORE extern void __init reserve_crashkernel(void); extern void __init reserve_crashkernel_high(void); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index c1103f76dbf6..3b2937ae5e98 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -469,21 +469,6 @@ void __init mark_linear_text_alias_ro(void) PAGE_KERNEL_RO); }
-static bool crash_mem_map __initdata; - -static int __init enable_crash_mem_map(char *arg) -{ - /* - * Proper parameter parsing is done by reserve_crashkernel(). We only - * need to know if the linear map has to avoid block mappings so that - * the crashkernel reservations can be unmapped later. - */ - crash_mem_map = true; - - return 0; -} -early_param("crashkernel", enable_crash_mem_map); - static void __init map_mem(pgd_t *pgdp) { phys_addr_t kernel_start = __pa_symbol(_text); @@ -504,7 +489,7 @@ static void __init map_mem(pgd_t *pgdp) memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
#ifdef CONFIG_KEXEC_CORE - if (crash_mem_map) + if (crash_low_mem_page_map) eflags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
if (crashk_res.end) diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 5bd5fb6f7291..0865f816b57a 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -321,6 +321,7 @@ int __init parse_crashkernel_low(char *cmdline, */
#ifdef CONFIG_ARCH_WANT_RESERVE_CRASH_KERNEL +bool crash_low_mem_page_map __initdata; static bool crash_high_mem_reserved __initdata; static struct resource crashk_res_high;
@@ -393,6 +394,8 @@ void __init reserve_crashkernel_high(void) ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base); if (ret || !crash_size) return; + } else if (!crash_base) { + crash_low_mem_page_map = true; }
crash_size = PAGE_ALIGN(crash_size);
From: Zhen Lei thunder.leizhen@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I545H8 CVE: NA
-------------------------------------------------------------------------
For "crashkernel=X,high", there must be two crash regions: high=crashk_res and low=crashk_low_res. But now the syscall kexec_file_load() only add the crashk_res into "linux,usable-memory-range", this causes the second kernel to have no available dma memory. Fix it like kexec tool do for option -c.
Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/of/kexec.c | 9 +++++++++ 1 file changed, 9 insertions(+)
diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c index f335d941a716..d8231c34e873 100644 --- a/drivers/of/kexec.c +++ b/drivers/of/kexec.c @@ -396,6 +396,15 @@ void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, crashk_res.end - crashk_res.start + 1); if (ret) goto out; + + if (crashk_low_res.end) { + ret = fdt_appendprop_addrrange(fdt, 0, chosen_node, + FDT_PROP_MEM_RANGE, + crashk_low_res.start, + crashk_low_res.end - crashk_low_res.start + 1); + if (ret) + goto out; + } }
/* add bootargs */
From: Lu Wei luwei32@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I52H2U CVE: NA
--------------------------------
The L2E mode of ipvlan is introduced in 4.19, and the mode value IPVLAN_MODE_L2E is inserted between IPVLAN_MODE_L3 and IPVLAN_MODE_L3S in enum ipvlan_mode as follows: enum ipvlan_mode { IPVLAN_MODE_L2 = 0, IPVLAN_MODE_L3, + IPVLAN_MODE_L2E, IPVLAN_MODE_L3S, IPVLAN_MODE_MAX };
that means the value of IPVLAN_MODE_L3S is changed from 2 to 3, and it is different from other operation system like SUSE, rethad and etc.
In order to fix it, IPVLAN_MODE_L2E is placed behind IPVLAN_MODE_L3S in 5.10. However it is incompatible with iproute tool since iproute defines enum ipvlan_mode as it does in 4.19. So this patch moves the IPVLAN_MODE_L2E back to keep incompatible with iproute tool.
Fixes: bd73acb1ae5a ("ipvlan: Introduce l2e mode") Signed-off-by: Lu Wei luwei32@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/net/ipvlan/ipvlan_main.c | 2 +- include/uapi/linux/if_link.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 779a84f70914..4e0fa42c18d0 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -14,7 +14,7 @@ int sysctl_ipvlan_loop_qlen = 131072; int sysctl_ipvlan_loop_delay = 10; static int ipvlan_default_mode = IPVLAN_MODE_L3; module_param(ipvlan_default_mode, int, 0400); -MODULE_PARM_DESC(ipvlan_default_mode, "set ipvlan default mode: 0 for l2, 1 for l3, 2 for l3s, 3 for l2e, others invalid now"); +MODULE_PARM_DESC(ipvlan_default_mode, "set ipvlan default mode: 0 for l2, 1 for l3, 2 for l2e, 3 for l3s, others invalid now");
static struct ctl_table_header *ipvlan_table_hrd; static struct ctl_table ipvlan_table[] = { diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 50d4705e1cbc..0507ecc7275a 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -689,8 +689,8 @@ enum { enum ipvlan_mode { IPVLAN_MODE_L2 = 0, IPVLAN_MODE_L3, - IPVLAN_MODE_L3S, IPVLAN_MODE_L2E, + IPVLAN_MODE_L3S, IPVLAN_MODE_MAX };