From: xiabing xiabing12@h-partners.com
This patchset includes some changes for hisi_sas: - Fix NULL pointer issue when removing debugfs. - Directly calling register snapshot instead of workqueue. - Allocate debugfs memory during triggering debugfs dump. - Fix issue that device is not found because .phy_attached is not set. - Replace with standard error code return value. - Check the variable slot before use it. - Clear HISI_SAS_RESET_BIT, release hisi_hba semaphore, unblock the scsi host, clear HISI_SAS_REJECT_CMD_BIT if FLR failed. - Use the correct number of registers when reading register information. - Fix warnings detected by sparse - Handle the NCQ error returned by D2H frame
Xiang Chen (1): scsi: hisi_sas: Remove unused variable and check in hisi_sas_send_ata_reset_each_phy()
Xingui Yang (3): scsi: hisi_sas: Fix normally completed I/O analysed as failed scsi: hisi_sas: Fix warnings detected by sparse scsi: hisi_sas: Handle the NCQ error returned by D2H frame
Yihang Li (11): scsi: hisi_sas: Set debugfs_dir pointer to NULL after removing debugfs scsi: hisi_sas: Directly call register snapshot instead of using workqueue scsi: hisi_sas: Allocate DFX memory during dump trigger scsi: hisi_sas: Remove redundant checks for automatic debugfs dump scsi: hisi_sas: Set .phy_attached before notifing phyup event HISI_PHYE_PHY_UP_PM scsi: hisi_sas: Replace with standard error code return value scsi: hisi_sas: Check before using pointer variables scsi: hisi_sas: Rollback some operations if FLR failed scsi: hisi_sas: Correct the number of global debugfs registers scsi: hisi_sas: Fix the deadlock issue that occurs during automatic dump scsi: hisi_sas: Check whether debugfs is enabled before removing or releasing it
drivers/scsi/hisi_sas/hisi_sas.h | 3 +- drivers/scsi/hisi_sas/hisi_sas_main.c | 37 ++++-- drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 11 +- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 164 +++++++++++++------------ 4 files changed, 120 insertions(+), 95 deletions(-)
From: Xingui Yang yangxingui@huawei.com
mainline inclusion from mainline-v6.6-rc1 commit f5393a5602cacfda2014e0ff8220e5a7564e7cd1 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8IL6H CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
The PIO read command has no response frame and the struct iu[1024] won't be filled. I/Os which are normally completed will be treated as failed in sas_ata_task_done() when iu contains abnormal dirty data.
Consequently ending_fis should not be filled by iu when the response frame hasn't been written to memory.
Fixes: d380f55503ed ("scsi: hisi_sas: Don't bother clearing status buffer IU in task prep") Signed-off-by: Xingui Yang yangxingui@huawei.com Signed-off-by: Xiang Chen chenxiang66@hisilicon.com Link: https://lore.kernel.org/r/1689045300-44318-2-git-send-email-chenxiang66@hisi... Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: xiabing xiabing12@h-partners.com --- drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 11 +++++++++-- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 6 ++++-- 2 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c index d74e342faffd..82f0aba447c0 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c @@ -2026,6 +2026,11 @@ static void slot_err_v2_hw(struct hisi_hba *hisi_hba, u16 dma_tx_err_type = le16_to_cpu(err_record->dma_tx_err_type); u16 sipc_rx_err_type = le16_to_cpu(err_record->sipc_rx_err_type); u32 dma_rx_err_type = le32_to_cpu(err_record->dma_rx_err_type); + struct hisi_sas_complete_v2_hdr *complete_queue = + hisi_hba->complete_hdr[slot->cmplt_queue]; + struct hisi_sas_complete_v2_hdr *complete_hdr = + &complete_queue[slot->cmplt_queue_slot]; + u32 dw0 = le32_to_cpu(complete_hdr->dw0); int error = -1;
if (err_phase == 1) { @@ -2310,7 +2315,8 @@ static void slot_err_v2_hw(struct hisi_hba *hisi_hba, break; } } - hisi_sas_sata_done(task, slot); + if (dw0 & CMPLT_HDR_RSPNS_XFRD_MSK) + hisi_sas_sata_done(task, slot); } break; default: @@ -2446,7 +2452,8 @@ static void slot_complete_v2_hw(struct hisi_hba *hisi_hba, case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP: { ts->stat = SAS_SAM_STAT_GOOD; - hisi_sas_sata_done(task, slot); + if (dw0 & CMPLT_HDR_RSPNS_XFRD_MSK) + hisi_sas_sata_done(task, slot); break; } default: diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index e714ea328e55..934f742c2552 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -2251,7 +2251,8 @@ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task, ts->stat = SAS_OPEN_REJECT; ts->open_rej_reason = SAS_OREJ_RSVD_RETRY; } - hisi_sas_sata_done(task, slot); + if (dw0 & CMPLT_HDR_RSPNS_XFRD_MSK) + hisi_sas_sata_done(task, slot); break; case SAS_PROTOCOL_SMP: ts->stat = SAS_SAM_STAT_CHECK_CONDITION; @@ -2381,7 +2382,8 @@ static void slot_complete_v3_hw(struct hisi_hba *hisi_hba, case SAS_PROTOCOL_STP: case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP: ts->stat = SAS_SAM_STAT_GOOD; - hisi_sas_sata_done(task, slot); + if (dw0 & CMPLT_HDR_RSPNS_XFRD_MSK) + hisi_sas_sata_done(task, slot); break; default: ts->stat = SAS_SAM_STAT_CHECK_CONDITION;
From: Xingui Yang yangxingui@huawei.com
mainline inclusion from mainline-v6.5-rc1 commit c0328cc595124579328462fc45d7a29a084cf357 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8IL6H CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
This patch fixes the following warning:
drivers/scsi/hisi_sas/hisi_sas_v3_hw.c:2168:43: sparse: sparse: restricted __le32 degrades to integer
Reported-by: kernel test robot lkp@intel.com Link: https://lore.kernel.org/oe-kbuild-all/202304161254.NztCVZIO-lkp@intel.com/ Signed-off-by: Xingui Yang yangxingui@huawei.com Signed-off-by: Xiang Chen chenxiang66@hisilicon.com Link: https://lore.kernel.org/r/1684118481-95908-4-git-send-email-chenxiang66@hisi... Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: xiabing xiabing12@h-partners.com --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 934f742c2552..ddbc66d40c21 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -2211,6 +2211,7 @@ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task, u32 trans_tx_fail_type = le32_to_cpu(record->trans_tx_fail_type); u16 sipc_rx_err_type = le16_to_cpu(record->sipc_rx_err_type); u32 dw3 = le32_to_cpu(complete_hdr->dw3); + u32 dw0 = le32_to_cpu(complete_hdr->dw0);
switch (task->task_proto) { case SAS_PROTOCOL_SSP: @@ -2220,8 +2221,8 @@ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task, * but I/O information has been written to the host memory, we examine * response IU. */ - if (!(complete_hdr->dw0 & CMPLT_HDR_RSPNS_GOOD_MSK) && - (complete_hdr->dw0 & CMPLT_HDR_RSPNS_XFRD_MSK)) + if (!(dw0 & CMPLT_HDR_RSPNS_GOOD_MSK) && + (dw0 & CMPLT_HDR_RSPNS_XFRD_MSK)) return false;
ts->residual = trans_tx_fail_type; @@ -2237,7 +2238,7 @@ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task, case SAS_PROTOCOL_SATA: case SAS_PROTOCOL_STP: case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP: - if ((complete_hdr->dw0 & CMPLT_HDR_RSPNS_XFRD_MSK) && + if ((dw0 & CMPLT_HDR_RSPNS_XFRD_MSK) && (sipc_rx_err_type & RX_FIS_STATUS_ERR_MSK)) { ts->stat = SAS_PROTO_RESPONSE; } else if (dma_rx_err_type & RX_DATA_LEN_UNDERFLOW_MSK) {
mainline inclusion from mainline-v6.7-rc1 commit 6de426f9276c448e2db7238911c97fb157cb23be category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8IL6H CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
If init debugfs failed during device registration due to memory allocation failure, debugfs_remove_recursive() is called, after which debugfs_dir is not set to NULL. debugfs_remove_recursive() will be called again during device removal. As a result, illegal pointer is accessed.
[ 1665.467244] hisi_sas_v3_hw 0000:b4:02.0: failed to init debugfs! ... [ 1669.836708] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000a0 [ 1669.872669] pc : down_write+0x24/0x70 [ 1669.876315] lr : down_write+0x1c/0x70 [ 1669.879961] sp : ffff000036f53a30 [ 1669.883260] x29: ffff000036f53a30 x28: ffffa027c31549f8 [ 1669.888547] x27: ffffa027c3140000 x26: 0000000000000000 [ 1669.893834] x25: ffffa027bf37c270 x24: ffffa027bf37c270 [ 1669.899122] x23: ffff0000095406b8 x22: ffff0000095406a8 [ 1669.904408] x21: 0000000000000000 x20: ffffa027bf37c310 [ 1669.909695] x19: 00000000000000a0 x18: ffff8027dcd86f10 [ 1669.914982] x17: 0000000000000000 x16: 0000000000000000 [ 1669.920268] x15: 0000000000000000 x14: ffffa0274014f870 [ 1669.925555] x13: 0000000000000040 x12: 0000000000000228 [ 1669.930842] x11: 0000000000000020 x10: 0000000000000bb0 [ 1669.936129] x9 : ffff000036f537f0 x8 : ffff80273088ca10 [ 1669.941416] x7 : 000000000000001d x6 : 00000000ffffffff [ 1669.946702] x5 : ffff000008a36310 x4 : ffff80273088be00 [ 1669.951989] x3 : ffff000009513e90 x2 : 0000000000000000 [ 1669.957276] x1 : 00000000000000a0 x0 : ffffffff00000001 [ 1669.962563] Call trace: [ 1669.965000] down_write+0x24/0x70 [ 1669.968301] debugfs_remove_recursive+0x5c/0x1b0 [ 1669.972905] hisi_sas_debugfs_exit+0x24/0x30 [hisi_sas_main] [ 1669.978541] hisi_sas_v3_remove+0x130/0x150 [hisi_sas_v3_hw] [ 1669.984175] pci_device_remove+0x48/0xd8 [ 1669.988082] device_release_driver_internal+0x1b4/0x250 [ 1669.993282] device_release_driver+0x28/0x38 [ 1669.997534] pci_stop_bus_device+0x84/0xb8 [ 1670.001611] pci_stop_and_remove_bus_device_locked+0x24/0x40 [ 1670.007244] remove_store+0xfc/0x140 [ 1670.010802] dev_attr_store+0x44/0x60 [ 1670.014448] sysfs_kf_write+0x58/0x80 [ 1670.018095] kernfs_fop_write+0xe8/0x1f0 [ 1670.022000] __vfs_write+0x60/0x190 [ 1670.025472] vfs_write+0xac/0x1c0 [ 1670.028771] ksys_write+0x6c/0xd8 [ 1670.032071] __arm64_sys_write+0x24/0x30 [ 1670.035977] el0_svc_common+0x78/0x130 [ 1670.039710] el0_svc_handler+0x38/0x78 [ 1670.043442] el0_svc+0x8/0xc
To fix this, set debugfs_dir to NULL after debugfs_remove_recursive().
Signed-off-by: Yihang Li liyihang9@huawei.com Signed-off-by: Xingui Yang yangxingui@huawei.com Signed-off-by: Xiang Chen chenxiang66@hisilicon.com Link: https://lore.kernel.org/r/1694571327-78697-2-git-send-email-chenxiang66@hisi... Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: xiabing xiabing12@h-partners.com --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index ddbc66d40c21..fd4a3f2af36b 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -4806,6 +4806,12 @@ static void debugfs_bist_init_v3_hw(struct hisi_hba *hisi_hba) hisi_hba->debugfs_bist_linkrate = SAS_LINK_RATE_1_5_GBPS; }
+static void debugfs_exit_v3_hw(struct hisi_hba *hisi_hba) +{ + debugfs_remove_recursive(hisi_hba->debugfs_dir); + hisi_hba->debugfs_dir = NULL; +} + static void debugfs_init_v3_hw(struct hisi_hba *hisi_hba) { struct device *dev = hisi_hba->dev; @@ -4829,18 +4835,13 @@ static void debugfs_init_v3_hw(struct hisi_hba *hisi_hba)
for (i = 0; i < hisi_sas_debugfs_dump_count; i++) { if (debugfs_alloc_v3_hw(hisi_hba, i)) { - debugfs_remove_recursive(hisi_hba->debugfs_dir); + debugfs_exit_v3_hw(hisi_hba); dev_dbg(dev, "failed to init debugfs!\n"); break; } } }
-static void debugfs_exit_v3_hw(struct hisi_hba *hisi_hba) -{ - debugfs_remove_recursive(hisi_hba->debugfs_dir); -} - static int hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id) {
mainline inclusion from mainline-v6.7-rc1 commit 2ff07b5c6fe9173e7a7de3b23f300d71ad4d8fde category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8IL6H CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
Currently, register information dump is performed via workqueue, regardless of the trigger mode (automatic or manual). There is a delay in dumping register through workqueue, the exact register information at trigger time cannot be obtained.
Call register snapshot directly instead of through a workqueue.
Signed-off-by: Yihang Li liyihang9@huawei.com Signed-off-by: Xiang Chen chenxiang66@hisilicon.com Link: https://lore.kernel.org/r/1694571327-78697-3-git-send-email-chenxiang66@hisi... Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: xiabing xiabing12@h-partners.com --- drivers/scsi/hisi_sas/hisi_sas.h | 1 - drivers/scsi/hisi_sas/hisi_sas_main.c | 7 +++++-- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 14 +++----------- 3 files changed, 8 insertions(+), 14 deletions(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h index ed75328de62b..ea9acc478907 100644 --- a/drivers/scsi/hisi_sas/hisi_sas.h +++ b/drivers/scsi/hisi_sas/hisi_sas.h @@ -459,7 +459,6 @@ struct hisi_hba { const struct hisi_sas_hw *hw; /* Low level hw interface */ unsigned long sata_dev_bitmap[BITS_TO_LONGS(HISI_SAS_MAX_DEVICES)]; struct work_struct rst_work; - struct work_struct debugfs_work; u32 phy_state; u32 intr_coal_ticks; /* Time of interrupt coalesce in us */ u32 intr_coal_count; /* Interrupt count to coalesce */ diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 2a1b08244d1f..51a4a079cdea 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -2215,8 +2215,11 @@ _hisi_sas_internal_task_abort(struct hisi_hba *hisi_hba,
/* Internal abort timed out */ if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) { - if (hisi_sas_debugfs_enable && hisi_hba->debugfs_itct[0].itct) - queue_work(hisi_hba->wq, &hisi_hba->debugfs_work); + if (hisi_sas_debugfs_enable && hisi_hba->debugfs_itct[0].itct) { + down(&hisi_hba->sem); + hisi_hba->hw->debugfs_snapshot_regs(hisi_hba); + up(&hisi_hba->sem); + }
if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) { struct hisi_sas_slot *slot = task->lldd_task; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index fd4a3f2af36b..561b9a832220 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -550,7 +550,6 @@ static int prot_mask; module_param(prot_mask, int, 0444); MODULE_PARM_DESC(prot_mask, " host protection capabilities mask, def=0x0 ");
-static void debugfs_work_handler_v3_hw(struct work_struct *work); static void debugfs_snapshot_regs_v3_hw(struct hisi_hba *hisi_hba);
static u32 hisi_sas_read32(struct hisi_hba *hisi_hba, u32 off) @@ -3336,7 +3335,6 @@ hisi_sas_shost_alloc_pci(struct pci_dev *pdev) hisi_hba = shost_priv(shost);
INIT_WORK(&hisi_hba->rst_work, hisi_sas_rst_work_handler); - INIT_WORK(&hisi_hba->debugfs_work, debugfs_work_handler_v3_hw); hisi_hba->hw = &hisi_sas_v3_hw; hisi_hba->pci_dev = pdev; hisi_hba->dev = dev; @@ -3851,7 +3849,9 @@ static ssize_t debugfs_trigger_dump_v3_hw_write(struct file *file, if (buf[0] != '1') return -EFAULT;
- queue_work(hisi_hba->wq, &hisi_hba->debugfs_work); + down(&hisi_hba->sem); + debugfs_snapshot_regs_v3_hw(hisi_hba); + up(&hisi_hba->sem);
return count; } @@ -4602,14 +4602,6 @@ static void debugfs_fifo_init_v3_hw(struct hisi_hba *hisi_hba) } }
-static void debugfs_work_handler_v3_hw(struct work_struct *work) -{ - struct hisi_hba *hisi_hba = - container_of(work, struct hisi_hba, debugfs_work); - - debugfs_snapshot_regs_v3_hw(hisi_hba); -} - static void debugfs_release_v3_hw(struct hisi_hba *hisi_hba, int dump_index) { struct device *dev = hisi_hba->dev;
mainline inclusion from mainline-v6.7-rc1 commit 63f0733d07ce60252e885602b39571ade0441015 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8IL6H CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
Currently, if CONFIG_SCSI_HISI_SAS_DEBUGFS_DEFAULT_ENABLE is enabled, the memory space used by DFX is allocated during device initialization, which occupies a large number of memory resources. The memory usage before and after the driver is loaded is as follows:
Memory usage before the driver is loaded: $ free -m total used free shared buff/cache available Mem: 867352 2578 864037 11 735 861681 Swap: 4095 0 4095
Memory usage after the driver which include 4 HBAs is loaded: $ insmod hisi_sas_v3_hw.ko $ free -m total used free shared buff/cache available Mem: 867352 4760 861848 11 743 859495 Swap: 4095 0 4095
The driver with 4 HBAs connected will allocate about 110 MB of memory without enabling debugfs.
Therefore, to avoid wasting memory resources, DFX memory is allocated during dump triggering. The dump may fail due to memory allocation failure. After this change, each dump costs about 10 MB of memory, and each dump lasts about 100 ms.
Signed-off-by: Yihang Li liyihang9@huawei.com Signed-off-by: Xiang Chen chenxiang66@hisilicon.com Link: https://lore.kernel.org/r/1694571327-78697-4-git-send-email-chenxiang66@hisi... Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: xiabing xiabing12@h-partners.com --- drivers/scsi/hisi_sas/hisi_sas.h | 2 +- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 93 +++++++++++++------------- 2 files changed, 46 insertions(+), 49 deletions(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h index ea9acc478907..c59ef1d6f8db 100644 --- a/drivers/scsi/hisi_sas/hisi_sas.h +++ b/drivers/scsi/hisi_sas/hisi_sas.h @@ -351,7 +351,7 @@ struct hisi_sas_hw { u8 reg_index, u8 reg_count, u8 *write_data); void (*wait_cmds_complete_timeout)(struct hisi_hba *hisi_hba, int delay_ms, int timeout_ms); - void (*debugfs_snapshot_regs)(struct hisi_hba *hisi_hba); + int (*debugfs_snapshot_regs)(struct hisi_hba *hisi_hba); int complete_hdr_size; struct scsi_host_template *sht; }; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 561b9a832220..a0761a7ae778 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -550,7 +550,7 @@ static int prot_mask; module_param(prot_mask, int, 0444); MODULE_PARM_DESC(prot_mask, " host protection capabilities mask, def=0x0 ");
-static void debugfs_snapshot_regs_v3_hw(struct hisi_hba *hisi_hba); +static int debugfs_snapshot_regs_v3_hw(struct hisi_hba *hisi_hba);
static u32 hisi_sas_read32(struct hisi_hba *hisi_hba, u32 off) { @@ -3799,37 +3799,6 @@ static void debugfs_create_files_v3_hw(struct hisi_hba *hisi_hba) &debugfs_ras_v3_hw_fops); }
-static void debugfs_snapshot_regs_v3_hw(struct hisi_hba *hisi_hba) -{ - int debugfs_dump_index = hisi_hba->debugfs_dump_index; - struct device *dev = hisi_hba->dev; - u64 timestamp = local_clock(); - - if (debugfs_dump_index >= hisi_sas_debugfs_dump_count) { - dev_warn(dev, "dump count exceeded!\n"); - return; - } - - do_div(timestamp, NSEC_PER_MSEC); - hisi_hba->debugfs_timestamp[debugfs_dump_index] = timestamp; - - debugfs_snapshot_prepare_v3_hw(hisi_hba); - - debugfs_snapshot_global_reg_v3_hw(hisi_hba); - debugfs_snapshot_port_reg_v3_hw(hisi_hba); - debugfs_snapshot_axi_reg_v3_hw(hisi_hba); - debugfs_snapshot_ras_reg_v3_hw(hisi_hba); - debugfs_snapshot_cq_reg_v3_hw(hisi_hba); - debugfs_snapshot_dq_reg_v3_hw(hisi_hba); - debugfs_snapshot_itct_reg_v3_hw(hisi_hba); - debugfs_snapshot_iost_reg_v3_hw(hisi_hba); - - debugfs_create_files_v3_hw(hisi_hba); - - debugfs_snapshot_restore_v3_hw(hisi_hba); - hisi_hba->debugfs_dump_index++; -} - static ssize_t debugfs_trigger_dump_v3_hw_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) @@ -3837,9 +3806,6 @@ static ssize_t debugfs_trigger_dump_v3_hw_write(struct file *file, struct hisi_hba *hisi_hba = file->f_inode->i_private; char buf[8];
- if (hisi_hba->debugfs_dump_index >= hisi_sas_debugfs_dump_count) - return -EFAULT; - if (count > 8) return -EFAULT;
@@ -3850,7 +3816,10 @@ static ssize_t debugfs_trigger_dump_v3_hw_write(struct file *file, return -EFAULT;
down(&hisi_hba->sem); - debugfs_snapshot_regs_v3_hw(hisi_hba); + if (debugfs_snapshot_regs_v3_hw(hisi_hba)) { + up(&hisi_hba->sem); + return -EFAULT; + } up(&hisi_hba->sem);
return count; @@ -4636,7 +4605,7 @@ static int debugfs_alloc_v3_hw(struct hisi_hba *hisi_hba, int dump_index) { const struct hisi_sas_hw *hw = hisi_hba->hw; struct device *dev = hisi_hba->dev; - int p, c, d, r, i; + int p, c, d, r; size_t sz;
for (r = 0; r < DEBUGFS_REGS_NUM; r++) { @@ -4716,11 +4685,48 @@ static int debugfs_alloc_v3_hw(struct hisi_hba *hisi_hba, int dump_index)
return 0; fail: - for (i = 0; i < hisi_sas_debugfs_dump_count; i++) - debugfs_release_v3_hw(hisi_hba, i); + debugfs_release_v3_hw(hisi_hba, dump_index); return -ENOMEM; }
+static int debugfs_snapshot_regs_v3_hw(struct hisi_hba *hisi_hba) +{ + int debugfs_dump_index = hisi_hba->debugfs_dump_index; + struct device *dev = hisi_hba->dev; + u64 timestamp = local_clock(); + + if (debugfs_dump_index >= hisi_sas_debugfs_dump_count) { + dev_warn(dev, "dump count exceeded!\n"); + return -EINVAL; + } + + if (debugfs_alloc_v3_hw(hisi_hba, debugfs_dump_index)) { + dev_warn(dev, "failed to alloc memory\n"); + return -ENOMEM; + } + + do_div(timestamp, NSEC_PER_MSEC); + hisi_hba->debugfs_timestamp[debugfs_dump_index] = timestamp; + + debugfs_snapshot_prepare_v3_hw(hisi_hba); + + debugfs_snapshot_global_reg_v3_hw(hisi_hba); + debugfs_snapshot_port_reg_v3_hw(hisi_hba); + debugfs_snapshot_axi_reg_v3_hw(hisi_hba); + debugfs_snapshot_ras_reg_v3_hw(hisi_hba); + debugfs_snapshot_cq_reg_v3_hw(hisi_hba); + debugfs_snapshot_dq_reg_v3_hw(hisi_hba); + debugfs_snapshot_itct_reg_v3_hw(hisi_hba); + debugfs_snapshot_iost_reg_v3_hw(hisi_hba); + + debugfs_create_files_v3_hw(hisi_hba); + + debugfs_snapshot_restore_v3_hw(hisi_hba); + hisi_hba->debugfs_dump_index++; + + return 0; +} + static void debugfs_phy_down_cnt_init_v3_hw(struct hisi_hba *hisi_hba) { struct dentry *dir = debugfs_create_dir("phy_down_cnt", @@ -4807,7 +4813,6 @@ static void debugfs_exit_v3_hw(struct hisi_hba *hisi_hba) static void debugfs_init_v3_hw(struct hisi_hba *hisi_hba) { struct device *dev = hisi_hba->dev; - int i;
hisi_hba->debugfs_dir = debugfs_create_dir(dev_name(dev), hisi_sas_debugfs_dir); @@ -4824,14 +4829,6 @@ static void debugfs_init_v3_hw(struct hisi_hba *hisi_hba)
debugfs_phy_down_cnt_init_v3_hw(hisi_hba); debugfs_fifo_init_v3_hw(hisi_hba); - - for (i = 0; i < hisi_sas_debugfs_dump_count; i++) { - if (debugfs_alloc_v3_hw(hisi_hba, i)) { - debugfs_exit_v3_hw(hisi_hba); - dev_dbg(dev, "failed to init debugfs!\n"); - break; - } - } }
static int
From: Xiang Chen chenxiang66@hisilicon.com
mainline inclusion from mainline-v5.17-rc1 commit 5d9224fb076e9a2023e0b06d6a164d644612c0c0 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8IL6H CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
In commit 29e2bac87421 ("scsi: hisi_sas: Fix some issues related to asd_sas_port->phy_list"), we use asd_sas_port->phy_mask instead of accessing asd_sas_port->phy_list, and it is enough to use asd_sas_port->phy_mask to check the state of phy, so remove the unused check and variable.
Link: https://lore.kernel.org/r/1641300126-53574-1-git-send-email-chenxiang66@hisi... Fixes: 29e2bac87421 ("scsi: hisi_sas: Fix some issues related to asd_sas_port->phy_list") Reported-by: Nathan Chancellor nathan@kernel.org Reported-by: Colin King colin.i.king@gmail.com Acked-by: John Garry john.garry@huawei.com Signed-off-by: Xiang Chen chenxiang66@hisilicon.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: xiabing xiabing12@h-partners.com --- drivers/scsi/hisi_sas/hisi_sas_main.c | 5 ----- 1 file changed, 5 deletions(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 51a4a079cdea..40ed38ee3809 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1531,16 +1531,11 @@ static void hisi_sas_send_ata_reset_each_phy(struct hisi_hba *hisi_hba, struct device *dev = hisi_hba->dev; int s = sizeof(struct host_to_dev_fis); int rc = TMF_RESP_FUNC_FAILED; - struct asd_sas_phy *sas_phy; struct ata_link *link; u8 fis[20] = {0}; - u32 state; int i;
- state = hisi_hba->hw->get_phys_state(hisi_hba); for (i = 0; i < hisi_hba->n_phy; i++) { - if (!(state & BIT(sas_phy->id))) - continue; if (!(sas_port->phy_mask & BIT(i))) continue;
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8IL6H CVE: NA
----------------------------------------------------------------------
In commit 63f0733d07ce ("scsi: hisi_sas: Allocate DFX memory during dump trigger"), the memory allocation time of the DFX is changed from device initialization to dump occurs, so .debugfs_itct is not a valid address and do not need to check.
The parameter hisi_sas_debugfs_enable is enough to check whether automatic debugfs dump is triggered, so remove redunant checks.
Fixes: 63f0733d07ce ("scsi: hisi_sas: Allocate DFX memory during dump trigger") Signed-off-by: Yihang Li liyihang9@huawei.com Signed-off-by: xiabing xiabing12@h-partners.com --- drivers/scsi/hisi_sas/hisi_sas_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 40ed38ee3809..7dbc724332d7 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1672,7 +1672,7 @@ static int hisi_sas_controller_prereset(struct hisi_hba *hisi_hba) return -1; }
- if (hisi_sas_debugfs_enable && hisi_hba->debugfs_itct[0].itct) + if (hisi_sas_debugfs_enable) hisi_hba->hw->debugfs_snapshot_regs(hisi_hba);
return 0; @@ -2210,7 +2210,7 @@ _hisi_sas_internal_task_abort(struct hisi_hba *hisi_hba,
/* Internal abort timed out */ if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) { - if (hisi_sas_debugfs_enable && hisi_hba->debugfs_itct[0].itct) { + if (hisi_sas_debugfs_enable) { down(&hisi_hba->sem); hisi_hba->hw->debugfs_snapshot_regs(hisi_hba); up(&hisi_hba->sem);
From: Xingui Yang yangxingui@huawei.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8IL6H CVE: NA
----------------------------------------------------------------------
We find that some disks use D2H frame instead of SDB frame to return NCQ error. Currently, only the I/O corresponding to the D2H frame is processed in this scenario, which does not meet the processing requirements of the NCQ error scenario. So we set dev_status to HISI_SAS_DEV_NCQ_ERR and abort all I/Os of the disk in this scenario.
Signed-off-by: Xingui Yang yangxingui@huawei.com Reviewed-by: Xiang Chen chenxiang66@hisilicon.com Signed-off-by: xiabing xiabing12@h-partners.com --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index a0761a7ae778..3274ce65c7a4 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -2239,7 +2239,15 @@ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task, case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP: if ((dw0 & CMPLT_HDR_RSPNS_XFRD_MSK) && (sipc_rx_err_type & RX_FIS_STATUS_ERR_MSK)) { - ts->stat = SAS_PROTO_RESPONSE; + if (task->ata_task.use_ncq) { + struct domain_device *device = task->dev; + struct hisi_sas_device *sas_dev = + device->lldd_dev; + sas_dev->dev_status = HISI_SAS_DEV_NCQ_ERR; + slot->abort = 1; + } else { + ts->stat = SAS_PROTO_RESPONSE; + } } else if (dma_rx_err_type & RX_DATA_LEN_UNDERFLOW_MSK) { ts->residual = trans_tx_fail_type; ts->stat = SAS_DATA_UNDERRUN;
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8IL6H CVE: NA
----------------------------------------------------------------------
Currently in directly attached scenario, the phyup event HISI_PHYE_PHY_UP_PM is notified before .phy_attached is set - this may cause the phyup work hisi_sas_bytes_dmaed() execution failed and the attached device will not be found.
To fix it, set .phy_attached before notifing phyup event.
Signed-off-by: Yihang Li liyihang9@huawei.com Signed-off-by: Xiang Chen chenxiang66@hisilicon.com Signed-off-by: xiabing xiabing12@h-partners.com --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 3274ce65c7a4..9afc9eee1ba6 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -1597,6 +1597,11 @@ static irqreturn_t phy_up_v3_hw(int phy_no, struct hisi_hba *hisi_hba) }
phy->port_id = port_id; + spin_lock(&phy->lock); + /* Delete timer and set phy_attached atomically */ + del_timer(&phy->timer); + phy->phy_attached = 1; + spin_unlock(&phy->lock);
/* * Call pm_runtime_get_noresume() which pairs with @@ -1610,11 +1615,6 @@ static irqreturn_t phy_up_v3_hw(int phy_no, struct hisi_hba *hisi_hba)
res = IRQ_HANDLED;
- spin_lock(&phy->lock); - /* Delete timer and set phy_attached atomically */ - del_timer(&phy->timer); - phy->phy_attached = 1; - spin_unlock(&phy->lock); end: if (phy->reset_completion) complete(phy->reset_completion);
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8IL6H CVE: NA
----------------------------------------------------------------------
In function hisi_sas_controller_prereset(), -ENOSYS (Function not implemented) should be returned if the driver does not support .soft_reset. Returns -EPERM (Operation not permitted) if HISI_SAS_RESETTING_BIT is already be set.
In function _suspend_v3_hw(), returns -EPERM (Operation not permitted) if HISI_SAS_RESETTING_BIT is already be set.
Fixes: 4522204ab218 ("scsi: hisi_sas: tidy host controller reset function a bit") Signed-off-by: Yihang Li liyihang9@huawei.com Signed-off-by: Xiang Chen chenxiang66@hisilicon.com Signed-off-by: xiabing xiabing12@h-partners.com --- drivers/scsi/hisi_sas/hisi_sas_main.c | 4 ++-- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 7dbc724332d7..d7252b57db31 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1664,12 +1664,12 @@ EXPORT_SYMBOL_GPL(hisi_sas_controller_reset_done); static int hisi_sas_controller_prereset(struct hisi_hba *hisi_hba) { if (!hisi_hba->hw->soft_reset) - return -1; + return -ENOENT;
down(&hisi_hba->sem); if (test_and_set_bit(HISI_SAS_RESET_BIT, &hisi_hba->flags)) { up(&hisi_hba->sem); - return -1; + return -EPERM; }
if (hisi_sas_debugfs_enable) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 9afc9eee1ba6..968ee8ef2e94 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -5092,7 +5092,7 @@ static int _suspend_v3_hw(struct device *device) }
if (test_and_set_bit(HISI_SAS_RESET_BIT, &hisi_hba->flags)) - return -1; + return -EPERM;
dev_warn(dev, "entering suspend state\n");
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8IL6H CVE: NA
----------------------------------------------------------------------
In commit 4b329abc91800 ("scsi: hisi_sas: Move slot variable definition in hisi_sas_abort_task()"), we move the variables slot to the function head. However, the variable slot may be NULL, we should check it in each branch.
Fixes: 4b329abc9180 ("scsi: hisi_sas: Move slot variable definition in hisi_sas_abort_task()") Signed-off-by: Yihang Li liyihang9@huawei.com Signed-off-by: Xiang Chen chenxiang66@hisilicon.com Signed-off-by: xiabing xiabing12@h-partners.com --- drivers/scsi/hisi_sas/hisi_sas_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index d7252b57db31..4f469ecb2f14 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1741,7 +1741,10 @@ static int hisi_sas_abort_task(struct sas_task *task) task->task_state_flags |= SAS_TASK_STATE_ABORTED; spin_unlock_irqrestore(&task->task_state_lock, flags);
- if (slot && task->task_proto & SAS_PROTOCOL_SSP) { + if (!slot) + goto out; + + if (task->task_proto & SAS_PROTOCOL_SSP) { struct scsi_cmnd *cmnd = task->uldd_task; u16 tag = slot->idx; int rc2; @@ -1798,7 +1801,7 @@ static int hisi_sas_abort_task(struct sas_task *task) rc = hisi_sas_softreset_ata_disk(device); } } - } else if (slot && task->task_proto & SAS_PROTOCOL_SMP) { + } else if (task->task_proto & SAS_PROTOCOL_SMP) { /* SMP */ u32 tag = slot->idx; struct hisi_sas_cq *cq = &hisi_hba->cq[slot->dlvry_queue];
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8IL6H CVE: NA
----------------------------------------------------------------------
We obtain the semaphore and set HISI_SAS_RESETTING_BIT in hisi_sas_reset_prepare_v3_hw(), block the scsi host and set HISI_SAS_REJECT_CMD_BIT in hisi_sas_controller_reset_prepare(), released them in hisi_sas_controller_reset_done(). However, if the HW reset failure in FLR results in early return, the semaphore and flag bits will not be release.
Rollback some operations including clearing flags / releasing semaphore when FLR is failed.
Fixes: e5ea48014adc ("scsi: hisi_sas: Implement handlers of PCIe FLR for v3 hw") Signed-off-by: Yihang Li liyihang9@huawei.com Signed-off-by: Xiang Chen chenxiang66@hisilicon.com Signed-off-by: xiabing xiabing12@h-partners.com --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 968ee8ef2e94..315fd3d99aa4 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -5042,6 +5042,7 @@ static void hisi_sas_reset_done_v3_hw(struct pci_dev *pdev) { struct sas_ha_struct *sha = pci_get_drvdata(pdev); struct hisi_hba *hisi_hba = sha->lldd_ha; + struct Scsi_Host *shost = hisi_hba->shost; struct device *dev = hisi_hba->dev; int rc;
@@ -5050,6 +5051,10 @@ static void hisi_sas_reset_done_v3_hw(struct pci_dev *pdev) rc = hw_init_v3_hw(hisi_hba); if (rc) { dev_err(dev, "FLR: hw init failed rc=%d\n", rc); + clear_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags); + scsi_unblock_requests(shost); + clear_bit(HISI_SAS_RESET_BIT, &hisi_hba->flags); + up(&hisi_hba->sem); return; }
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8IL6H CVE: NA
----------------------------------------------------------------------
In function debugfs_debugfs_snapshot_global_reg_v3_hw() it uses debugfs_axi_reg.count (which is the number of axi debugfs registers) to acquire the number of global debugfs registers.
Use debugfs_global_reg.count to acquire the number of global debugfs registers instead.
Fixes: 623a4b6d5c2a ("scsi: hisi_sas: Move debugfs code to v3 hw driver") Signed-off-by: Yihang Li liyihang9@huawei.com Signed-off-by: Xiang Chen chenxiang66@hisilicon.com Signed-off-by: xiabing xiabing12@h-partners.com --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 315fd3d99aa4..cab49022f8ea 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -3425,7 +3425,7 @@ static void debugfs_snapshot_global_reg_v3_hw(struct hisi_hba *hisi_hba) u32 *databuf = hisi_hba->debugfs_regs[dump_index][DEBUGFS_GLOBAL].data; int i;
- for (i = 0; i < debugfs_axi_reg.count; i++, databuf++) + for (i = 0; i < debugfs_global_reg.count; i++, databuf++) *databuf = hisi_sas_read32(hisi_hba, 4 * i); }
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8IL6H CVE: NA
----------------------------------------------------------------------
If we issue a disable PHY command, the device attached with it will go offline, if a 2 bit ECC error occurs at the same time, a hung task may be found:
[ 4613.652388] INFO: task kworker/u256:0:165233 blocked for more than 120 seconds. [ 4613.666297] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 4613.674809] task:kworker/u256:0 state:D stack: 0 pid:165233 ppid: 2 flags:0x00000208 [ 4613.683959] Workqueue: 0000:74:02.0_disco_q sas_revalidate_domain [libsas] [ 4613.691518] Call trace: [ 4613.694678] __switch_to+0xf8/0x17c [ 4613.698872] __schedule+0x660/0xee0 [ 4613.703063] schedule+0xac/0x240 [ 4613.706994] schedule_timeout+0x500/0x610 [ 4613.711705] __down+0x128/0x36c [ 4613.715548] down+0x240/0x2d0 [ 4613.719221] hisi_sas_internal_abort_timeout+0x1bc/0x260 [hisi_sas_main] [ 4613.726618] sas_execute_internal_abort+0x144/0x310 [libsas] [ 4613.732976] sas_execute_internal_abort_dev+0x44/0x60 [libsas] [ 4613.739504] hisi_sas_internal_task_abort_dev.isra.0+0xbc/0x1b0 [hisi_sas_main] [ 4613.747499] hisi_sas_dev_gone+0x174/0x250 [hisi_sas_main] [ 4613.753682] sas_notify_lldd_dev_gone+0xec/0x2e0 [libsas] [ 4613.759781] sas_unregister_common_dev+0x4c/0x7a0 [libsas] [ 4613.765962] sas_destruct_devices+0xb8/0x120 [libsas] [ 4613.771709] sas_do_revalidate_domain.constprop.0+0x1b8/0x31c [libsas] [ 4613.778930] sas_revalidate_domain+0x60/0xa4 [libsas] [ 4613.784716] process_one_work+0x248/0x950 [ 4613.789424] worker_thread+0x318/0x934 [ 4613.793878] kthread+0x190/0x200 [ 4613.797810] ret_from_fork+0x10/0x18 [ 4613.802121] INFO: task kworker/u256:4:316722 blocked for more than 120 seconds. [ 4613.816026] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 4613.824538] task:kworker/u256:4 state:D stack: 0 pid:316722 ppid: 2 flags:0x00000208 [ 4613.833670] Workqueue: 0000:74:02.0 hisi_sas_rst_work_handler [hisi_sas_main] [ 4613.841491] Call trace: [ 4613.844647] __switch_to+0xf8/0x17c [ 4613.848852] __schedule+0x660/0xee0 [ 4613.853052] schedule+0xac/0x240 [ 4613.856984] schedule_timeout+0x500/0x610 [ 4613.861695] __down+0x128/0x36c [ 4613.865542] down+0x240/0x2d0 [ 4613.869216] hisi_sas_controller_prereset+0x58/0x1fc [hisi_sas_main] [ 4613.876324] hisi_sas_rst_work_handler+0x40/0x8c [hisi_sas_main] [ 4613.883019] process_one_work+0x248/0x950 [ 4613.887732] worker_thread+0x318/0x934 [ 4613.892204] kthread+0x190/0x200 [ 4613.896118] ret_from_fork+0x10/0x18 [ 4613.900423] INFO: task kworker/u256:1:348985 blocked for more than 121 seconds. [ 4613.914341] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 4613.922852] task:kworker/u256:1 state:D stack: 0 pid:348985 ppid: 2 flags:0x00000208 [ 4613.931984] Workqueue: 0000:74:02.0_event_q sas_port_event_worker [libsas] [ 4613.939549] Call trace: [ 4613.942702] __switch_to+0xf8/0x17c [ 4613.946892] __schedule+0x660/0xee0 [ 4613.951083] schedule+0xac/0x240 [ 4613.955015] schedule_timeout+0x500/0x610 [ 4613.959725] wait_for_common+0x200/0x610 [ 4613.964349] wait_for_completion+0x3c/0x5c [ 4613.969146] flush_workqueue+0x198/0x790 [ 4613.973776] sas_porte_broadcast_rcvd+0x1e8/0x320 [libsas] [ 4613.979960] sas_port_event_worker+0x54/0xa0 [libsas] [ 4613.985708] process_one_work+0x248/0x950 [ 4613.990420] worker_thread+0x318/0x934 [ 4613.994868] kthread+0x190/0x200 [ 4613.998800] ret_from_fork+0x10/0x18
This is because when the device goes offline, we obtain the hisi_hba semaphore and send the ABORT_DEV command to the device. However, the internal abort timed out due to the 2 bit ECC error and triggers automatic dump. In addition, because the hisi_hba semaphore has been obtained, the dump cannot be executed and the controller cannot be reset.
Therefore, the deadlocks occur on the following circular dependencies: hisi_sas_dev_gone() -> down() -> hisi_sas_internal_task_abort_dev() -> ... -> hisi_sas_internal_abort_timeout() -> down().
The deadlock is triggered only when the timeout occurs during device goes offline. To fix this issue, use .rst_ha_timeout to distinguish the scenario where a device goes offline from other scenarios.
Fixes: 2ff07b5c6fe9 ("scsi: hisi_sas: Directly call register snapshot instead of using workqueue") Signed-off-by: Yihang Li liyihang9@huawei.com Reviewed-by: Xiang Chen chenxiang66@hisilicon.com Signed-off-by: xiabing xiabing12@h-partners.com --- drivers/scsi/hisi_sas/hisi_sas_main.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 4f469ecb2f14..cb541302b5c9 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -2214,9 +2214,18 @@ _hisi_sas_internal_task_abort(struct hisi_hba *hisi_hba, /* Internal abort timed out */ if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) { if (hisi_sas_debugfs_enable) { - down(&hisi_hba->sem); + /* + * If timeout occurs in device gone scenario, to avoid + * circular dependency like: + * hisi_sas_dev_gone() -> down() -> ... -> + * hisi_sas_internal_abort_timeout() -> down(). + */ + if (!rst_to_recover) + down(&hisi_hba->sem); + hisi_hba->hw->debugfs_snapshot_regs(hisi_hba); - up(&hisi_hba->sem); + if (!rst_to_recover) + up(&hisi_hba->sem); }
if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) {
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8IL6H CVE: NA
----------------------------------------------------------------------
Hisi_sas debugfs remove should be executed only when debugfs is enabled. In this patch, check whether debugfs is enabled and then remove it.
Signed-off-by: Yihang Li liyihang9@huawei.com Signed-off-by: xiabing xiabing12@h-partners.com --- drivers/scsi/hisi_sas/hisi_sas_main.c | 3 ++- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index cb541302b5c9..8798a51e78c8 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -2942,7 +2942,8 @@ static __exit void hisi_sas_exit(void) { sas_release_transport(hisi_sas_stt);
- debugfs_remove(hisi_sas_debugfs_dir); + if (hisi_sas_debugfs_enable) + debugfs_remove(hisi_sas_debugfs_dir); }
module_init(hisi_sas_init); diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index cab49022f8ea..30a3b748692d 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -4969,7 +4969,8 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id) err_out_register_ha: scsi_remove_host(shost); err_out_debugfs: - debugfs_exit_v3_hw(hisi_hba); + if (hisi_sas_debugfs_enable) + debugfs_exit_v3_hw(hisi_hba); err_out_ha: hisi_sas_free(hisi_hba); scsi_host_put(shost); @@ -5016,7 +5017,8 @@ static void hisi_sas_v3_remove(struct pci_dev *pdev) pci_release_regions(pdev); pci_disable_device(pdev); hisi_sas_free(hisi_hba); - debugfs_exit_v3_hw(hisi_hba); + if (hisi_sas_debugfs_enable) + debugfs_exit_v3_hw(hisi_hba); scsi_host_put(shost); }