Xingui Yang (3): scsi: hisi_sas: Modify v3 HW ATA completion process when SATA disk is in error status scsi: hisi_sas: Enable force phy when SATA disk directly connected scsi: hisi_sas: Release resource directly in hisi_sas_abort_task() when NCQ error
drivers/scsi/hisi_sas/hisi_sas.h | 1 + drivers/scsi/hisi_sas/hisi_sas_main.c | 20 +++++++++---- drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 10 +++++-- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 41 ++++++++++++++++++++++++-- 4 files changed, 61 insertions(+), 11 deletions(-)
From: Xingui Yang yangxingui@huawei.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5Q63H CVE: NA
-------------------------------------
When an NCQ error occurs, SAS controller will abnormally complete the I/Os that newly delivered to disk, and bit8 in CQ dw3 will be set to 1 to indicate current SATA disk is in error status. The current processing flow is set ts->stat to SAS_OPEN_REJECT and then sas_ata_task_done() will set fis stat to ATA_ERR. After analyzed by ata_eh_analyze_tf(), err_mask will set to AC_ERR_HSM. If media error occurs for four times within 10 minutes and the chip rejects new I/Os for four times, NCQ will be disabled due to excessive errors.
However, if media error occurs multiple times, the NCQ mode shouldn't be disabled. Therefore, use sas_task_abort() to handle abnormally completed I/Os when SATA disk is in error status.
[10253.397429] hisi_sas_v3_hw 0000:b4:02.0: erroneous completion disk err dev id=2 sas_addr=0x5000000000000605 CQ hdr: 0x400903 0x2007f 0x0 0x80470000 [10253.397430] hisi_sas_v3_hw 0000:b4:02.0: erroneous completion iptt=135 task= pK-error dev id=2 sas_addr=0x5000000000000605 CQ hdr: 0x203 0x20087 0x0 0x100 Error info: 0x0 0x0 0x0 0x0 [10253.397432] hisi_sas_v3_hw 0000:b4:02.0: erroneous completion iptt=136 task= pK-error dev id=2 sas_addr=0x5000000000000605 CQ hdr: 0x203 0x20088 0x0 0x100 Error info: 0x0 0x0 0x0 0x0
Signed-off-by: Xingui Yang yangxingui@huawei.com Reviewed-by: kang fenglong kangfenglong@huawei.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index cb0940f81de1..35d579121b28 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -413,6 +413,8 @@ #define CMPLT_HDR_DEV_ID_OFF 16 #define CMPLT_HDR_DEV_ID_MSK (0xffff << CMPLT_HDR_DEV_ID_OFF) /* dw3 */ +/* ERR_CODE */ +#define SATA_DISK_IN_ERROR_STATUS BIT(8) #define COMLT_HDR_SATA_DISK_ERR_OFF 16 #define CMPLT_HDR_SATA_DISK_ERR_MSK (0x1 << COMLT_HDR_SATA_DISK_ERR_OFF) #define CMPLT_HDR_IO_IN_TARGET_OFF 17 @@ -2275,7 +2277,8 @@ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task, if ((dw0 & CMPLT_HDR_RSPNS_XFRD_MSK) && (sipc_rx_err_type & RX_FIS_STATUS_ERR_MSK)) { ts->stat = SAS_PROTO_RESPONSE; - } else if (dw3 & CMPLT_HDR_IO_IN_TARGET_MSK) { + } else if ((dw3 & CMPLT_HDR_IO_IN_TARGET_MSK) || + (dw3 & SATA_DISK_IN_ERROR_STATUS)) { ts->stat = SAS_PHY_DOWN; slot->abort = 1; } else if (dma_rx_err_type & RX_DATA_LEN_UNDERFLOW_MSK) {
From: Xingui Yang yangxingui@huawei.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5QDH7 CVE: NA
----------------------------------
the SAS controller determines the disk to which I/Os are delivered based on the port id in the DQ entry when SATA disk directly connected.
When the link is intermittently disconnected during I/O sending and the port id changes and is used by another link, data inconsistency on the SATA disk may occur during I/O retry. So enable force phy, then force the command to be executed in a certain phy, and if the port's phy does not match the phy configured in the command, the chip will stop delivering I/Os to disk.
Signed-off-by: Xingui Yang yangxingui@huawei.com Reviewed-by: kang fenglong kangfenglong@huawei.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 10 ++++++++-- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 14 ++++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c index 1c452f16b6e2..46d274582684 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c @@ -2517,10 +2517,16 @@ static void prep_ata_v2_hw(struct hisi_hba *hisi_hba, /* create header */ /* dw0 */ hdr->dw0 = cpu_to_le32(port->id << CMD_HDR_PORT_OFF); - if (parent_dev && DEV_IS_EXPANDER(parent_dev->dev_type)) + if (parent_dev && DEV_IS_EXPANDER(parent_dev->dev_type)) { hdr->dw0 |= cpu_to_le32(3 << CMD_HDR_CMD_OFF); - else + } else { + int phy_id = device->phy->identify.phy_identifier; + + hdr->dw0 |= cpu_to_le32((1 << phy_id) + << CMD_HDR_PHY_ID_OFF); + hdr->dw0 |= CMD_HDR_FORCE_PHY_MSK; hdr->dw0 |= cpu_to_le32(4 << CMD_HDR_CMD_OFF); + }
if (tmf && tmf->force_phy) { hdr->dw0 |= CMD_HDR_FORCE_PHY_MSK; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 35d579121b28..8e9f6491dacc 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -347,6 +347,10 @@ #define CMD_HDR_RESP_REPORT_MSK (0x1 << CMD_HDR_RESP_REPORT_OFF) #define CMD_HDR_TLR_CTRL_OFF 6 #define CMD_HDR_TLR_CTRL_MSK (0x3 << CMD_HDR_TLR_CTRL_OFF) +#define CMD_HDR_PHY_ID_OFF 8 +#define CMD_HDR_PHY_ID_MSK (0x1ff << CMD_HDR_PHY_ID_OFF) +#define CMD_HDR_FORCE_PHY_OFF 17 +#define CMD_HDR_FORCE_PHY_MSK (0x1 << CMD_HDR_FORCE_PHY_OFF) #define CMD_HDR_PORT_OFF 18 #define CMD_HDR_PORT_MSK (0xf << CMD_HDR_PORT_OFF) #define CMD_HDR_PRIORITY_OFF 27 @@ -1474,10 +1478,16 @@ static void prep_ata_v3_hw(struct hisi_hba *hisi_hba, u32 dw1 = 0, dw2 = 0, hdr_tag = 0;
hdr->dw0 = cpu_to_le32(port->id << CMD_HDR_PORT_OFF); - if (parent_dev && DEV_IS_EXPANDER(parent_dev->dev_type)) + if (parent_dev && DEV_IS_EXPANDER(parent_dev->dev_type)) { hdr->dw0 |= cpu_to_le32(3 << CMD_HDR_CMD_OFF); /* STP */ - else + } else { + int phy_id = device->phy->identify.phy_identifier; + + hdr->dw0 |= cpu_to_le32((1 << phy_id) + << CMD_HDR_PHY_ID_OFF); + hdr->dw0 |= CMD_HDR_FORCE_PHY_MSK; hdr->dw0 |= cpu_to_le32(4U << CMD_HDR_CMD_OFF); /* SATA */ + }
switch (task->data_dir) { case DMA_TO_DEVICE:
From: Xingui Yang yangxingui@huawei.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5SXSB CVE: NA
------------------------------------------------
When the port is detached, EH will clear ATA_EH_RESET in ehc->i.action when call ata_eh_reset(), and device reset won't be executed.
As the disk won't return other I/Os normally after NCQ Error. In addition, the abort operation is added, then resource release is safe, so release NCQ command lldd resource directly in hisi_sas_abort_task() when NCQ error without soft reset to make sure read log command can be executed success later. But Soft reset still need to be used in other scenario.
Signed-off-by: Xingui Yang yangxingui@huawei.com Reviewed-by: kang fenglong kangfenglong@huawei.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- drivers/scsi/hisi_sas/hisi_sas.h | 1 + drivers/scsi/hisi_sas/hisi_sas_main.c | 20 ++++++++++++++------ drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 22 ++++++++++++++++++++++ 3 files changed, 37 insertions(+), 6 deletions(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h index a791401be7b3..533408277156 100644 --- a/drivers/scsi/hisi_sas/hisi_sas.h +++ b/drivers/scsi/hisi_sas/hisi_sas.h @@ -101,6 +101,7 @@ enum { enum dev_status { HISI_SAS_DEV_INIT, HISI_SAS_DEV_NORMAL, + HISI_SAS_DEV_NCQ_ERR, };
enum { diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 2411bb1eeb0f..e6ccaedcd8b9 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1673,7 +1673,7 @@ static int hisi_sas_abort_task(struct sas_task *task) task->task_state_flags |= SAS_TASK_STATE_ABORTED; spin_unlock_irqrestore(&task->task_state_lock, flags);
- if (task->lldd_task && task->task_proto & SAS_PROTOCOL_SSP) { + if (slot && task->task_proto & SAS_PROTOCOL_SSP) { struct scsi_cmnd *cmnd = task->uldd_task; u32 tag = slot->idx; int rc2; @@ -1706,8 +1706,7 @@ static int hisi_sas_abort_task(struct sas_task *task) } else if (task->task_proto & SAS_PROTOCOL_SATA || task->task_proto & SAS_PROTOCOL_STP) { if (task->dev->dev_type == SAS_SATA_DEV) { - struct ata_port *ap = device->sata_dev.ap; - struct ata_link *link = &ap->link; + struct ata_queued_cmd *qc = task->uldd_task;
rc = hisi_sas_internal_task_abort(hisi_hba, device, HISI_SAS_INT_ABT_DEV, 0); @@ -1717,14 +1716,19 @@ static int hisi_sas_abort_task(struct sas_task *task) } hisi_sas_dereg_device(hisi_hba, device);
- if (link->eh_info.action & ATA_EH_RESET) { - slot->task = NULL; + /* + * If an ATA internal command times out in ATA EH, it + * need to execute soft reset, so check the scsicmd + */ + if ((sas_dev->dev_status == HISI_SAS_DEV_NCQ_ERR) && + qc && qc->scsicmd) { + hisi_sas_do_release_task(hisi_hba, task, slot); rc = TMF_RESP_FUNC_COMPLETE; } else { rc = hisi_sas_softreset_ata_disk(device); } } - } else if (task->lldd_task && task->task_proto & SAS_PROTOCOL_SMP) { + } else if (slot && task->task_proto & SAS_PROTOCOL_SMP) { /* SMP */ u32 tag = slot->idx; struct hisi_sas_cq *cq = &hisi_hba->cq[slot->dlvry_queue]; @@ -1843,8 +1847,12 @@ static int hisi_sas_I_T_nexus_reset(struct domain_device *device) { struct hisi_hba *hisi_hba = dev_to_hisi_hba(device); struct device *dev = hisi_hba->dev; + struct hisi_sas_device *sas_dev = device->lldd_dev; int rc;
+ if (sas_dev->dev_status == HISI_SAS_DEV_NCQ_ERR) + sas_dev->dev_status = HISI_SAS_DEV_NORMAL; + rc = hisi_sas_internal_task_abort(hisi_hba, device, HISI_SAS_INT_ABT_DEV, 0); if (rc < 0) { diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 8e9f6491dacc..470be06525d9 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -398,6 +398,8 @@ #define CMPLT_HDR_CMPLT_MSK (0x3 << CMPLT_HDR_CMPLT_OFF) #define CMPLT_HDR_ERROR_PHASE_OFF 2 #define CMPLT_HDR_ERROR_PHASE_MSK (0xff << CMPLT_HDR_ERROR_PHASE_OFF) +/* bit[9:2] Error Phase */ +#define ERR_PHASE_RESPONSE_FRAME_REV_STAGE BIT(8) #define CMPLT_HDR_RSPNS_XFRD_OFF 10 #define CMPLT_HDR_RSPNS_XFRD_MSK (0x1 << CMPLT_HDR_RSPNS_XFRD_OFF) #define CMPLT_HDR_RSPNS_GOOD_OFF 11 @@ -423,6 +425,10 @@ #define CMPLT_HDR_SATA_DISK_ERR_MSK (0x1 << COMLT_HDR_SATA_DISK_ERR_OFF) #define CMPLT_HDR_IO_IN_TARGET_OFF 17 #define CMPLT_HDR_IO_IN_TARGET_MSK (0x1 << CMPLT_HDR_IO_IN_TARGET_OFF) +/* bit[23:18] ERR_FIS_ATA_STATUS */ +#define FIS_ATA_STATUS_ERR BIT(18) +/* bit[31:24] ERR_FIS_TYPE */ +#define FIS_TYPE_SDB BIT(31)
/* ITCT header */ /* qw0 */ @@ -2247,6 +2253,18 @@ static void hisi_sas_set_sense_data(struct sas_task *task, } }
+static bool is_ncq_err(struct hisi_sas_complete_v3_hdr *complete_hdr) +{ + u32 dw0, dw3; + + dw0 = le32_to_cpu(complete_hdr->dw0); + dw3 = le32_to_cpu(complete_hdr->dw3); + + return (dw0 & ERR_PHASE_RESPONSE_FRAME_REV_STAGE) && + (dw3 & FIS_TYPE_SDB) && + (dw3 & FIS_ATA_STATUS_ERR); +} + static void slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task, struct hisi_sas_slot *slot) @@ -2582,6 +2600,10 @@ static void cq_tasklet_v3_hw(unsigned long val) dev_err(dev, "erroneous completion disk err dev id=%d sas_addr=0x%llx CQ hdr: 0x%x 0x%x 0x%x 0x%x\n", device_id, itct->sas_addr, dw0, dw1, complete_hdr->act, dw3); + + if (is_ncq_err(complete_hdr)) + sas_dev->dev_status = HISI_SAS_DEV_NCQ_ERR; + link->eh_info.err_mask |= AC_ERR_DEV; link->eh_info.action |= ATA_EH_RESET; ata_link_abort(link);