From: Xingui Yang yangxingui@huawei.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5CG2F CVE: NA
-----------------------------------------------------------------------
SATA_DISK_ERR is bit 16 of CQ dw3. When it is set to 1, the SATA disk is in an error state (for example, an NCQ error) and the IPTT reported in the completion entry is invalid. In this scenario, new I/O issued to this disk will be rejected by the SAS controller, and all I/O remaining in the disk should be aborted.
To ensure that the SAS controller does not operate on memory before all I/O has been aborted, all I/O remaining in the disk should first be set to the aborted state via a register and then completed with status SAS_ABORTED_TASK through task_done(). The SCSI error handling thread is then woken up immediately to analyze the cause of the error, for example by reading the log page for error details.
Signed-off-by: Xingui Yang yangxingui@huawei.com Reviewed-by: kang fenglong kangfenglong@huawei.com Acked-by: Xie XiuQi xiexiuqi@huawei.com Signed-off-by: Laibin Qiu qiulaibin@huawei.com --- drivers/scsi/hisi_sas/hisi_sas.h | 4 +- drivers/scsi/hisi_sas/hisi_sas_main.c | 81 +++++++++++++++++++-- drivers/scsi/hisi_sas/hisi_sas_v1_hw.c | 2 +- drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 2 +- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 97 +++++++++++++++++++++----- 5 files changed, 162 insertions(+), 24 deletions(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h index a791401be7b3..8628dfea9703 100644 --- a/drivers/scsi/hisi_sas/hisi_sas.h +++ b/drivers/scsi/hisi_sas/hisi_sas.h @@ -600,7 +600,8 @@ extern void hisi_sas_phy_enable(struct hisi_hba *hisi_hba, int phy_no, extern void hisi_sas_phy_down(struct hisi_hba *hisi_hba, int phy_no, int rdy); extern void hisi_sas_slot_task_free(struct hisi_hba *hisi_hba, struct sas_task *task, - struct hisi_sas_slot *slot); + struct hisi_sas_slot *slot, + bool need_lock); extern void hisi_sas_init_mem(struct hisi_hba *hisi_hba); extern void hisi_sas_rst_work_handler(struct work_struct *work); extern void hisi_sas_sync_rst_work_handler(struct work_struct *work); @@ -611,6 +612,7 @@ extern void hisi_sas_release_tasks(struct hisi_hba *hisi_hba); extern u8 hisi_sas_get_prog_phy_linkrate_mask(enum sas_linkrate max); extern void hisi_sas_controller_reset_prepare(struct hisi_hba *hisi_hba); extern void hisi_sas_controller_reset_done(struct hisi_hba *hisi_hba); +extern void hisi_sas_complete_disk_io(struct hisi_sas_device *sas_dev); extern void hisi_sas_debugfs_init(struct hisi_hba *hisi_hba); extern void hisi_sas_debugfs_exit(struct hisi_hba *hisi_hba); extern void hisi_sas_snapshot_regs(struct hisi_hba *hisi_hba); diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 93df1c6b94c7..5de1e8ba5c6a 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -229,7 +229,7 @@ static void hisi_sas_slot_index_init(struct hisi_hba *hisi_hba) }
void hisi_sas_slot_task_free(struct hisi_hba *hisi_hba, struct sas_task *task, - struct hisi_sas_slot *slot) + struct hisi_sas_slot *slot, bool need_lock) { unsigned long flags; int device_id = slot->device_id; @@ -260,9 +260,13 @@ void hisi_sas_slot_task_free(struct hisi_hba *hisi_hba, struct sas_task *task, } }
- spin_lock_irqsave(&sas_dev->lock, flags); - list_del_init(&slot->entry); - spin_unlock_irqrestore(&sas_dev->lock, flags); + if (need_lock) { + spin_lock_irqsave(&sas_dev->lock, flags); + list_del_init(&slot->entry); + spin_unlock_irqrestore(&sas_dev->lock, flags); + } else { + list_del_init(&slot->entry); + }
memset(slot, 0, offsetof(struct hisi_sas_slot, buf));
@@ -1011,7 +1015,7 @@ static void hisi_sas_do_release_task(struct hisi_hba *hisi_hba, struct sas_task task->task_done(task); }
- hisi_sas_slot_task_free(hisi_hba, task, slot); + hisi_sas_slot_task_free(hisi_hba, task, slot, true); }
static void hisi_sas_release_task(struct hisi_hba *hisi_hba, @@ -3747,6 +3751,73 @@ int hisi_sas_remove(struct platform_device *pdev) } EXPORT_SYMBOL_GPL(hisi_sas_remove);
+/*
+ * Complete all I/O pending on @sas_dev with SAS_ABORTED_TASK (NCQ error path).
+ */
+void hisi_sas_complete_disk_io(struct hisi_sas_device *sas_dev)
+{
+	struct hisi_hba *hisi_hba;
+	struct device *dev;
+	struct domain_device *device;
+	struct hisi_sas_slot *slot, *slot2;
+	struct sas_task *task;
+	struct task_status_struct *ts;
+	struct sas_ha_struct *ha;
+	bool is_internal;
+	unsigned long flags;
+
+	if (!sas_dev)
+		return;
+	hisi_hba = sas_dev->hisi_hba;
+	dev = hisi_hba->dev;
+	device = sas_dev->sas_device;
+	spin_lock_irqsave(&sas_dev->lock, flags);
+	hisi_sas_dereg_device(hisi_hba, device);
+	list_for_each_entry_safe(slot, slot2, &sas_dev->list, entry) {
+		task = slot->task;
+		if (unlikely(!task || !task->lldd_task || !task->dev))
+			continue;
+
+		/* IRQs are already off; a nested irqsave would clobber 'flags' */
+		spin_lock(&task->task_state_lock);
+		if ((task->task_state_flags & SAS_TASK_STATE_ABORTED) ||
+		    (task->task_state_flags & SAS_TASK_STATE_DONE)) {
+			spin_unlock(&task->task_state_lock);
+			dev_info(dev, "slot complete: iptt=%d task(%pK) already finished.\n",
+				slot->idx, task);
+			continue;
+		}
+		task->task_state_flags |= SAS_TASK_STATE_DONE;
+		spin_unlock(&task->task_state_lock);
+
+		is_internal = slot->is_internal;
+		ts = &task->task_status;
+		device = task->dev;
+		ha = device->port->ha;
+		memset(ts, 0, sizeof(*ts));
+		ts->stat = SAS_ABORTED_TASK;
+		ts->resp = SAS_TASK_COMPLETE;
+
+		/* need_lock == false: sas_dev->lock is already held here */
+		hisi_sas_slot_task_free(hisi_hba, task, slot, false);
+
+		if (!is_internal && (task->task_proto != SAS_PROTOCOL_SMP)) {
+			spin_lock(&device->done_lock);
+			if (test_bit(SAS_HA_FROZEN, &ha->state)) {
+				spin_unlock(&device->done_lock);
+				dev_info(dev, "slot complete: task(%pK) ignored\n", task);
+				continue;
+			}
+			spin_unlock(&device->done_lock);
+		}
+
+		if (task->task_done)
+			task->task_done(task);
+	}
+	spin_unlock_irqrestore(&sas_dev->lock, flags);
+}
+EXPORT_SYMBOL_GPL(hisi_sas_complete_disk_io);
+
 bool hisi_sas_debugfs_enable = true;
 EXPORT_SYMBOL_GPL(hisi_sas_debugfs_enable);
module_param_named(debugfs_enable, hisi_sas_debugfs_enable, bool, 0444); diff --git a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c index 452665c641a6..6485e2b6456c 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c @@ -1320,7 +1320,7 @@ static int slot_complete_v1_hw(struct hisi_hba *hisi_hba, }
out: - hisi_sas_slot_task_free(hisi_hba, task, slot); + hisi_sas_slot_task_free(hisi_hba, task, slot, true); sts = ts->stat;
if (task->task_done) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c index 1c452f16b6e2..bd0639fd5162 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c @@ -2480,7 +2480,7 @@ slot_complete_v2_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot) } task->task_state_flags |= SAS_TASK_STATE_DONE; spin_unlock_irqrestore(&task->task_state_lock, flags); - hisi_sas_slot_task_free(hisi_hba, task, slot); + hisi_sas_slot_task_free(hisi_hba, task, slot, true);
if (!is_internal && (task->task_proto != SAS_PROTOCOL_SMP)) { spin_lock_irqsave(&device->done_lock, flags); diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 79026e887ee8..e84246c2331a 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -402,6 +402,17 @@ #define CMPLT_HDR_ERX_MSK (0x1 << CMPLT_HDR_ERX_OFF) #define CMPLT_HDR_ABORT_STAT_OFF 13 #define CMPLT_HDR_ABORT_STAT_MSK (0x7 << CMPLT_HDR_ABORT_STAT_OFF) +/* bit[9:2] Error Phase */ +#define ERR_PHASE_DQ_ENTRY_PARSING BIT(2) +#define ERR_PHASE_FRAME_LINK_STAGE BIT(3) +#define ERR_PHASE_CMD_TM_FRAME_SEND_STAGE BIT(4) +#define ERR_PHASE_DATA_FRAME_SEND_STAGE BIT(5) +#define ERR_PHASE_DATA_FRAME_REV_STAGE BIT(6) +#define ERR_PHASE_XFERDY_FRAME_REV_STAGE BIT(7) +#define ERR_PHASE_RESPONSE_FRAME_REV_STAGE BIT(8) +#define ERR_PHASE_DPH_SCHEDULING BIT(9) +#define ERR_PHASE_FRAME_REV_STAGE (ERR_PHASE_DATA_FRAME_REV_STAGE | \ + ERR_PHASE_XFERDY_FRAME_REV_STAGE | ERR_PHASE_RESPONSE_FRAME_REV_STAGE) /* abort_stat */ #define STAT_IO_NOT_VALID 0x1 #define STAT_IO_NO_DEVICE 0x2 @@ -413,8 +424,19 @@ #define CMPLT_HDR_DEV_ID_OFF 16 #define CMPLT_HDR_DEV_ID_MSK (0xffff << CMPLT_HDR_DEV_ID_OFF) /* dw3 */ +#define COMLT_HDR_SATA_DISK_ERR_OFF 16 +#define CMPLT_HDR_SATA_DISK_ERR_MSK (0x1 << COMLT_HDR_SATA_DISK_ERR_OFF) #define CMPLT_HDR_IO_IN_TARGET_OFF 17 #define CMPLT_HDR_IO_IN_TARGET_MSK (0x1 << CMPLT_HDR_IO_IN_TARGET_OFF) +/* bit[31:24] ERR_FIS_TYPE */ +#define CQ_PIO_DATA_FIS_TYPE BIT(24) +#define CQ_PIO_SETUP_FIS_TYPE BIT(25) +#define CQ_D2H_FIS_TYPE BIT(26) +#define CQ_WT_PIO_SETUP_FIS_TYPE BIT(27) +#define CQ_WT_DMA_SETUP_FIS_TYPE BIT(28) +#define CQ_RD_DMA_SETUP_FIS_TYPE BIT(29) +#define CQ_DMA_ACTIVE_FIS_TYPE BIT(30) +#define CQ_SDB_FIS_TYPE BIT(31)
/* ITCT header */ /* qw0 */ @@ -2233,6 +2255,26 @@ static void hisi_sas_set_sense_data(struct sas_task *task, } }
+static bool is_err_fis(u32 dw0) +{ + /* 0x3 means abnormal completion */ + return ((dw0 & CMPLT_HDR_CMPLT_MSK) == 0x3) && + (dw0 & ERR_PHASE_FRAME_REV_STAGE); +} + +static bool is_ncq_err(struct hisi_sas_complete_v3_hdr *complete_hdr) +{ + u32 dw0, dw3; + + dw0 = le32_to_cpu(complete_hdr->dw0); + dw3 = le32_to_cpu(complete_hdr->dw3); + + if (!is_err_fis(dw0) || !(dw3 & CMPLT_HDR_SATA_DISK_ERR_MSK)) + return false; + + return dw3 & CQ_SDB_FIS_TYPE; +} + static void slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task, struct hisi_sas_slot *slot) @@ -2336,16 +2378,24 @@ slot_complete_v3_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot) if (unlikely(!task || !task->lldd_task || !task->dev)) return -EINVAL;
- ts = &task->task_status; - device = task->dev; - ha = device->port->ha; - sas_dev = device->lldd_dev; - spin_lock_irqsave(&task->task_state_lock, flags); + if (task->task_state_flags & SAS_TASK_STATE_ABORTED || + task->task_state_flags & SAS_TASK_STATE_DONE) { + spin_unlock_irqrestore(&task->task_state_lock, flags); + dev_info(dev, "slot complete: iptt=%d task(%pK) already finished.\n", + slot->idx, task); + return SAS_ABORTED_TASK; + } task->task_state_flags &= ~(SAS_TASK_STATE_PENDING | SAS_TASK_AT_INITIATOR); + task->task_state_flags |= SAS_TASK_STATE_DONE; spin_unlock_irqrestore(&task->task_state_lock, flags);
+ ts = &task->task_status; + device = task->dev; + ha = device->port->ha; + sas_dev = device->lldd_dev; + memset(ts, 0, sizeof(*ts)); ts->resp = SAS_TASK_COMPLETE;
@@ -2436,6 +2486,9 @@ slot_complete_v3_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot) iu->resp_data[0]); } if (unlikely(slot->abort)) { + spin_lock_irqsave(&task->task_state_lock, flags); + task->task_state_flags &= ~SAS_TASK_STATE_DONE; + spin_unlock_irqrestore(&task->task_state_lock, flags); sas_task_abort(task); return ts->stat; } @@ -2499,15 +2552,7 @@ slot_complete_v3_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot)
out: sts = ts->stat; - spin_lock_irqsave(&task->task_state_lock, flags); - if (task->task_state_flags & SAS_TASK_STATE_ABORTED) { - spin_unlock_irqrestore(&task->task_state_lock, flags); - dev_info(dev, "slot complete: task(%pK) aborted\n", task); - return SAS_ABORTED_TASK; - } - task->task_state_flags |= SAS_TASK_STATE_DONE; - spin_unlock_irqrestore(&task->task_state_lock, flags); - hisi_sas_slot_task_free(hisi_hba, task, slot); + hisi_sas_slot_task_free(hisi_hba, task, slot, true);
if (!is_internal && (task->task_proto != SAS_PROTOCOL_SMP)) { spin_lock_irqsave(&device->done_lock, flags); @@ -2543,12 +2588,32 @@ static void cq_tasklet_v3_hw(unsigned long val) while (rd_point != wr_point) { struct hisi_sas_complete_v3_hdr *complete_hdr; struct device *dev = hisi_hba->dev; + u32 dw0, dw1, dw3; int iptt;
complete_hdr = &complete_queue[rd_point]; + dw0 = le32_to_cpu(complete_hdr->dw0); + dw1 = le32_to_cpu(complete_hdr->dw1); + dw3 = le32_to_cpu(complete_hdr->dw3); + iptt = dw1 & CMPLT_HDR_IPTT_MSK;
- iptt = (complete_hdr->dw1) & CMPLT_HDR_IPTT_MSK; - if (likely(iptt < HISI_SAS_COMMAND_ENTRIES_V3_HW)) { + /* + * check for NCQ error and current iptt is invalid, all NCQ + * commands should be aborted + */ + if (unlikely(is_ncq_err(complete_hdr))) { + int device_id = (dw1 & CMPLT_HDR_DEV_ID_MSK) >> + CMPLT_HDR_DEV_ID_OFF; + struct hisi_sas_device *sas_dev = + &hisi_hba->devices[device_id]; + struct hisi_sas_itct *itct = + &hisi_hba->itct[device_id]; + + dev_err(dev, "erroneous completion ncq err dev id=%d sas_addr=0x%llx CQ hdr: 0x%x 0x%x 0x%x 0x%x\n", + device_id, itct->sas_addr, dw0, dw1, + complete_hdr->act, dw3); + hisi_sas_complete_disk_io(sas_dev); + } else if (likely(iptt < HISI_SAS_COMMAND_ENTRIES_V3_HW)) { slot = &hisi_hba->slot_info[iptt]; slot->cmplt_queue_slot = rd_point; slot->cmplt_queue = queue;