From: Wang Chao wangchao342@hisilicon.com
driver inclusion category: bugfix bugzilla: NA CVE: NA
Injecting 2-bit ECC errors confuses the SAS controller, which then needs a host reset to recover. If a device is gone at the same time as the 2-bit ECC errors are injected, we may never receive the ITCT interrupt, so clear_itct_v3_hw() waits for the completion forever. The host reset cannot occur either, because it cannot acquire hisi_hba->sem, so the system hangs.
To solve the issue, use wait_for_completion_timeout() instead of wait_for_completion(), and do not mark the gone device as SAS_PHY_UNUSED if clearing its ITCT fails.
Link: https://lore.kernel.org/r/1571926105-74636-4-git-send-email-john.garry@huawe... Signed-off-by: Wang Chao wangchao342@hisilicon.com Reviewed-by: Zhu Xiongxiong zhuxiongxiong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/scsi/hisi_sas/hisi_sas.h | 4 +++- drivers/scsi/hisi_sas/hisi_sas_main.c | 15 ++++++++++----- drivers/scsi/hisi_sas/hisi_sas_v1_hw.c | 4 +++- drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 12 ++++++++++-- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 11 +++++++++-- 5 files changed, 35 insertions(+), 11 deletions(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h index 012445ee5dc5..80cd8870477b 100644 --- a/drivers/scsi/hisi_sas/hisi_sas.h +++ b/drivers/scsi/hisi_sas/hisi_sas.h @@ -77,6 +77,8 @@ #define HISI_SAS_SATA_PROTOCOL_FPDMA 0x8 #define HISI_SAS_SATA_PROTOCOL_ATAPI 0x10
+#define CLEAR_ITCT_TIMEOUT 20 + struct hisi_hba;
enum { @@ -298,7 +300,7 @@ struct hisi_sas_hw { void (*phy_set_linkrate)(struct hisi_hba *hisi_hba, int phy_no, struct sas_phy_linkrates *linkrates); enum sas_linkrate (*phy_get_max_linkrate)(void); - void (*clear_itct)(struct hisi_hba *hisi_hba, + int (*clear_itct)(struct hisi_hba *hisi_hba, struct hisi_sas_device *dev); void (*free_device)(struct hisi_sas_device *sas_dev); int (*get_wideport_bitmap)(struct hisi_hba *hisi_hba, int port_id); diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 154df004b8a0..3df5d590aab8 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1064,14 +1064,15 @@ static void hisi_sas_dev_gone(struct domain_device *device) struct hisi_sas_device *sas_dev = device->lldd_dev; struct hisi_hba *hisi_hba = dev_to_hisi_hba(device); struct device *dev = hisi_hba->dev; - int rc = 0; + int rc0 = 0; + int rc1 = 0;
dev_info(dev, "dev[%d:%x] is gone\n", sas_dev->device_id, sas_dev->dev_type);
down(&hisi_hba->sem); if (!test_bit(HISI_SAS_RESET_BIT, &hisi_hba->flags)) { - rc = hisi_sas_internal_task_abort(hisi_hba, device, + rc0 = hisi_sas_internal_task_abort(hisi_hba, device, HISI_SAS_INT_ABT_DEV, 0);
hisi_sas_dereg_device(hisi_hba, device); @@ -1080,16 +1081,20 @@ static void hisi_sas_dev_gone(struct domain_device *device) dev_info(dev, "dev gone: release remain resources anyway.\n"); }
- hisi_hba->hw->clear_itct(hisi_hba, sas_dev); + rc1 = hisi_hba->hw->clear_itct(hisi_hba, sas_dev); device->lldd_dev = NULL; }
if (hisi_hba->hw->free_device) hisi_hba->hw->free_device(sas_dev); - sas_dev->dev_type = SAS_PHY_UNUSED; + + /* Don't mark it as SAS_PHY_UNUSED if failed to clear ITCT */ + if (!rc1) + sas_dev->dev_type = SAS_PHY_UNUSED; sas_dev->sas_device = NULL; up(&hisi_hba->sem); - if (rc == -EIO) { + + if (rc0 == -EIO) { dev_err(dev, "internal abort timeout for dev gone.\n"); queue_work(hisi_hba->wq, &hisi_hba->rst_work); } diff --git a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c index 8266b388e167..d57e70a6968a 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c @@ -537,7 +537,7 @@ static void setup_itct_v1_hw(struct hisi_hba *hisi_hba, (0xff00ULL << ITCT_HDR_REJ_OPEN_TL_OFF)); }
-static void clear_itct_v1_hw(struct hisi_hba *hisi_hba, +static int clear_itct_v1_hw(struct hisi_hba *hisi_hba, struct hisi_sas_device *sas_dev) { u64 dev_id = sas_dev->device_id; @@ -557,6 +557,8 @@ static void clear_itct_v1_hw(struct hisi_hba *hisi_hba, qw0 = cpu_to_le64(itct->qw0); qw0 &= ~ITCT_HDR_VALID_MSK; itct->qw0 = cpu_to_le64(qw0); + + return 0; }
static int reset_hw_v1_hw(struct hisi_hba *hisi_hba) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c index 206a13f53aa5..e50fef647a63 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c @@ -978,13 +978,14 @@ static void setup_itct_v2_hw(struct hisi_hba *hisi_hba, (0x1ULL << ITCT_HDR_RTOLT_OFF)); }
-static void clear_itct_v2_hw(struct hisi_hba *hisi_hba, +static int clear_itct_v2_hw(struct hisi_hba *hisi_hba, struct hisi_sas_device *sas_dev) { DECLARE_COMPLETION_ONSTACK(completion); u64 dev_id = sas_dev->device_id; struct hisi_sas_itct *itct = &hisi_hba->itct[dev_id]; u32 reg_val = hisi_sas_read32(hisi_hba, ENT_INT_SRC3); + struct device *dev = hisi_hba->dev; int i;
sas_dev->completion = &completion; @@ -994,13 +995,20 @@ static void clear_itct_v2_hw(struct hisi_hba *hisi_hba, hisi_sas_write32(hisi_hba, ENT_INT_SRC3, ENT_INT_SRC3_ITC_INT_MSK);
+ /* need to set register twice to clear ITCT for v2 hw */ for (i = 0; i < 2; i++) { reg_val = ITCT_CLR_EN_MSK | (dev_id & ITCT_DEV_MSK); hisi_sas_write32(hisi_hba, ITCT_CLR, reg_val); - wait_for_completion(sas_dev->completion); + if (!wait_for_completion_timeout(sas_dev->completion, + CLEAR_ITCT_TIMEOUT * HZ)) { + dev_warn(dev, "failed to clear ITCT\n"); + return -ETIMEDOUT; + }
memset(itct, 0, sizeof(struct hisi_sas_itct)); } + + return 0; }
static void free_device_v2_hw(struct hisi_sas_device *sas_dev) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index dcac0b97140d..c3b57d0a6f24 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -856,13 +856,14 @@ static void setup_itct_v3_hw(struct hisi_hba *hisi_hba, (0x1ULL << ITCT_HDR_RTOLT_OFF)); }
-static void clear_itct_v3_hw(struct hisi_hba *hisi_hba, +static int clear_itct_v3_hw(struct hisi_hba *hisi_hba, struct hisi_sas_device *sas_dev) { DECLARE_COMPLETION_ONSTACK(completion); u64 dev_id = sas_dev->device_id; struct hisi_sas_itct *itct = &hisi_hba->itct[dev_id]; u32 reg_val = hisi_sas_read32(hisi_hba, ENT_INT_SRC3); + struct device *dev = hisi_hba->dev;
sas_dev->completion = &completion;
@@ -875,8 +876,14 @@ static void clear_itct_v3_hw(struct hisi_hba *hisi_hba, reg_val = ITCT_CLR_EN_MSK | (dev_id & ITCT_DEV_MSK); hisi_sas_write32(hisi_hba, ITCT_CLR, reg_val);
- wait_for_completion(sas_dev->completion); + if (!wait_for_completion_timeout(sas_dev->completion, + CLEAR_ITCT_TIMEOUT * HZ)) { + dev_warn(dev, "failed to clear ITCT\n"); + return -ETIMEDOUT; + } + memset(itct, 0, sizeof(struct hisi_sas_itct)); + return 0; }
static void dereg_device_v3_hw(struct hisi_hba *hisi_hba,