From: Bing Xia xiabing12@h-partners.com
This series contains some fixes and cleanups including: - Add slave_destroy interface for v3 hw; - Remove hisi_hba->timer for v3 hw; - Handle the NCQ error returned by D2H frame; - Fix disk not being scanned in after being removed; - Add a helper sas_get_sas_addr_and_dev_type(); - Directly call register snapshot instead of using workqueue; - Allocate DFX memory during dump trigger; - Fix a deadlock issue related to automatic debugfs; - Check whether debugfs is enabled before removing or releasing it; - Check usage count only when the runtime PM status is RPM_SUSPENDING - Remove redundant checks for automatic debugfs dump; - Allocation SMP request is aligned to ARCH_DMA_MINALIGN; - Modify the deadline for ata_wait_after_reset(); - Revert "scsi: hisi_sas: Disable SATA disk phy for severe I_T nexus reset failure"
Qi Liu (1): scsi: hisi_sas: Add slave_destroy interface for v3 hw
Xiang Chen (1): scsi: hisi_sas: Remove hisi_hba->timer for v3 hw
Xingui Yang (3): scsi: hisi_sas: Handle the NCQ error returned by D2H frame scsi: libsas: Fix disk not being scanned in after being removed scsi: libsas: Add a helper sas_get_sas_addr_and_dev_type()
Yihang Li (8): scsi: hisi_sas: Directly call register snapshot instead of using workqueue scsi: hisi_sas: Allocate DFX memory during dump trigger scsi: hisi_sas: Fix a deadlock issue related to automatic dump scsi: hisi_sas: Check whether debugfs is enabled before removing or releasing it scsi: hisi_sas: Check usage count only when the runtime PM status is RPM_SUSPENDING scsi: hisi_sas: Remove redundant checks for automatic debugfs dump scsi: libsas: Allocation SMP request is aligned to ARCH_DMA_MINALIGN scsi: hisi_sas: Modify the deadline for ata_wait_after_reset()
xiabing (1): Revert "scsi: hisi_sas: Disable SATA disk phy for severe I_T nexus reset failure"
drivers/scsi/hisi_sas/hisi_sas.h | 3 +- drivers/scsi/hisi_sas/hisi_sas_main.c | 60 +++++------ drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 138 +++++++++++++------------ drivers/scsi/libsas/sas_expander.c | 53 ++++++---- 4 files changed, 140 insertions(+), 114 deletions(-)
mainline inclusion from mainline-v6.7-rc1 commit 2ff07b5c6fe9173e7a7de3b23f300d71ad4d8fde category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I96KNQ CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
Currently, register information dump is performed via workqueue, regardless of the trigger mode (automatic or manual). Because dumping registers through a workqueue introduces a delay, the exact register information at trigger time cannot be obtained.
Call register snapshot directly instead of through a workqueue.
Signed-off-by: Yihang Li liyihang9@huawei.com Signed-off-by: Xiang Chen chenxiang66@hisilicon.com Link: https://lore.kernel.org/r/1694571327-78697-3-git-send-email-chenxiang66@hisi... Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Bing Xia xiabing12@h-partners.com --- drivers/scsi/hisi_sas/hisi_sas.h | 1 - drivers/scsi/hisi_sas/hisi_sas_main.c | 7 +++++-- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 14 +++----------- 3 files changed, 8 insertions(+), 14 deletions(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h index 9e73e9cbbcfc..3d511c44c02d 100644 --- a/drivers/scsi/hisi_sas/hisi_sas.h +++ b/drivers/scsi/hisi_sas/hisi_sas.h @@ -451,7 +451,6 @@ struct hisi_hba { const struct hisi_sas_hw *hw; /* Low level hw interface */ unsigned long sata_dev_bitmap[BITS_TO_LONGS(HISI_SAS_MAX_DEVICES)]; struct work_struct rst_work; - struct work_struct debugfs_work; u32 phy_state; u32 intr_coal_ticks; /* Time of interrupt coalesce in us */ u32 intr_coal_count; /* Interrupt count to coalesce */ diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index b155ac800979..bbb7b2d9ffcf 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1961,8 +1961,11 @@ static bool hisi_sas_internal_abort_timeout(struct sas_task *task, struct hisi_hba *hisi_hba = dev_to_hisi_hba(device); struct hisi_sas_internal_abort_data *timeout = data;
- if (hisi_sas_debugfs_enable && hisi_hba->debugfs_itct[0].itct) - queue_work(hisi_hba->wq, &hisi_hba->debugfs_work); + if (hisi_sas_debugfs_enable && hisi_hba->debugfs_itct[0].itct) { + down(&hisi_hba->sem); + hisi_hba->hw->debugfs_snapshot_regs(hisi_hba); + up(&hisi_hba->sem); + }
if (task->task_state_flags & SAS_TASK_STATE_DONE) { pr_err("Internal abort: timeout %016llx\n", diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index e914c0c13bb5..055fd666303b 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -558,7 +558,6 @@ static int experimental_iopoll_q_cnt; module_param(experimental_iopoll_q_cnt, int, 0444); MODULE_PARM_DESC(experimental_iopoll_q_cnt, "number of queues to be used as poll mode, def=0");
-static void debugfs_work_handler_v3_hw(struct work_struct *work); static void debugfs_snapshot_regs_v3_hw(struct hisi_hba *hisi_hba);
static u32 hisi_sas_read32(struct hisi_hba *hisi_hba, u32 off) @@ -3388,7 +3387,6 @@ hisi_sas_shost_alloc_pci(struct pci_dev *pdev) hisi_hba = shost_priv(shost);
INIT_WORK(&hisi_hba->rst_work, hisi_sas_rst_work_handler); - INIT_WORK(&hisi_hba->debugfs_work, debugfs_work_handler_v3_hw); hisi_hba->hw = &hisi_sas_v3_hw; hisi_hba->pci_dev = pdev; hisi_hba->dev = dev; @@ -3910,7 +3908,9 @@ static ssize_t debugfs_trigger_dump_v3_hw_write(struct file *file, if (buf[0] != '1') return -EFAULT;
- queue_work(hisi_hba->wq, &hisi_hba->debugfs_work); + down(&hisi_hba->sem); + debugfs_snapshot_regs_v3_hw(hisi_hba); + up(&hisi_hba->sem);
return count; } @@ -4661,14 +4661,6 @@ static void debugfs_fifo_init_v3_hw(struct hisi_hba *hisi_hba) } }
-static void debugfs_work_handler_v3_hw(struct work_struct *work) -{ - struct hisi_hba *hisi_hba = - container_of(work, struct hisi_hba, debugfs_work); - - debugfs_snapshot_regs_v3_hw(hisi_hba); -} - static void debugfs_release_v3_hw(struct hisi_hba *hisi_hba, int dump_index) { struct device *dev = hisi_hba->dev;
mainline inclusion from mainline-v6.7-rc1 commit 63f0733d07ce60252e885602b39571ade0441015 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I96KNQ CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
Currently, if CONFIG_SCSI_HISI_SAS_DEBUGFS_DEFAULT_ENABLE is enabled, the memory space used by DFX is allocated during device initialization, which occupies a large number of memory resources. The memory usage before and after the driver is loaded is as follows:
Memory usage before the driver is loaded: $ free -m total used free shared buff/cache available Mem: 867352 2578 864037 11 735 861681 Swap: 4095 0 4095
Memory usage after the driver, which includes 4 HBAs, is loaded: $ insmod hisi_sas_v3_hw.ko $ free -m total used free shared buff/cache available Mem: 867352 4760 861848 11 743 859495 Swap: 4095 0 4095
The driver with 4 HBAs connected will allocate about 110 MB of memory without enabling debugfs.
Therefore, to avoid wasting memory resources, DFX memory is allocated during dump triggering. The dump may fail due to memory allocation failure. After this change, each dump costs about 10 MB of memory, and each dump lasts about 100 ms.
Signed-off-by: Yihang Li liyihang9@huawei.com Signed-off-by: Xiang Chen chenxiang66@hisilicon.com Link: https://lore.kernel.org/r/1694571327-78697-4-git-send-email-chenxiang66@hisi... Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Bing Xia xiabing12@h-partners.com --- drivers/scsi/hisi_sas/hisi_sas.h | 2 +- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 93 +++++++++++++------------- 2 files changed, 46 insertions(+), 49 deletions(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h index 3d511c44c02d..1e4550156b73 100644 --- a/drivers/scsi/hisi_sas/hisi_sas.h +++ b/drivers/scsi/hisi_sas/hisi_sas.h @@ -343,7 +343,7 @@ struct hisi_sas_hw { u8 reg_index, u8 reg_count, u8 *write_data); void (*wait_cmds_complete_timeout)(struct hisi_hba *hisi_hba, int delay_ms, int timeout_ms); - void (*debugfs_snapshot_regs)(struct hisi_hba *hisi_hba); + int (*debugfs_snapshot_regs)(struct hisi_hba *hisi_hba); int complete_hdr_size; const struct scsi_host_template *sht; }; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 055fd666303b..2f515bf95bd3 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -558,7 +558,7 @@ static int experimental_iopoll_q_cnt; module_param(experimental_iopoll_q_cnt, int, 0444); MODULE_PARM_DESC(experimental_iopoll_q_cnt, "number of queues to be used as poll mode, def=0");
-static void debugfs_snapshot_regs_v3_hw(struct hisi_hba *hisi_hba); +static int debugfs_snapshot_regs_v3_hw(struct hisi_hba *hisi_hba);
static u32 hisi_sas_read32(struct hisi_hba *hisi_hba, u32 off) { @@ -3858,37 +3858,6 @@ static void debugfs_create_files_v3_hw(struct hisi_hba *hisi_hba) &debugfs_ras_v3_hw_fops); }
-static void debugfs_snapshot_regs_v3_hw(struct hisi_hba *hisi_hba) -{ - int debugfs_dump_index = hisi_hba->debugfs_dump_index; - struct device *dev = hisi_hba->dev; - u64 timestamp = local_clock(); - - if (debugfs_dump_index >= hisi_sas_debugfs_dump_count) { - dev_warn(dev, "dump count exceeded!\n"); - return; - } - - do_div(timestamp, NSEC_PER_MSEC); - hisi_hba->debugfs_timestamp[debugfs_dump_index] = timestamp; - - debugfs_snapshot_prepare_v3_hw(hisi_hba); - - debugfs_snapshot_global_reg_v3_hw(hisi_hba); - debugfs_snapshot_port_reg_v3_hw(hisi_hba); - debugfs_snapshot_axi_reg_v3_hw(hisi_hba); - debugfs_snapshot_ras_reg_v3_hw(hisi_hba); - debugfs_snapshot_cq_reg_v3_hw(hisi_hba); - debugfs_snapshot_dq_reg_v3_hw(hisi_hba); - debugfs_snapshot_itct_reg_v3_hw(hisi_hba); - debugfs_snapshot_iost_reg_v3_hw(hisi_hba); - - debugfs_create_files_v3_hw(hisi_hba); - - debugfs_snapshot_restore_v3_hw(hisi_hba); - hisi_hba->debugfs_dump_index++; -} - static ssize_t debugfs_trigger_dump_v3_hw_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) @@ -3896,9 +3865,6 @@ static ssize_t debugfs_trigger_dump_v3_hw_write(struct file *file, struct hisi_hba *hisi_hba = file->f_inode->i_private; char buf[8];
- if (hisi_hba->debugfs_dump_index >= hisi_sas_debugfs_dump_count) - return -EFAULT; - if (count > 8) return -EFAULT;
@@ -3909,7 +3875,10 @@ static ssize_t debugfs_trigger_dump_v3_hw_write(struct file *file, return -EFAULT;
down(&hisi_hba->sem); - debugfs_snapshot_regs_v3_hw(hisi_hba); + if (debugfs_snapshot_regs_v3_hw(hisi_hba)) { + up(&hisi_hba->sem); + return -EFAULT; + } up(&hisi_hba->sem);
return count; @@ -4695,7 +4664,7 @@ static int debugfs_alloc_v3_hw(struct hisi_hba *hisi_hba, int dump_index) { const struct hisi_sas_hw *hw = hisi_hba->hw; struct device *dev = hisi_hba->dev; - int p, c, d, r, i; + int p, c, d, r; size_t sz;
for (r = 0; r < DEBUGFS_REGS_NUM; r++) { @@ -4775,11 +4744,48 @@ static int debugfs_alloc_v3_hw(struct hisi_hba *hisi_hba, int dump_index)
return 0; fail: - for (i = 0; i < hisi_sas_debugfs_dump_count; i++) - debugfs_release_v3_hw(hisi_hba, i); + debugfs_release_v3_hw(hisi_hba, dump_index); return -ENOMEM; }
+static int debugfs_snapshot_regs_v3_hw(struct hisi_hba *hisi_hba) +{ + int debugfs_dump_index = hisi_hba->debugfs_dump_index; + struct device *dev = hisi_hba->dev; + u64 timestamp = local_clock(); + + if (debugfs_dump_index >= hisi_sas_debugfs_dump_count) { + dev_warn(dev, "dump count exceeded!\n"); + return -EINVAL; + } + + if (debugfs_alloc_v3_hw(hisi_hba, debugfs_dump_index)) { + dev_warn(dev, "failed to alloc memory\n"); + return -ENOMEM; + } + + do_div(timestamp, NSEC_PER_MSEC); + hisi_hba->debugfs_timestamp[debugfs_dump_index] = timestamp; + + debugfs_snapshot_prepare_v3_hw(hisi_hba); + + debugfs_snapshot_global_reg_v3_hw(hisi_hba); + debugfs_snapshot_port_reg_v3_hw(hisi_hba); + debugfs_snapshot_axi_reg_v3_hw(hisi_hba); + debugfs_snapshot_ras_reg_v3_hw(hisi_hba); + debugfs_snapshot_cq_reg_v3_hw(hisi_hba); + debugfs_snapshot_dq_reg_v3_hw(hisi_hba); + debugfs_snapshot_itct_reg_v3_hw(hisi_hba); + debugfs_snapshot_iost_reg_v3_hw(hisi_hba); + + debugfs_create_files_v3_hw(hisi_hba); + + debugfs_snapshot_restore_v3_hw(hisi_hba); + hisi_hba->debugfs_dump_index++; + + return 0; +} + static void debugfs_phy_down_cnt_init_v3_hw(struct hisi_hba *hisi_hba) { struct dentry *dir = debugfs_create_dir("phy_down_cnt", @@ -4866,7 +4872,6 @@ static void debugfs_exit_v3_hw(struct hisi_hba *hisi_hba) static void debugfs_init_v3_hw(struct hisi_hba *hisi_hba) { struct device *dev = hisi_hba->dev; - int i;
hisi_hba->debugfs_dir = debugfs_create_dir(dev_name(dev), hisi_sas_debugfs_dir); @@ -4883,14 +4888,6 @@ static void debugfs_init_v3_hw(struct hisi_hba *hisi_hba)
debugfs_phy_down_cnt_init_v3_hw(hisi_hba); debugfs_fifo_init_v3_hw(hisi_hba); - - for (i = 0; i < hisi_sas_debugfs_dump_count; i++) { - if (debugfs_alloc_v3_hw(hisi_hba, i)) { - debugfs_exit_v3_hw(hisi_hba); - dev_dbg(dev, "failed to init debugfs!\n"); - break; - } - } }
static int
mainline inclusion from mainline-v6.9-rc1 commit 3c4f53b2c341ec6428b98cb51a89a09b025d0953 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I96KNQ CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
If we issue a command to disable a PHY, the device attached to it will go offline. If a 2-bit ECC error occurs at the same time, a hung task may be found:
[ 4613.652388] INFO: task kworker/u256:0:165233 blocked for more than 120 seconds. [ 4613.666297] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 4613.674809] task:kworker/u256:0 state:D stack: 0 pid:165233 ppid: 2 flags:0x00000208 [ 4613.683959] Workqueue: 0000:74:02.0_disco_q sas_revalidate_domain [libsas] [ 4613.691518] Call trace: [ 4613.694678] __switch_to+0xf8/0x17c [ 4613.698872] __schedule+0x660/0xee0 [ 4613.703063] schedule+0xac/0x240 [ 4613.706994] schedule_timeout+0x500/0x610 [ 4613.711705] __down+0x128/0x36c [ 4613.715548] down+0x240/0x2d0 [ 4613.719221] hisi_sas_internal_abort_timeout+0x1bc/0x260 [hisi_sas_main] [ 4613.726618] sas_execute_internal_abort+0x144/0x310 [libsas] [ 4613.732976] sas_execute_internal_abort_dev+0x44/0x60 [libsas] [ 4613.739504] hisi_sas_internal_task_abort_dev.isra.0+0xbc/0x1b0 [hisi_sas_main] [ 4613.747499] hisi_sas_dev_gone+0x174/0x250 [hisi_sas_main] [ 4613.753682] sas_notify_lldd_dev_gone+0xec/0x2e0 [libsas] [ 4613.759781] sas_unregister_common_dev+0x4c/0x7a0 [libsas] [ 4613.765962] sas_destruct_devices+0xb8/0x120 [libsas] [ 4613.771709] sas_do_revalidate_domain.constprop.0+0x1b8/0x31c [libsas] [ 4613.778930] sas_revalidate_domain+0x60/0xa4 [libsas] [ 4613.784716] process_one_work+0x248/0x950 [ 4613.789424] worker_thread+0x318/0x934 [ 4613.793878] kthread+0x190/0x200 [ 4613.797810] ret_from_fork+0x10/0x18 [ 4613.802121] INFO: task kworker/u256:4:316722 blocked for more than 120 seconds. [ 4613.816026] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. 
[ 4613.824538] task:kworker/u256:4 state:D stack: 0 pid:316722 ppid: 2 flags:0x00000208 [ 4613.833670] Workqueue: 0000:74:02.0 hisi_sas_rst_work_handler [hisi_sas_main] [ 4613.841491] Call trace: [ 4613.844647] __switch_to+0xf8/0x17c [ 4613.848852] __schedule+0x660/0xee0 [ 4613.853052] schedule+0xac/0x240 [ 4613.856984] schedule_timeout+0x500/0x610 [ 4613.861695] __down+0x128/0x36c [ 4613.865542] down+0x240/0x2d0 [ 4613.869216] hisi_sas_controller_prereset+0x58/0x1fc [hisi_sas_main] [ 4613.876324] hisi_sas_rst_work_handler+0x40/0x8c [hisi_sas_main] [ 4613.883019] process_one_work+0x248/0x950 [ 4613.887732] worker_thread+0x318/0x934 [ 4613.892204] kthread+0x190/0x200 [ 4613.896118] ret_from_fork+0x10/0x18 [ 4613.900423] INFO: task kworker/u256:1:348985 blocked for more than 121 seconds. [ 4613.914341] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 4613.922852] task:kworker/u256:1 state:D stack: 0 pid:348985 ppid: 2 flags:0x00000208 [ 4613.931984] Workqueue: 0000:74:02.0_event_q sas_port_event_worker [libsas] [ 4613.939549] Call trace: [ 4613.942702] __switch_to+0xf8/0x17c [ 4613.946892] __schedule+0x660/0xee0 [ 4613.951083] schedule+0xac/0x240 [ 4613.955015] schedule_timeout+0x500/0x610 [ 4613.959725] wait_for_common+0x200/0x610 [ 4613.964349] wait_for_completion+0x3c/0x5c [ 4613.969146] flush_workqueue+0x198/0x790 [ 4613.973776] sas_porte_broadcast_rcvd+0x1e8/0x320 [libsas] [ 4613.979960] sas_port_event_worker+0x54/0xa0 [libsas] [ 4613.985708] process_one_work+0x248/0x950 [ 4613.990420] worker_thread+0x318/0x934 [ 4613.994868] kthread+0x190/0x200 [ 4613.998800] ret_from_fork+0x10/0x18
This is because when the device goes offline, we obtain the hisi_hba semaphore and send the ABORT_DEV command to the device. However, the internal abort times out due to the 2-bit ECC error and triggers an automatic dump. Since the hisi_hba semaphore is already held, the dump cannot be executed and the controller cannot be reset.
Therefore, the deadlocks occur on the following circular dependencies: hisi_sas_dev_gone() -> down() -> hisi_sas_internal_task_abort_dev() -> ... -> hisi_sas_internal_abort_timeout() -> down().
The deadlock is triggered only when the timeout occurs while the device is going offline. To fix this issue, use .rst_ha_timeout to distinguish the scenario where a device goes offline from other scenarios.
Fixes: 2ff07b5c6fe9 ("scsi: hisi_sas: Directly call register snapshot instead of using workqueue") Signed-off-by: Yihang Li liyihang9@huawei.com Signed-off-by: Xiang Chen chenxiang66@hisilicon.com Link: https://lore.kernel.org/r/1705904747-62186-2-git-send-email-chenxiang66@hisi... Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Bing Xia xiabing12@h-partners.com --- drivers/scsi/hisi_sas/hisi_sas_main.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index bbb7b2d9ffcf..1abc62b07d24 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1962,9 +1962,17 @@ static bool hisi_sas_internal_abort_timeout(struct sas_task *task, struct hisi_sas_internal_abort_data *timeout = data;
if (hisi_sas_debugfs_enable && hisi_hba->debugfs_itct[0].itct) { - down(&hisi_hba->sem); + /* + * If timeout occurs in device gone scenario, to avoid + * circular dependency like: + * hisi_sas_dev_gone() -> down() -> ... -> + * hisi_sas_internal_abort_timeout() -> down(). + */ + if (!timeout->rst_ha_timeout) + down(&hisi_hba->sem); hisi_hba->hw->debugfs_snapshot_regs(hisi_hba); - up(&hisi_hba->sem); + if (!timeout->rst_ha_timeout) + up(&hisi_hba->sem); }
if (task->task_state_flags & SAS_TASK_STATE_DONE) {
mainline inclusion from mainline-v6.9-rc1 commit 69097a631c034451a75ca7cb6025460ba3a08f80 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I96KNQ CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
hisi_sas debugfs remove should be executed only when debugfs is enabled. Check whether debugfs is enabled and then remove it only if enabled.
Signed-off-by: Yihang Li liyihang9@huawei.com Signed-off-by: Xiang Chen chenxiang66@hisilicon.com Link: https://lore.kernel.org/r/1705904747-62186-4-git-send-email-chenxiang66@hisi... Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Bing Xia xiabing12@h-partners.com --- drivers/scsi/hisi_sas/hisi_sas_main.c | 3 ++- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 7 +++++-- 2 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 1abc62b07d24..078b780a5b42 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -2625,7 +2625,8 @@ static __exit void hisi_sas_exit(void) { sas_release_transport(hisi_sas_stt);
- debugfs_remove(hisi_sas_debugfs_dir); + if (hisi_sas_debugfs_enable) + debugfs_remove(hisi_sas_debugfs_dir); }
module_init(hisi_sas_init); diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 2f515bf95bd3..4a7057ad07a1 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -5021,7 +5021,8 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id) err_out_remove_host: scsi_remove_host(shost); err_out_undo_debugfs: - debugfs_exit_v3_hw(hisi_hba); + if (hisi_sas_debugfs_enable) + debugfs_exit_v3_hw(hisi_hba); err_out_free_host: hisi_sas_free(hisi_hba); scsi_host_put(shost); @@ -5061,7 +5062,9 @@ static void hisi_sas_v3_remove(struct pci_dev *pdev)
hisi_sas_v3_destroy_irqs(pdev, hisi_hba); hisi_sas_free(hisi_hba); - debugfs_exit_v3_hw(hisi_hba); + if (hisi_sas_debugfs_enable) + debugfs_exit_v3_hw(hisi_hba); + scsi_host_put(shost); }
From: Xiang Chen chenxiang66@hisilicon.com
mainline inclusion from mainline-v6.9-rc1 commit f9242f166770b681d9f71341d96adc01c4da00ef category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I96KNQ CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
hisi_hba->timer is not used for v3 hw but there are two places that some operations related to hisi_hba->timer are called by v3 hw:
- Deleting the timer in function hisi_sas_v3_remove(), which is only for v3 hw;
- Deleting the timer in function hisi_sas_controller_reset_prepare() which is common for v1/v2/v3 hw.
We can remove the timer deletion in the first case, but in the second case the deletion must be skipped only for v3 hw, so check hw->sht, which is NULL only for v3 hw, before deleting hisi_hba->timer.
Signed-off-by: Xiang Chen chenxiang66@hisilicon.com Link: https://lore.kernel.org/r/1705904747-62186-5-git-send-email-chenxiang66@hisi... Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Bing Xia xiabing12@h-partners.com --- drivers/scsi/hisi_sas/hisi_sas_main.c | 7 ++++++- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 1 - 2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 078b780a5b42..4cdab86399a9 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1507,7 +1507,12 @@ void hisi_sas_controller_reset_prepare(struct hisi_hba *hisi_hba) scsi_block_requests(shost); hisi_hba->hw->wait_cmds_complete_timeout(hisi_hba, 100, 5000);
- del_timer_sync(&hisi_hba->timer); + /* + * hisi_hba->timer is only used for v1/v2 hw, and check hw->sht + * which is also only used for v1/v2 hw to skip it for v3 hw + */ + if (hisi_hba->hw->sht) + del_timer_sync(&hisi_hba->timer);
set_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags); } diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 4a7057ad07a1..7307b6654e58 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -5054,7 +5054,6 @@ static void hisi_sas_v3_remove(struct pci_dev *pdev) struct Scsi_Host *shost = sha->shost;
pm_runtime_get_noresume(dev); - del_timer_sync(&hisi_hba->timer);
sas_unregister_ha(sha); flush_workqueue(hisi_hba->wq);
From: Xingui Yang yangxingui@huawei.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I96KNQ CVE: NA
----------------------------------------------------------------------
We find that some disks use D2H frame instead of SDB frame to return NCQ error. Currently, only the I/O corresponding to the D2H frame is processed in this scenario, which does not meet the processing requirements of the NCQ error scenario. So we set dev_status to HISI_SAS_DEV_NCQ_ERR and abort all I/Os of the disk in this scenario.
Signed-off-by: Xingui Yang yangxingui@huawei.com Reviewed-by: Xiang Chen chenxiang66@hisilicon.com Signed-off-by: Bing Xia xiabing12@h-partners.com --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 7307b6654e58..0ea0298ad84f 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -2244,7 +2244,15 @@ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task, case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP: if ((dw0 & CMPLT_HDR_RSPNS_XFRD_MSK) && (sipc_rx_err_type & RX_FIS_STATUS_ERR_MSK)) { - ts->stat = SAS_PROTO_RESPONSE; + if (task->ata_task.use_ncq) { + struct domain_device *device = task->dev; + struct hisi_sas_device *sas_dev = + device->lldd_dev; + sas_dev->dev_status = HISI_SAS_DEV_NCQ_ERR; + slot->abort = 1; + } else { + ts->stat = SAS_PROTO_RESPONSE; + } } else if (dma_rx_err_type & RX_DATA_LEN_UNDERFLOW_MSK) { ts->residual = trans_tx_fail_type; ts->stat = SAS_DATA_UNDERRUN;
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I96KNQ CVE: NA
----------------------------------------------------------------------
Users can suspend the machine with 'echo disk > /sys/power/state', but the suspend will fail because the SAS controller cannot be suspended:
[root@localhost ~]# echo freeze > /sys/power/state -bash: echo: write error: Device or resource busy [15104.142955] PM: suspend entry (s2idle) ... [15104.283465] hisi_sas_v3_hw 0000:32:04.0: entering suspend state [15104.283480] hisi_sas_v3_hw 0000:30:04.0: entering suspend state [15104.283500] hisi_sas_v3_hw 0000:32:04.0: PM suspend: host status cannot be suspended [15104.283508] hisi_sas_v3_hw 0000:30:04.0: PM suspend: host status cannot be suspended [15104.283516] hisi_sas_v3_hw 0000:32:04.0: PM: pci_pm_suspend(): suspend_v3_hw+0x0/0x210 [hisi_sas_v3_hw] returns -16 [15104.283527] hisi_sas_v3_hw 0000:32:04.0: PM: dpm_run_callback(): pci_pm_suspend+0x0/0x1c0 returns -16 [15104.283524] hisi_sas_v3_hw 0000:30:04.0: PM: pci_pm_suspend(): suspend_v3_hw+0x0/0x210 [hisi_sas_v3_hw] returns -16 [15104.283533] hisi_sas_v3_hw 0000:32:04.0: PM: failed to suspend async: error -16 [15104.283536] hisi_sas_v3_hw 0000:30:04.0: PM: dpm_run_callback(): pci_pm_suspend+0x0/0x1c0 returns -16 [15104.283542] hisi_sas_v3_hw 0000:30:04.0: PM: failed to suspend async: error -16
The problem is that when the ->runtime_suspend() callback suspend_v3_hw() is executing, the current runtime PM status is RPM_ACTIVE and the usage count of the controller is not 0, so return immediately.
To fix it, check the device usage count only when the runtime PM status is RPM_SUSPENDING.
Signed-off-by: Yihang Li liyihang9@huawei.com Signed-off-by: Bing Xia xiabing12@h-partners.com --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 0ea0298ad84f..54ba0395ec7d 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -5162,7 +5162,8 @@ static int _suspend_v3_hw(struct device *device) interrupt_disable_v3_hw(hisi_hba);
#ifdef CONFIG_PM - if (atomic_read(&device->power.usage_count)) { + if ((device->power.runtime_status == RPM_SUSPENDING) && + atomic_read(&device->power.usage_count)) { dev_err(dev, "PM suspend: host status cannot be suspended\n"); rc = -EBUSY; goto err_out;
mainline inclusion from mainline-v6.9-rc1 commit 3f030550476566b12091687c70071d05ad433e0d category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I96KNQ CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
In commit 63f0733d07ce ("scsi: hisi_sas: Allocate DFX memory during dump trigger"), the DFX memory allocation was moved from device initialization to the time a dump is triggered, so .debugfs_itct is not a valid address and does not need to be checked.
The parameter hisi_sas_debugfs_enable is enough to check whether automatic debugfs dump is triggered, so remove the redundant checks.
Fixes: 63f0733d07ce ("scsi: hisi_sas: Allocate DFX memory during dump trigger") Signed-off-by: Yihang Li liyihang9@huawei.com Signed-off-by: Xiang Chen chenxiang66@hisilicon.com Link: https://lore.kernel.org/r/1705904747-62186-3-git-send-email-chenxiang66@hisi... Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Bing Xia xiabing12@h-partners.com --- drivers/scsi/hisi_sas/hisi_sas_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 4cdab86399a9..097dfe4b620d 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1578,7 +1578,7 @@ static int hisi_sas_controller_prereset(struct hisi_hba *hisi_hba) return -EPERM; }
- if (hisi_sas_debugfs_enable && hisi_hba->debugfs_itct[0].itct) + if (hisi_sas_debugfs_enable) hisi_hba->hw->debugfs_snapshot_regs(hisi_hba);
return 0; @@ -1966,7 +1966,7 @@ static bool hisi_sas_internal_abort_timeout(struct sas_task *task, struct hisi_hba *hisi_hba = dev_to_hisi_hba(device); struct hisi_sas_internal_abort_data *timeout = data;
- if (hisi_sas_debugfs_enable && hisi_hba->debugfs_itct[0].itct) { + if (hisi_sas_debugfs_enable) { /* * If timeout occurs in device gone scenario, to avoid * circular dependency like:
From: Xingui Yang yangxingui@huawei.com
mainline inclusion from mainline-v6.9-rc2 commit 8e68a458bcf5b5cb9c3624598bae28f08251601f category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I96KNQ CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
As of commit d8649fc1c5e4 ("scsi: libsas: Do discovery on empty PHY to update PHY info"), discovery sends a new SMP_DISCOVER and updates phy->phy_change_count. We found that if the disk is reconnected and phy change_count changes at this time, the disk scanning process will not be triggered.
Therefore, call sas_set_ex_phy() to update the PHY info with the results of the last query. And because the previous phy info will be used when calling sas_unregister_devs_sas_addr(), sas_unregister_devs_sas_addr() should be called before sas_set_ex_phy().
Fixes: d8649fc1c5e4 ("scsi: libsas: Do discovery on empty PHY to update PHY info") Signed-off-by: Xingui Yang yangxingui@huawei.com Link: https://lore.kernel.org/r/20240307141413.48049-3-yangxingui@huawei.com Reviewed-by: John Garry john.g.garry@oracle.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Bing Xia xiabing12@h-partners.com --- drivers/scsi/libsas/sas_expander.c | 32 ++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-)
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index a2204674b680..3dffe7dfc257 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -1940,6 +1940,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, struct expander_device *ex = &dev->ex_dev; struct ex_phy *phy = &ex->ex_phy[phy_id]; enum sas_device_type type = SAS_PHY_UNUSED; + struct smp_disc_resp *disc_resp; u8 sas_addr[SAS_ADDR_SIZE]; char msg[80] = ""; int res; @@ -1951,33 +1952,41 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, SAS_ADDR(dev->sas_addr), phy_id, msg);
memset(sas_addr, 0, SAS_ADDR_SIZE); - res = sas_get_phy_attached_dev(dev, phy_id, sas_addr, &type); + disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE); + if (!disc_resp) + return -ENOMEM; + + res = sas_get_phy_discover(dev, phy_id, disc_resp); switch (res) { case SMP_RESP_NO_PHY: phy->phy_state = PHY_NOT_PRESENT; sas_unregister_devs_sas_addr(dev, phy_id, last); - return res; + goto out_free_resp; case SMP_RESP_PHY_VACANT: phy->phy_state = PHY_VACANT; sas_unregister_devs_sas_addr(dev, phy_id, last); - return res; + goto out_free_resp; case SMP_RESP_FUNC_ACC: break; case -ECOMM: break; default: - return res; + goto out_free_resp; }
+ if (res == 0) + sas_get_sas_addr_and_dev_type(disc_resp, sas_addr, &type); + if ((SAS_ADDR(sas_addr) == 0) || (res == -ECOMM)) { phy->phy_state = PHY_EMPTY; sas_unregister_devs_sas_addr(dev, phy_id, last); /* - * Even though the PHY is empty, for convenience we discover - * the PHY to update the PHY info, like negotiated linkrate. + * Even though the PHY is empty, for convenience we update + * the PHY info, like negotiated linkrate. */ - sas_ex_phy_discover(dev, phy_id); - return res; + if (res == 0) + sas_set_ex_phy(dev, phy_id, disc_resp); + goto out_free_resp; } else if (SAS_ADDR(sas_addr) == SAS_ADDR(phy->attached_sas_addr) && dev_type_flutter(type, phy->attached_dev_type)) { struct domain_device *ata_dev = sas_ex_to_ata(dev, phy_id); @@ -1989,7 +1998,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, action = ", needs recovery"; pr_debug("ex %016llx phy%02d broadcast flutter%s\n", SAS_ADDR(dev->sas_addr), phy_id, action); - return res; + goto out_free_resp; }
/* we always have to delete the old device when we went here */ @@ -1998,7 +2007,10 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, SAS_ADDR(phy->attached_sas_addr)); sas_unregister_devs_sas_addr(dev, phy_id, last);
- return sas_discover_new(dev, phy_id); + res = sas_discover_new(dev, phy_id); +out_free_resp: + kfree(disc_resp); + return res; }
/**
From: Xingui Yang yangxingui@huawei.com
mainline inclusion from mainline-v6.9-rc2 commit a57345279fd311ba679b8083feb0eec5272c7729 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I96KNQ CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
Add a helper to get attached_sas_addr and device type from disc_resp.
Suggested-by: John Garry john.g.garry@oracle.com Signed-off-by: Xingui Yang yangxingui@huawei.com Link: https://lore.kernel.org/r/20240307141413.48049-2-yangxingui@huawei.com Reviewed-by: John Garry john.g.garry@oracle.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Bing Xia xiabing12@h-partners.com --- drivers/scsi/libsas/sas_expander.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 3dffe7dfc257..5c261005b74e 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -1621,6 +1621,16 @@ int sas_discover_root_expander(struct domain_device *dev)
/* ---------- Domain revalidation ---------- */
+static void sas_get_sas_addr_and_dev_type(struct smp_disc_resp *disc_resp, + u8 *sas_addr, + enum sas_device_type *type) +{ + memcpy(sas_addr, disc_resp->disc.attached_sas_addr, SAS_ADDR_SIZE); + *type = to_dev_type(&disc_resp->disc); + if (*type == SAS_PHY_UNUSED) + memset(sas_addr, 0, SAS_ADDR_SIZE); +} + static int sas_get_phy_discover(struct domain_device *dev, int phy_id, struct smp_disc_resp *disc_resp) { @@ -1674,13 +1684,8 @@ int sas_get_phy_attached_dev(struct domain_device *dev, int phy_id, return -ENOMEM;
res = sas_get_phy_discover(dev, phy_id, disc_resp); - if (res == 0) { - memcpy(sas_addr, disc_resp->disc.attached_sas_addr, - SAS_ADDR_SIZE); - *type = to_dev_type(&disc_resp->disc); - if (*type == 0) - memset(sas_addr, 0, SAS_ADDR_SIZE); - } + if (res == 0) + sas_get_sas_addr_and_dev_type(disc_resp, sas_addr, type); kfree(disc_resp); return res; }
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I96KNQ CVE: NA
----------------------------------------------------------------------
This series [1] reduces the kmalloc() minimum alignment on arm64 to 8 (from 128). In libsas, this will cause SMP requests to be 8-byte-aligned through kmalloc() allocation. However, for the hisi_sas hardware, all command addresses must be 16-byte-aligned. Otherwise, the commands fail to be executed.
ARCH_DMA_MINALIGN represents the minimum (static) alignment for safe DMA operations, so use ARCH_DMA_MINALIGN as the alignment for SMP request.
Link: https://lkml.kernel.org/r/20230612153201.554742-1-catalin.marinas@arm.com [1] Signed-off-by: Yihang Li liyihang9@huawei.com Reviewed-by: Damien Le Moal dlemoal@kernel.org Reviewed-by: John Garry john.g.garry@oracle.com Reviewed-by: Jason Yan yanaijie@huawei.com Signed-off-by: Bing Xia xiabing12@h-partners.com --- drivers/scsi/libsas/sas_expander.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 5c261005b74e..f6e6db8b8aba 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -135,7 +135,7 @@ static int smp_execute_task(struct domain_device *dev, void *req, int req_size,
static inline void *alloc_smp_req(int size) { - u8 *p = kzalloc(size, GFP_KERNEL); + u8 *p = kzalloc(ALIGN(size, ARCH_DMA_MINALIGN), GFP_KERNEL); if (p) p[0] = SMP_REQUEST; return p;
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I96KNQ CVE: NA
----------------------------------------------------------------------
We found that the second parameter of function ata_wait_after_reset() is incorrectly used. We call smp_ata_check_ready_type() to poll the device type until the 30s timeout, so the correct deadline should be (jiffies + 30000).
Fixes: 3c2673a09cf1 ("scsi: hisi_sas: Fix SATA devices missing issue during I_T nexus reset") Signed-off-by: xiabing xiabing12@h-partners.com Signed-off-by: Yihang Li liyihang9@huawei.com Reviewed-by: Xiang Chen chenxiang66@hisilicon.com Signed-off-by: Bing Xia xiabing12@h-partners.com --- drivers/scsi/hisi_sas/hisi_sas_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 097dfe4b620d..7245600aedb2 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1796,8 +1796,10 @@ static int hisi_sas_debug_I_T_nexus_reset(struct domain_device *device)
if (dev_is_sata(device)) { struct ata_link *link = &device->sata_dev.ap->link; + unsigned long deadline = ata_deadline(jiffies, + HISI_SAS_WAIT_PHYUP_TIMEOUT / HZ * 1000);
- rc = ata_wait_after_reset(link, HISI_SAS_WAIT_PHYUP_TIMEOUT, + rc = ata_wait_after_reset(link, deadline, smp_ata_check_ready_type); } else { msleep(2000);
From: Qi Liu liuqi115@huawei.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I96KNQ CVE: NA
----------------------------------------------------------------------
WARNING is triggered when executing link reset of remote PHY and rmmod SAS driver simultaneously. Following is the WARNING log:
WARNING: CPU: 61 PID: 21818 at drivers/base/core.c:1347 __device_links_no_driver+0xb4/0xc0 Call trace: __device_links_no_driver+0xb4/0xc0 device_links_driver_cleanup+0xb0/0xfc __device_release_driver+0x198/0x23c device_release_driver+0x38/0x50 bus_remove_device+0x130/0x140 device_del+0x184/0x434 __scsi_remove_device+0x118/0x150 scsi_remove_target+0x1bc/0x240 sas_rphy_remove+0x90/0x94 sas_rphy_delete+0x24/0x3c sas_destruct_devices+0x64/0xa0 [libsas] sas_revalidate_domain+0xe4/0x150 [libsas] process_one_work+0x1e0/0x46c worker_thread+0x15c/0x464 kthread+0x160/0x170 ret_from_fork+0x10/0x20 ---[ end trace 71e059eb58f85d4a ]---
During SAS phy up, link->status is set to DL_STATE_AVAILABLE in device_links_driver_bound, then this setting influences __device_links_no_driver() before driver rmmod and caused WARNING.
So we add the slave_destroy interface to make sure the link is removed after the workqueue is flushed.
Fixes: 16fd4a7c59170 ("scsi: hisi_sas: Add device link between SCSI devices and hisi_hba") Signed-off-by: Qi Liu liuqi115@huawei.com Signed-off-by: Bing Xia xiabing12@h-partners.com --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 54ba0395ec7d..1173c767084f 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -2916,7 +2916,7 @@ static int slave_configure_v3_hw(struct scsi_device *sdev) return 0;
if (!device_link_add(&sdev->sdev_gendev, dev, - DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE)) { + DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE)) { if (pm_runtime_enabled(dev)) { dev_info(dev, "add device link failed, disable runtime PM for the host\n"); pm_runtime_disable(dev); @@ -2926,6 +2926,15 @@ static int slave_configure_v3_hw(struct scsi_device *sdev) return 0; }
+static void slave_destroy_v3_hw(struct scsi_device *sdev) +{ + struct Scsi_Host *shost = dev_to_shost(&sdev->sdev_gendev); + struct hisi_hba *hisi_hba = shost_priv(shost); + struct device *dev = hisi_hba->dev; + + device_link_remove(&sdev->sdev_gendev, dev); +} + static struct attribute *host_v3_hw_attrs[] = { &dev_attr_phy_event_threshold.attr, &dev_attr_intr_conv_v3_hw.attr, @@ -3342,6 +3351,7 @@ static const struct scsi_host_template sht_v3_hw = { .eh_device_reset_handler = sas_eh_device_reset_handler, .eh_target_reset_handler = sas_eh_target_reset_handler, .slave_alloc = hisi_sas_slave_alloc, + .slave_destroy = slave_destroy_v3_hw, .target_destroy = sas_target_destroy, .ioctl = sas_ioctl, #ifdef CONFIG_COMPAT
From: xiabing xiabing12@h-partners.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I96KNQ CVE: NA
----------------------------------------------------------------------
In that commit, if the softreset fails upon certain conditions, just disable the PHY associated with the disk. The user needs to restore the PHY.
SATA disks do not support simultaneous connection of multiple hosts. Therefore, when multiple controllers are connected to a SATA disk at the same time, the controller which is connected later failed to issue an ATA softreset to the SATA disk. As a result, the PHY associated with the disk is disabled and cannot be automatically recovered.
Now we no longer act on the execution result of the softreset. Regardless of whether it succeeds or not, we directly carry out I_T_nexus_reset.
Signed-off-by: Yihang Li liyihang9@huawei.com Signed-off-by: Bing Xia xiabing12@h-partners.com --- drivers/scsi/hisi_sas/hisi_sas_main.c | 29 +++++---------------------- 1 file changed, 5 insertions(+), 24 deletions(-)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 7245600aedb2..b0b76c93d732 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1825,33 +1825,14 @@ static int hisi_sas_I_T_nexus_reset(struct domain_device *device) } hisi_sas_dereg_device(hisi_hba, device);
- rc = hisi_sas_debug_I_T_nexus_reset(device); - if (rc == TMF_RESP_FUNC_COMPLETE && dev_is_sata(device)) { - struct sas_phy *local_phy; - + if (dev_is_sata(device)) { rc = hisi_sas_softreset_ata_disk(device); - switch (rc) { - case -ECOMM: - rc = -ENODEV; - break; - case TMF_RESP_FUNC_FAILED: - case -EMSGSIZE: - case -EIO: - local_phy = sas_get_local_phy(device); - rc = sas_phy_enable(local_phy, 0); - if (!rc) { - local_phy->enabled = 0; - dev_err(dev, "Disabled local phy of ATA disk %016llx due to softreset fail (%d)\n", - SAS_ADDR(device->sas_addr), rc); - rc = -ENODEV; - } - sas_put_local_phy(local_phy); - break; - default: - break; - } + if (rc == TMF_RESP_FUNC_FAILED) + dev_err(dev, "ata disk %016llx reset (%d)\n", + SAS_ADDR(device->sas_addr), rc); }
+ rc = hisi_sas_debug_I_T_nexus_reset(device); if ((rc == TMF_RESP_FUNC_COMPLETE) || (rc == -ENODEV)) hisi_sas_release_task(hisi_hba, device);
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/5922 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/R...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/5922 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/R...