From: Daniel Wagner <dwagner(a)suse.de>
stable inclusion
from stable-v5.10.211
commit 4f2c95015ec2a1899161be6c0bdaecedd5a7bfb2
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I9J6AL
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=…
-------------------------
[ Upstream commit 70fbfc47a392b98e5f8dba70c6efc6839205c982 ]
The module exit path has race between deleting all controllers and
freeing 'left over IDs'. To prevent double free a synchronization
between nvme_delete_ctrl and ida_destroy has been added by the initial
commit.
There is some logic around trying to prevent from hanging forever in
wait_for_completion, though it does not handling all cases. E.g.
blktests is able to reproduce the situation where the module unload
hangs forever.
If we completely rely on the cleanup code executed from the
nvme_delete_ctrl path, all IDs will be freed eventually. This makes
calling ida_destroy unnecessary. We only have to ensure that all
nvme_delete_ctrl code has been executed before we leave
nvme_fc_exit_module. This is done by flushing the nvme_delete_wq
workqueue.
While at it, remove the unused nvme_fc_wq workqueue too.
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Reviewed-by: Hannes Reinecke <hare(a)suse.de>
Signed-off-by: Daniel Wagner <dwagner(a)suse.de>
Signed-off-by: Keith Busch <kbusch(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
Signed-off-by: Baogen Shang <baogen.shang(a)windriver.com>
---
drivers/nvme/host/fc.c | 47 ++++++------------------------------------
1 file changed, 6 insertions(+), 41 deletions(-)
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index b534a85e2bf1..92db88ceb96f 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -220,11 +220,6 @@ static LIST_HEAD(nvme_fc_lport_list);
static DEFINE_IDA(nvme_fc_local_port_cnt);
static DEFINE_IDA(nvme_fc_ctrl_cnt);
-static struct workqueue_struct *nvme_fc_wq;
-
-static bool nvme_fc_waiting_to_unload;
-static DECLARE_COMPLETION(nvme_fc_unload_proceed);
-
/*
* These items are short-term. They will eventually be moved into
* a generic FC class. See comments in module init.
@@ -254,8 +249,6 @@ nvme_fc_free_lport(struct kref *ref)
/* remove from transport list */
spin_lock_irqsave(&nvme_fc_lock, flags);
list_del(&lport->port_list);
- if (nvme_fc_waiting_to_unload && list_empty(&nvme_fc_lport_list))
- complete(&nvme_fc_unload_proceed);
spin_unlock_irqrestore(&nvme_fc_lock, flags);
ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num);
@@ -3823,10 +3816,6 @@ static int __init nvme_fc_init_module(void)
{
int ret;
- nvme_fc_wq = alloc_workqueue("nvme_fc_wq", WQ_MEM_RECLAIM, 0);
- if (!nvme_fc_wq)
- return -ENOMEM;
-
/*
* NOTE:
* It is expected that in the future the kernel will combine
@@ -3844,7 +3833,7 @@ static int __init nvme_fc_init_module(void)
ret = class_register(&fc_class);
if (ret) {
pr_err("couldn't register class fc\n");
- goto out_destroy_wq;
+ return ret;
}
/*
@@ -3868,8 +3857,6 @@ static int __init nvme_fc_init_module(void)
device_destroy(&fc_class, MKDEV(0, 0));
out_destroy_class:
class_unregister(&fc_class);
-out_destroy_wq:
- destroy_workqueue(nvme_fc_wq);
return ret;
}
@@ -3889,45 +3876,23 @@ nvme_fc_delete_controllers(struct nvme_fc_rport *rport)
spin_unlock(&rport->lock);
}
-static void
-nvme_fc_cleanup_for_unload(void)
+static void __exit nvme_fc_exit_module(void)
{
struct nvme_fc_lport *lport;
struct nvme_fc_rport *rport;
-
- list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
- list_for_each_entry(rport, &lport->endp_list, endp_list) {
- nvme_fc_delete_controllers(rport);
- }
- }
-}
-
-static void __exit nvme_fc_exit_module(void)
-{
unsigned long flags;
- bool need_cleanup = false;
spin_lock_irqsave(&nvme_fc_lock, flags);
- nvme_fc_waiting_to_unload = true;
- if (!list_empty(&nvme_fc_lport_list)) {
- need_cleanup = true;
- nvme_fc_cleanup_for_unload();
- }
+ list_for_each_entry(lport, &nvme_fc_lport_list, port_list)
+ list_for_each_entry(rport, &lport->endp_list, endp_list)
+ nvme_fc_delete_controllers(rport);
spin_unlock_irqrestore(&nvme_fc_lock, flags);
- if (need_cleanup) {
- pr_info("%s: waiting for ctlr deletes\n", __func__);
- wait_for_completion(&nvme_fc_unload_proceed);
- pr_info("%s: ctrl deletes complete\n", __func__);
- }
+ flush_workqueue(nvme_delete_wq);
nvmf_unregister_transport(&nvme_fc_transport);
- ida_destroy(&nvme_fc_local_port_cnt);
- ida_destroy(&nvme_fc_ctrl_cnt);
-
device_destroy(&fc_class, MKDEV(0, 0));
class_unregister(&fc_class);
- destroy_workqueue(nvme_fc_wq);
}
module_init(nvme_fc_init_module);
--
2.33.0
From: Ranjan Kumar <ranjan.kumar(a)broadcom.com>
stable inclusion
from stable-v5.10.214
commit ba3a55d118bf211f4a7f53f83dafbc3405ea198d
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I9J6AL
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=…
-------------------------
[ Upstream commit ee0017c3ed8a8abfa4d40e42f908fb38c31e7515 ]
If the driver detects that the controller is not ready before sending the
first IOC facts command, it will wait for a maximum of 10 seconds for it to
become ready. However, even if the controller becomes ready within 10
seconds, the driver will still issue a diagnostic reset.
Modify the driver to avoid sending a diag reset if the controller becomes
ready within the 10-second wait time.
Signed-off-by: Ranjan Kumar <ranjan.kumar(a)broadcom.com>
Link: https://lore.kernel.org/r/20240221071724.14986-1-ranjan.kumar@broadcom.com
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
Signed-off-by: Baogen Shang <baogen.shang(a)windriver.com>
---
drivers/scsi/mpt3sas/mpt3sas_base.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 4bf1ea39d21a..55200f5db150 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -6357,7 +6357,9 @@ _base_wait_for_iocstate(struct MPT3SAS_ADAPTER *ioc, int timeout)
return -EFAULT;
}
- issue_diag_reset:
+ return 0;
+
+issue_diag_reset:
rc = _base_diag_reset(ioc);
return rc;
}
--
2.33.0
From: Justin Tee <justin.tee(a)broadcom.com>
stable inclusion
from stable-v5.10.215
commit c473288f27d15014447de5a891bdf22a0695847a
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I9J6AL
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=…
-------------------------
[ Upstream commit 2ae917d4bcab80ab304b774d492e2fcd6c52c06b ]
The call to lpfc_sli4_resume_rpi() in lpfc_rcv_padisc() may return an
unsuccessful status. In such cases, the elsiocb is not issued, the
completion is not called, and thus the elsiocb resource is leaked.
Check return value after calling lpfc_sli4_resume_rpi() and conditionally
release the elsiocb resource.
Signed-off-by: Justin Tee <justin.tee(a)broadcom.com>
Link: https://lore.kernel.org/r/20240131185112.149731-3-justintee8345@gmail.com
Reviewed-by: Himanshu Madhani <himanshu.madhani(a)oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
Signed-off-by: Baogen Shang <baogen.shang(a)windriver.com>
---
drivers/scsi/lpfc/lpfc_nportdisc.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index 1e22364a31fc..d6287c58d504 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -784,8 +784,10 @@ lpfc_rcv_padisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
/* Save the ELS cmd */
elsiocb->drvrTimeout = cmd;
- lpfc_sli4_resume_rpi(ndlp,
- lpfc_mbx_cmpl_resume_rpi, elsiocb);
+ if (lpfc_sli4_resume_rpi(ndlp,
+ lpfc_mbx_cmpl_resume_rpi,
+ elsiocb))
+ kfree(elsiocb);
goto out;
}
}
--
2.33.0
From: Keith Busch <kbusch(a)kernel.org>
stable inclusion
from stable-v5.10.209
commit 06a33eec1dc28dfbcd62c1ff94a3229f3ee265bc
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I9J6AL
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=…
-------------------------
[ Upstream commit 74fbc88e161424b3b96a22b23a8e3e1edab9d05c ]
The block layer doesn't support logical block sizes smaller than 512
bytes. The nvme spec doesn't support that small either, but the driver
isn't checking to make sure the device responded with usable data.
Failing to catch this will result in a kernel bug, either from a
division by zero when stacking, or a zero length bio.
Reviewed-by: Jens Axboe <axboe(a)kernel.dk>
Signed-off-by: Keith Busch <kbusch(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
Signed-off-by: Baogen Shang <baogen.shang(a)windriver.com>
---
drivers/nvme/host/core.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 142f3a03509c..9fcc05c4f88c 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2038,9 +2038,10 @@ static void nvme_update_disk_info(struct gendisk *disk,
/*
* The block layer can't support LBA sizes larger than the page size
- * yet, so catch this early and don't allow block I/O.
+ * or smaller than a sector size yet, so catch this early and don't
+ * allow block I/O.
*/
- if (ns->lba_shift > PAGE_SHIFT) {
+ if (ns->lba_shift > PAGE_SHIFT || ns->lba_shift < SECTOR_SHIFT) {
capacity = 0;
bs = (1 << 9);
}
--
2.33.0
From: Thinh Tran <thinhtr(a)linux.vnet.ibm.com>
stable inclusion
from stable-v5.10.209
commit 1059aa41c5a84abfab4cc7371d6b5ff2b30b6c2d
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I9J6AL
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=…
-------------------------
[ Upstream commit 16b55b1f2269962fb6b5154b8bf43f37c9a96637 ]
When an EEH error is encountered by a PCI adapter, the EEH driver
modifies the PCI channel's state as shown below:
enum {
/* I/O channel is in normal state */
pci_channel_io_normal = (__force pci_channel_state_t) 1,
/* I/O to channel is blocked */
pci_channel_io_frozen = (__force pci_channel_state_t) 2,
/* PCI card is dead */
pci_channel_io_perm_failure = (__force pci_channel_state_t) 3,
};
If the same EEH error then causes the tg3 driver's transmit timeout
logic to execute, the tg3_tx_timeout() function schedules a reset
task via tg3_reset_task_schedule(), which may cause a race condition
between the tg3 and EEH driver as both attempt to recover the HW via
a reset action.
EEH driver gets error event
--> eeh_set_channel_state()
and set device to one of
error state above scheduler: tg3_reset_task() get
returned error from tg3_init_hw()
--> dev_close() shuts down the interface
tg3_io_slot_reset() and
tg3_io_resume() fail to
reset/resume the device
To resolve this issue, we avoid the race condition by checking the PCI
channel state in the tg3_reset_task() function and skip the tg3 driver
initiated reset when the PCI channel is not in the normal state. (The
driver has no access to tg3 device registers at this point and cannot
even complete the reset task successfully without external assistance.)
We'll leave the reset procedure to be managed by the EEH driver which
calls the tg3_io_error_detected(), tg3_io_slot_reset() and
tg3_io_resume() functions as appropriate.
Adding the same checking in tg3_dump_state() to avoid dumping all
device registers when the PCI channel is not in the normal state.
Signed-off-by: Thinh Tran <thinhtr(a)linux.vnet.ibm.com>
Tested-by: Venkata Sai Duggi <venkata.sai.duggi(a)ibm.com>
Reviewed-by: David Christensen <drc(a)linux.vnet.ibm.com>
Reviewed-by: Michael Chan <michael.chan(a)broadcom.com>
Link: https://lore.kernel.org/r/20231201001911.656-1-thinhtr@linux.vnet.ibm.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
Signed-off-by: Baogen Shang <baogen.shang(a)windriver.com>
---
drivers/net/ethernet/broadcom/tg3.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 4e74a3d44d1e..56ca913f0c2d 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -6454,6 +6454,14 @@ static void tg3_dump_state(struct tg3 *tp)
int i;
u32 *regs;
+ /* If it is a PCI error, all registers will be 0xffff,
+ * we don't dump them out, just report the error and return
+ */
+ if (tp->pdev->error_state != pci_channel_io_normal) {
+ netdev_err(tp->dev, "PCI channel ERROR!\n");
+ return;
+ }
+
regs = kzalloc(TG3_REG_BLK_SIZE, GFP_ATOMIC);
if (!regs)
return;
@@ -11195,7 +11203,8 @@ static void tg3_reset_task(struct work_struct *work)
rtnl_lock();
tg3_full_lock(tp, 0);
- if (tp->pcierr_recovery || !netif_running(tp->dev)) {
+ if (tp->pcierr_recovery || !netif_running(tp->dev) ||
+ tp->pdev->error_state != pci_channel_io_normal) {
tg3_flag_clear(tp, RESET_TASK_PENDING);
tg3_full_unlock(tp);
rtnl_unlock();
--
2.33.0
From: Zhipeng Lu <alexious(a)zju.edu.cn>
stable inclusion
from stable-v5.10.209
commit aeed2b4e4a70c7568d4a5eecd6a109713c0dfbf4
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I9J6AL
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=…
-------------------------
[ Upstream commit ac16667237a82e2597e329eb9bc520d1cf9dff30 ]
When the allocation of
adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries fails,
amdgpu_free_extended_power_table is called to free some fields of adev.
However, when the control flow returns to si_dpm_sw_init, it goes to
label dpm_failed and calls si_dpm_fini, which calls
amdgpu_free_extended_power_table again and free those fields again. Thus
a double-free is triggered.
Fixes: 841686df9f7d ("drm/amdgpu: add SI DPM support (v4)")
Signed-off-by: Zhipeng Lu <alexious(a)zju.edu.cn>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
Signed-off-by: Baogen Shang <baogen.shang(a)windriver.com>
---
drivers/gpu/drm/amd/pm/powerplay/si_dpm.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c b/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c
index d6544a6dabc7..6f0653c81f8f 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c
@@ -7349,10 +7349,9 @@ static int si_dpm_init(struct amdgpu_device *adev)
kcalloc(4,
sizeof(struct amdgpu_clock_voltage_dependency_entry),
GFP_KERNEL);
- if (!adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries) {
- amdgpu_free_extended_power_table(adev);
+ if (!adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries)
return -ENOMEM;
- }
+
adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.count = 4;
adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[0].clk = 0;
adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[0].v = 0;
--
2.33.0