From: Zhigang Luo <Zhigang.Luo@amd.com>
mainline inclusion from mainline-v6.10-rc1 commit dfb15c4ab58658aaa6161b546e7eb852ae7cc132 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I9U8NU CVE: CVE-2024-36949
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=dfb15c4ab58658aaa6161b546e7eb852ae7cc132
--------------------------------
If more than one device is being reset in parallel, the first device calls kfd_suspend_all_processes() to evict all processes on all devices, and this call takes time to finish. The other devices start their reset and recovery without waiting for it. If a process has not been evicted before recovery runs, it will be restored, which then causes a page fault.
Signed-off-by: Zhigang Luo <Zhigang.Luo@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Conflicts: drivers/gpu/drm/amd/amdkfd/kfd_device.c [Some contexts different. No functional impact.] Signed-off-by: Zheng Zucheng zhengzucheng@huawei.com --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index e3d7a408d659..9f2eb8cf744a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -603,16 +603,14 @@ bool kfd_is_locked(void)
void kgd2kfd_suspend(struct kfd_dev *kfd) { - int count; if (!kfd->init_complete) return;
mutex_lock(&kfd_processes_mutex); - count = ++kfd_locked; - mutex_unlock(&kfd_processes_mutex); /* For first KFD device suspend all the KFD processes */ - if (count == 1) + if (++kfd_locked == 1) kfd_suspend_all_processes(); + mutex_unlock(&kfd_processes_mutex);
kfd->dqm->ops.stop(kfd->dqm);
@@ -621,7 +619,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd)
int kgd2kfd_resume(struct kfd_dev *kfd) { - int ret, count; + int ret;
if (!kfd->init_complete) return 0; @@ -631,11 +629,10 @@ int kgd2kfd_resume(struct kfd_dev *kfd) return ret;
mutex_lock(&kfd_processes_mutex); - count = --kfd_locked; - mutex_unlock(&kfd_processes_mutex); - WARN_ONCE(count < 0, "KFD suspend / resume ref. error"); - if (count == 0) + if (--kfd_locked == 0) ret = kfd_resume_all_processes(); + WARN_ONCE(kfd_locked < 0, "KFD suspend / resume ref. error"); + mutex_unlock(&kfd_processes_mutex);
return ret; }