fixing CVE-2024-36022
Ahmad Rehman (1): [Backport] drm/amdgpu: Init zone device and drm client after mode-1 reset on reload
Felix Kuehling (1): [Backport] drm/amdkfd: Export DMABufs from KFD using GEM handles
Flora Cui (1): [Backport] drm/amdkfd: init drm_client with funcs hook
Le Ma (1): [Backport] drm/amdgpu: move the drm client creation behind drm device registration
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 24 ++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 7 ++++ .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 33 +++++++++++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 9 ++++- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 4 +-- 5 files changed, 67 insertions(+), 10 deletions(-)
From: Felix Kuehling Felix.Kuehling@amd.com
mainline inclusion from mainline-v6.8-rc1 commit 1819200166ce511ac298dc96b9b17eb655a9edc4 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I9TM7F CVE: CVE-2024-36022
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
Create GEM handles for exporting DMABufs using GEM-Prime APIs. The GEM handles are created in a drm_client_dev context to avoid exposing them in user mode contexts through a DMABuf import.
Signed-off-by: Felix Kuehling Felix.Kuehling@amd.com Reviewed-by: Ramesh Errabolu Ramesh.Errabolu@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Conflicts: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c [Due to not merge commit 0e2e7c5b3d71 drm/amdgpu: Attach eviction fence on alloc] Signed-off-by: Liu Chuang liuchuang40@huawei.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 11 +++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 5 +++ .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 33 +++++++++++++++---- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 4 +-- 4 files changed, 44 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 25d5fda5b243..fd78935e618b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -141,6 +141,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { int i; int last_valid_bit; + int ret;
amdgpu_amdkfd_gpuvm_init_mem_limits();
@@ -159,6 +160,12 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) .enable_mes = adev->enable_mes, };
+ ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", NULL); + if (ret) { + dev_err(adev->dev, "Failed to init DRM client: %d\n", ret); + return; + } + /* this is going to have a few of the MSBs set that we need to * clear */ @@ -197,6 +204,10 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev, &gpu_resources); + if (adev->kfd.init_complete) + drm_client_register(&adev->kfd.client); + else + drm_client_release(&adev->kfd.client);
amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 2fe9860725bd..e859b4930414 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -33,6 +33,7 @@ #include <linux/mmu_notifier.h> #include <linux/memremap.h> #include <kgd_kfd_interface.h> +#include <drm/drm_client.h> #include "amdgpu_sync.h" #include "amdgpu_vm.h" #include "amdgpu_xcp.h" @@ -83,6 +84,7 @@ struct kgd_mem {
struct amdgpu_sync sync;
+ uint32_t gem_handle; bool aql_queue; bool is_imported; }; @@ -105,6 +107,9 @@ struct amdgpu_kfd_dev {
/* HMM page migration MEMORY_DEVICE_PRIVATE mapping */ struct dev_pagemap pgmap; + + /* Client for KFD BO GEM handle allocations */ + struct drm_client_dev client; };
enum kgd_engine_type { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 15c5a2533ba6..ef1d4ae4cd8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -25,6 +25,7 @@ #include <linux/pagemap.h> #include <linux/sched/mm.h> #include <linux/sched/task.h> +#include <linux/fdtable.h> #include <drm/ttm/ttm_tt.h>
#include <drm/drm_exec.h> @@ -774,13 +775,22 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem, static int kfd_mem_export_dmabuf(struct kgd_mem *mem) { if (!mem->dmabuf) { - struct dma_buf *ret = amdgpu_gem_prime_export( - &mem->bo->tbo.base, + struct amdgpu_device *bo_adev; + struct dma_buf *dmabuf; + int r, fd; + + bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev); + r = drm_gem_prime_handle_to_fd(&bo_adev->ddev, bo_adev->kfd.client.file, + mem->gem_handle, mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? - DRM_RDWR : 0); - if (IS_ERR(ret)) - return PTR_ERR(ret); - mem->dmabuf = ret; + DRM_RDWR : 0, &fd); + if (r) + return r; + dmabuf = dma_buf_get(fd); + close_fd(fd); + if (WARN_ON_ONCE(IS_ERR(dmabuf))) + return PTR_ERR(dmabuf); + mem->dmabuf = dmabuf; }
return 0; @@ -1737,6 +1747,9 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( pr_debug("Failed to allow vma node access. ret %d\n", ret); goto err_node_allow; } + ret = drm_gem_handle_create(adev->kfd.client.file, gobj, &(*mem)->gem_handle); + if (ret) + goto err_gem_handle_create; bo = gem_to_amdgpu_bo(gobj); if (bo_type == ttm_bo_type_sg) { bo->tbo.sg = sg; @@ -1778,6 +1791,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( allocate_init_user_pages_failed: err_pin_bo: remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); + drm_gem_handle_delete(adev->kfd.client.file, (*mem)->gem_handle); +err_gem_handle_create: drm_vma_node_revoke(&gobj->vma_node, drm_priv); err_node_allow: /* Don't unreserve system mem limit twice */ @@ -1893,8 +1908,12 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
/* Free the BO*/ drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv); - if (mem->dmabuf) + if (!mem->is_imported) + drm_gem_handle_delete(adev->kfd.client.file, mem->gem_handle); + if (mem->dmabuf) { dma_buf_put(mem->dmabuf); + mem->dmabuf = NULL; + } mutex_destroy(&mem->lock);
/* If this releases the last reference, it will end up calling diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index d33ba4fe9ad5..559464dab7c2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1851,8 +1851,8 @@ static uint32_t get_process_num_bos(struct kfd_process *p) return num_of_bos; }
-static int criu_get_prime_handle(struct kgd_mem *mem, int flags, - u32 *shared_fd) +static int criu_get_prime_handle(struct kgd_mem *mem, + int flags, u32 *shared_fd) { struct dma_buf *dmabuf; int ret;
From: Flora Cui flora.cui@amd.com
mainline inclusion from mainline-v6.8-rc1 commit 733965a90f885de5e9626d383c7605a0a7850074 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I9TM7F CVE: CVE-2024-36022
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
otherwise drm_client_dev_unregister() would try to kfree(&adev->kfd.client).
Fixes: 1819200166ce ("drm/amdkfd: Export DMABufs from KFD using GEM handles") Signed-off-by: Flora Cui flora.cui@amd.com Reviewed-by: Felix Kuehling felix.kuehling@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Liu Chuang liuchuang40@huawei.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index fd78935e618b..0e89042055ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -137,6 +137,9 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work) amdgpu_device_gpu_recover(adev, NULL, &reset_context); }
+static const struct drm_client_funcs kfd_client_funcs = { + .unregister = drm_client_release, +}; void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { int i; @@ -160,7 +163,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) .enable_mes = adev->enable_mes, };
- ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", NULL); + ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", &kfd_client_funcs); if (ret) { dev_err(adev->dev, "Failed to init DRM client: %d\n", ret); return;
From: Le Ma le.ma@amd.com
mainline inclusion from mainline-v6.8-rc3 commit c0125b848abecfbc944bebe2cab076f09455b230 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I9TM7F CVE: CVE-2024-36022
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
This patch is to eliminate interrupt warning below:
"[drm] Fence fallback timer expired on ring sdma0.0".
An early vm pt clearing job is sent to SDMA ahead of interrupt enabled. And re-locating the drm client creation following after drm_dev_register looks like a more proper flow.
v2: wrap the drm client creation
Fixes: 1819200166ce ("drm/amdkfd: Export DMABufs from KFD using GEM handles") Signed-off-by: Le Ma le.ma@amd.com Reviewed-by: Felix Kuehling felix.kuehling@amd.com Reviewed-by: Lijo Lazar lijo.lazar@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Liu Chuang liuchuang40@huawei.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32 ++++++++++++++-------- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 +++ 3 files changed, 27 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 0e89042055ab..4b7c84686a55 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -140,11 +140,31 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work) static const struct drm_client_funcs kfd_client_funcs = { .unregister = drm_client_release, }; + +int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev) +{ + int ret; + + if (!adev->kfd.init_complete) + return 0; + + ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", + &kfd_client_funcs); + if (ret) { + dev_err(adev->dev, "Failed to init DRM client: %d\n", + ret); + return ret; + } + + drm_client_register(&adev->kfd.client); + + return 0; +} + void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { int i; int last_valid_bit; - int ret;
amdgpu_amdkfd_gpuvm_init_mem_limits();
@@ -163,12 +183,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) .enable_mes = adev->enable_mes, };
- ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", &kfd_client_funcs); - if (ret) { - dev_err(adev->dev, "Failed to init DRM client: %d\n", ret); - return; - } - /* this is going to have a few of the MSBs set that we need to * clear */ @@ -207,10 +221,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev, &gpu_resources); - if (adev->kfd.init_complete) - drm_client_register(&adev->kfd.client); - else - drm_client_release(&adev->kfd.client);
amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index e859b4930414..efe0329a9f8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -187,6 +187,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev, struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, struct mm_struct *mm, struct svm_range_bo *svm_bo); + +int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev); #if defined(CONFIG_DEBUG_FS) int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index f9bc38d20ce3..f7613edffb65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2161,6 +2161,10 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, if (ret) goto err_pci;
+ ret = amdgpu_amdkfd_drm_client_create(adev); + if (ret) + goto err_pci; + /* * 1. don't init fbdev on hw without DCE * 2. don't init fbdev if there are no connectors
From: Ahmad Rehman Ahmad.Rehman@amd.com
mainline inclusion from mainline-v6.9-rc1 commit f679fd6057fbf5ab34aaee28d58b7f81af0cbf48 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I9TM7F CVE: CVE-2024-36022
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
In passthrough environment, when amdgpu is reloaded after unload, mode-1 is triggered after initializing the necessary IPs, That init does not include KFD, and KFD init waits until the reset is completed. KFD init is called in the reset handler, but in this case, the zone device and drm client is not initialized, causing app to create kernel panic.
v2: Removing the init KFD condition from amdgpu_amdkfd_drm_client_create. As the previous version has the potential of creating DRM client twice.
v3: v2 patch results in SDMA engine hung as DRM open causes VM clear to SDMA before SDMA init. Adding the condition to in drm client creation, on top of v1, to guard against drm client creation call multiple times.
Signed-off-by: Ahmad Rehman Ahmad.Rehman@amd.com Reviewed-by: Felix Kuehling Felix.Kuehling@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Liu Chuang liuchuang40@huawei.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 4b7c84686a55..458d959989e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -145,7 +145,7 @@ int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev) { int ret;
- if (!adev->kfd.init_complete) + if (!adev->kfd.init_complete || adev->kfd.client.dev) return 0;
ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index f7613edffb65..18d456694ab7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2376,8 +2376,11 @@ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work) } for (i = 0; i < mgpu_info.num_dgpu; i++) { adev = mgpu_info.gpu_ins[i].adev; - if (!adev->kfd.init_complete) + if (!adev->kfd.init_complete) { + kgd2kfd_init_zone_device(adev); amdgpu_amdkfd_device_init(adev); + amdgpu_amdkfd_drm_client_create(adev); + } amdgpu_ttm_set_buffer_funcs_status(adev, true); } }
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/10167 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/I...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/10167 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/I...