From: Dusica Milinkovic Dusica.Milinkovic@amd.com
stable inclusion from stable-v5.10.141 commit afa169f79d479edb23f1a727a37265a25d5a1e8e category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I685FC
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 373008bfc9cdb0f050258947fa5a095f0657e1bc ]
[Why] During multi-vf executing benchmark (Luxmark) observed kiq error timeout. It happenes because all of VFs do the tlb invalidation at the same time. Although each VF has the invalidate register set, from hardware side the invalidate requests are queue to execute.
[How] In case of 12 VF increase timeout on 12*100ms
Signed-off-by: Dusica Milinkovic Dusica.Milinkovic@amd.com Acked-by: Shaoyun Liu shaoyun.liu@amd.com Acked-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Jialin Zhang zhangjialin11@huawei.com Reviewed-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 3 ++- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index d949d6c52f24..ff5555353eb4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -283,7 +283,7 @@ enum amdgpu_kiq_irq { AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0, AMDGPU_CP_KIQ_IRQ_LAST }; - +#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */ #define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ #define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ #define MAX_KIQ_REG_TRY 80 /* 20 -> 80 */ diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 150fa5258fb6..2aa9242c58ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -371,6 +371,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint32_t seq; uint16_t queried_pasid; bool ret; + u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; struct amdgpu_kiq *kiq = &adev->gfx.kiq;
@@ -389,7 +390,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq.ring_lock); - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); if (r < 1) { dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); return -ETIME; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 3a864041968f..1673bf3bae55 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -839,6 +839,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint32_t seq; uint16_t queried_pasid; bool ret; + u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; struct amdgpu_kiq *kiq = &adev->gfx.kiq;
@@ -878,7 +879,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq.ring_lock); - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); if (r < 1) { dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); up_read(&adev->reset_sem);