summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Philip Yang <Philip.Yang@amd.com> 2020-11-30 16:02:00 -0500
committer Philip Yang <Philip.Yang@amd.com> 2020-11-30 18:28:16 -0500
commit 47eb3d5e2acef1e99c6e071d4b83ab42ef6ea5d3 (patch)
tree 6215820dd201587bfb805e9d2e4b139c4cd00e64
parent 170291cad6477ebedfa2126b6d0f7fdcc75d41df (diff)
drm/amdkfd: keep BOs in system memory if restore failed
If VRAM is used up, display VRAM allocations evict the KFD BOs to system memory, and KFD schedules restore work to move the BOs back to VRAM. If display BOs are pinned in VRAM, the KFD restore work will keep retrying and may never succeed. If restoring a BO back to VRAM fails, keep the BO in system memory to prevent endless restore retries; the GPU mapping will be updated to system memory. Signed-off-by: Philip Yang <Philip.Yang@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 17
1 files changed, 15 insertions, 2 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 32838fed950d..c23e6692c55e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2369,6 +2369,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
int ret = 0, i;
struct list_head duplicate_save;
struct amdgpu_sync sync_obj;
+ unsigned long failed_size = 0;
+ unsigned long total_size = 0;
INIT_LIST_HEAD(&duplicate_save);
INIT_LIST_HEAD(&ctx.list);
@@ -2425,10 +2427,18 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
uint32_t domain = mem->domain;
struct kfd_bo_va_list *bo_va_entry;
+ total_size += amdgpu_bo_size(bo);
+
ret = amdgpu_amdkfd_bo_validate(bo, domain, false);
if (ret) {
- pr_debug("Memory eviction: Validate BOs failed. Try again\n");
- goto validate_map_fail;
+ pr_debug("Memory eviction: Validate BOs failed\n");
+ failed_size += amdgpu_bo_size(bo);
+ ret = amdgpu_amdkfd_bo_validate(bo,
+ AMDGPU_GEM_DOMAIN_GTT, false);
+ if (ret) {
+ pr_debug("Memory eviction: Try again\n");
+ goto validate_map_fail;
+ }
}
ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving);
if (ret) {
@@ -2448,6 +2458,9 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
}
}
+ if (failed_size)
+ pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
+
/* Update page directories */
ret = process_update_pds(process_info, &sync_obj);
if (ret) {