Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 261
1 file changed, 211 insertions(+), 50 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 716f2afeb6a9..887483b8b818 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -34,6 +34,7 @@
 #include <ttm/ttm_placement.h>
 #include <ttm/ttm_module.h>
 #include <ttm/ttm_page_alloc.h>
+#include <ttm/ttm_memory.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include <linux/seq_file.h>
@@ -74,7 +75,7 @@ static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
 	ttm_mem_global_release(ref->object);
 }
 
-static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
+int amdgpu_ttm_global_init(struct amdgpu_device *adev)
 {
 	struct drm_global_reference *global_ref;
 	struct amdgpu_ring *ring;
@@ -88,10 +89,10 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
 	global_ref->init = &amdgpu_ttm_mem_global_init;
 	global_ref->release = &amdgpu_ttm_mem_global_release;
 	r = drm_global_item_ref(global_ref);
-	if (r != 0) {
+	if (r) {
 		DRM_ERROR("Failed setting up TTM memory accounting "
 			  "subsystem.\n");
-		return r;
+		goto error_mem;
 	}
 
 	adev->mman.bo_global_ref.mem_glob =
@@ -102,26 +103,30 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
 	global_ref->init = &ttm_bo_global_init;
 	global_ref->release = &ttm_bo_global_release;
 	r = drm_global_item_ref(global_ref);
-	if (r != 0) {
+	if (r) {
 		DRM_ERROR("Failed setting up TTM BO subsystem.\n");
-		drm_global_item_unref(&adev->mman.mem_global_ref);
-		return r;
+		goto error_bo;
 	}
 
 	ring = adev->mman.buffer_funcs_ring;
 	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
 	r = amd_sched_entity_init(&ring->sched, &adev->mman.entity,
 				  rq, amdgpu_sched_jobs);
-	if (r != 0) {
+	if (r) {
 		DRM_ERROR("Failed setting up TTM BO move run queue.\n");
-		drm_global_item_unref(&adev->mman.mem_global_ref);
-		drm_global_item_unref(&adev->mman.bo_global_ref.ref);
-		return r;
+		goto error_entity;
 	}
 
 	adev->mman.mem_global_referenced = true;
 
 	return 0;
+
+error_entity:
+	drm_global_item_unref(&adev->mman.bo_global_ref.ref);
+error_bo:
+	drm_global_item_unref(&adev->mman.mem_global_ref);
+error_mem:
+	return r;
 }
 
 static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
@@ -155,7 +160,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 		man->default_caching = TTM_PL_FLAG_CACHED;
 		break;
 	case TTM_PL_TT:
-		man->func = &ttm_bo_manager_func;
+		man->func = &amdgpu_gtt_mgr_func;
 		man->gpu_offset = adev->mc.gtt_start;
 		man->available_caching = TTM_PL_MASK_CACHING;
 		man->default_caching = TTM_PL_FLAG_CACHED;
@@ -190,12 +195,13 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 				struct ttm_placement *placement)
 {
-	struct amdgpu_bo *rbo;
+	struct amdgpu_bo *abo;
 	static struct ttm_place placements = {
 		.fpfn = 0,
 		.lpfn = 0,
 		.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
 	};
+	unsigned i;
 
 	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
 		placement->placement = &placements;
@@ -204,28 +210,44 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 		placement->num_busy_placement = 1;
 		return;
 	}
-	rbo = container_of(bo, struct amdgpu_bo, tbo);
+	abo = container_of(bo, struct amdgpu_bo, tbo);
 	switch (bo->mem.mem_type) {
 	case TTM_PL_VRAM:
-		if (rbo->adev->mman.buffer_funcs_ring->ready == false)
-			amdgpu_ttm_placement_from_domain(rbo, AMDGPU_GEM_DOMAIN_CPU);
-		else
-			amdgpu_ttm_placement_from_domain(rbo, AMDGPU_GEM_DOMAIN_GTT);
+		if (abo->adev->mman.buffer_funcs_ring->ready == false) {
+			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
+		} else {
+			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
+			for (i = 0; i < abo->placement.num_placement; ++i) {
+				if (!(abo->placements[i].flags &
+				      TTM_PL_FLAG_TT))
+					continue;
+
+				if (abo->placements[i].lpfn)
+					continue;
+
+				/* set an upper limit to force directly
+				 * allocating address space for the BO.
+				 */
+				abo->placements[i].lpfn =
+					abo->adev->mc.gtt_size >> PAGE_SHIFT;
+			}
+		}
 		break;
 	case TTM_PL_TT:
 	default:
-		amdgpu_ttm_placement_from_domain(rbo, AMDGPU_GEM_DOMAIN_CPU);
+		amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
 	}
-	*placement = rbo->placement;
+	*placement = abo->placement;
 }
 
 static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 {
-	struct amdgpu_bo *rbo = container_of(bo, struct amdgpu_bo, tbo);
+	struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
 
 	if (amdgpu_ttm_tt_get_usermm(bo->ttm))
 		return -EPERM;
-	return drm_vma_node_verify_access(&rbo->gem_base.vma_node, filp);
+	return drm_vma_node_verify_access(&abo->gem_base.vma_node,
+					  filp->private_data);
 }
 
 static void amdgpu_move_null(struct ttm_buffer_object *bo,
@@ -251,26 +273,30 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 
 	adev = amdgpu_get_adev(bo->bdev);
 	ring = adev->mman.buffer_funcs_ring;
-	old_start = (u64)old_mem->start << PAGE_SHIFT;
-	new_start = (u64)new_mem->start << PAGE_SHIFT;
 
 	switch (old_mem->mem_type) {
-	case TTM_PL_VRAM:
-		old_start += adev->mc.vram_start;
-		break;
 	case TTM_PL_TT:
-		old_start += adev->mc.gtt_start;
+		r = amdgpu_ttm_bind(bo, old_mem);
+		if (r)
+			return r;
+
+	case TTM_PL_VRAM:
+		old_start = (u64)old_mem->start << PAGE_SHIFT;
+		old_start += bo->bdev->man[old_mem->mem_type].gpu_offset;
 		break;
 	default:
 		DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
 		return -EINVAL;
 	}
 	switch (new_mem->mem_type) {
-	case TTM_PL_VRAM:
-		new_start += adev->mc.vram_start;
-		break;
 	case TTM_PL_TT:
-		new_start += adev->mc.gtt_start;
+		r = amdgpu_ttm_bind(bo, new_mem);
+		if (r)
+			return r;
+
+	case TTM_PL_VRAM:
+		new_start = (u64)new_mem->start << PAGE_SHIFT;
+		new_start += bo->bdev->man[new_mem->mem_type].gpu_offset;
 		break;
 	default:
 		DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
@@ -285,7 +311,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 
 	r = amdgpu_copy_buffer(ring, old_start, new_start,
 			       new_mem->num_pages * PAGE_SIZE, /* bytes */
-			       bo->resv, &fence);
+			       bo->resv, &fence, false);
 	if (r)
 		return r;
 
@@ -314,7 +340,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo,
 	placement.num_busy_placement = 1;
 	placement.busy_placement = &placements;
 	placements.fpfn = 0;
-	placements.lpfn = 0;
+	placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT;
 	placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
 			     interruptible, no_wait_gpu);
@@ -335,7 +361,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo,
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
-	r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, new_mem);
+	r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, new_mem);
 out_cleanup:
 	ttm_bo_mem_put(bo, &tmp_mem);
 	return r;
@@ -361,14 +387,14 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo,
 	placement.num_busy_placement = 1;
 	placement.busy_placement = &placements;
 	placements.fpfn = 0;
-	placements.lpfn = 0;
+	placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT;
 	placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
 			     interruptible, no_wait_gpu);
 	if (unlikely(r)) {
 		return r;
 	}
-	r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, &tmp_mem);
+	r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, &tmp_mem);
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
@@ -435,8 +461,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,
 
 	if (r) {
 memcpy:
-		r = ttm_bo_move_memcpy(bo, evict, interruptible,
-				       no_wait_gpu, new_mem);
+		r = ttm_bo_move_memcpy(bo, interruptible, no_wait_gpu, new_mem);
 		if (r) {
 			return r;
 		}
@@ -524,6 +549,7 @@ struct amdgpu_ttm_tt {
 	spinlock_t              guptasklock;
 	struct list_head        guptasks;
 	atomic_t                mmu_invalidations;
+	struct list_head        list;
 };
 
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
@@ -641,7 +667,6 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
 				   struct ttm_mem_reg *bo_mem)
 {
 	struct amdgpu_ttm_tt *gtt = (void*)ttm;
-	uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
 	int r;
 
 	if (gtt->userptr) {
@@ -651,7 +676,6 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
 			return r;
 		}
 	}
-	gtt->offset = (unsigned long)(bo_mem->start << PAGE_SHIFT);
 	if (!ttm->num_pages) {
 		WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
 		     ttm->num_pages, bo_mem, ttm);
@@ -662,14 +686,71 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
 	    bo_mem->mem_type == AMDGPU_PL_OA)
 		return -EINVAL;
 
+	return 0;
+}
+
+bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
+{
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+
+	return gtt && !list_empty(&gtt->list);
+}
+
+int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
+{
+	struct ttm_tt *ttm = bo->ttm;
+	struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
+	uint32_t flags;
+	int r;
+
+	if (!ttm || amdgpu_ttm_is_bound(ttm))
+		return 0;
+
+	r = amdgpu_gtt_mgr_alloc(&bo->bdev->man[TTM_PL_TT], bo,
+				 NULL, bo_mem);
+	if (r) {
+		DRM_ERROR("Failed to allocate GTT address space (%d)\n", r);
+		return r;
+	}
+
+	flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
+	gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
 	r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
 		ttm->pages, gtt->ttm.dma_address, flags);
 
 	if (r) {
-		DRM_ERROR("failed to bind %lu pages at 0x%08X\n",
-			  ttm->num_pages, (unsigned)gtt->offset);
+		DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
+			  ttm->num_pages, gtt->offset);
 		return r;
 	}
+	spin_lock(&gtt->adev->gtt_list_lock);
+	list_add_tail(&gtt->list, &gtt->adev->gtt_list);
+	spin_unlock(&gtt->adev->gtt_list_lock);
+	return 0;
+}
+
+int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
+{
+	struct amdgpu_ttm_tt *gtt, *tmp;
+	struct ttm_mem_reg bo_mem;
+	uint32_t flags;
+	int r;
+
+	bo_mem.mem_type = TTM_PL_TT;
+	spin_lock(&adev->gtt_list_lock);
+	list_for_each_entry_safe(gtt, tmp, &adev->gtt_list, list) {
+		flags = amdgpu_ttm_tt_pte_flags(gtt->adev, &gtt->ttm.ttm, &bo_mem);
+		r = amdgpu_gart_bind(adev, gtt->offset, gtt->ttm.ttm.num_pages,
+				     gtt->ttm.ttm.pages, gtt->ttm.dma_address,
+				     flags);
+		if (r) {
+			spin_unlock(&adev->gtt_list_lock);
+			DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
+				  gtt->ttm.ttm.num_pages, gtt->offset);
+			return r;
+		}
+	}
+	spin_unlock(&adev->gtt_list_lock);
 	return 0;
 }
 
@@ -677,12 +758,19 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 
+	if (gtt->userptr)
+		amdgpu_ttm_tt_unpin_userptr(ttm);
+
+	if (!amdgpu_ttm_is_bound(ttm))
+		return 0;
+
 	/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
 	if (gtt->adev->gart.ready)
 		amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
 
-	if (gtt->userptr)
-		amdgpu_ttm_tt_unpin_userptr(ttm);
+	spin_lock(&gtt->adev->gtt_list_lock);
+	list_del_init(&gtt->list);
+	spin_unlock(&gtt->adev->gtt_list_lock);
 
 	return 0;
 }
@@ -720,6 +808,7 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev,
 		kfree(gtt);
 		return NULL;
 	}
+	INIT_LIST_HEAD(&gtt->list);
 	return &gtt->ttm.ttm;
 }
 
@@ -991,10 +1080,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	unsigned i, j;
 	int r;
 
-	r = amdgpu_ttm_global_init(adev);
-	if (r) {
-		return r;
-	}
 	/* No others user of address space so set it to 0 */
 	r = ttm_bo_device_init(&adev->mman.bdev,
 			       adev->mman.bo_global_ref.ref.object,
@@ -1159,7 +1244,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 		       uint64_t dst_offset, uint32_t byte_count,
 		       struct reservation_object *resv,
-		       struct fence **fence)
+		       struct fence **fence, bool direct_submit)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_job *job;
@@ -1203,8 +1288,79 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
 	WARN_ON(job->ibs[0].length_dw > num_dw);
+	if (direct_submit) {
+		r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs,
+				       NULL, NULL, fence);
+		job->fence = fence_get(*fence);
+		if (r)
+			DRM_ERROR("Error scheduling IBs (%d)\n", r);
+		amdgpu_job_free(job);
+	} else {
+		r = amdgpu_job_submit(job, ring, &adev->mman.entity,
+				      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
+		if (r)
+			goto error_free;
+	}
+
+	return r;
+
+error_free:
+	amdgpu_job_free(job);
+	return r;
+}
+
+int amdgpu_fill_buffer(struct amdgpu_bo *bo,
+		       uint32_t src_data,
+		       struct reservation_object *resv,
+		       struct fence **fence)
+{
+	struct amdgpu_device *adev = bo->adev;
+	struct amdgpu_job *job;
+	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+
+	uint32_t max_bytes, byte_count;
+	uint64_t dst_offset;
+	unsigned int num_loops, num_dw;
+	unsigned int i;
+	int r;
+
+	byte_count = bo->tbo.num_pages << PAGE_SHIFT;
+	max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
+	num_loops = DIV_ROUND_UP(byte_count, max_bytes);
+	num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw;
+
+	/* for IB padding */
+	while (num_dw & 0x7)
+		num_dw++;
+
+	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
+	if (r)
+		return r;
+
+	if (resv) {
+		r = amdgpu_sync_resv(adev, &job->sync, resv,
+				     AMDGPU_FENCE_OWNER_UNDEFINED);
+		if (r) {
+			DRM_ERROR("sync failed (%d).\n", r);
+			goto error_free;
+		}
+	}
+
+	dst_offset = bo->tbo.mem.start << PAGE_SHIFT;
+	for (i = 0; i < num_loops; i++) {
+		uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
+
+		amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data,
+					dst_offset, cur_size_in_bytes);
+
+		dst_offset += cur_size_in_bytes;
+		byte_count -= cur_size_in_bytes;
+	}
+
+	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+	WARN_ON(job->ibs[0].length_dw > num_dw);
 	r = amdgpu_job_submit(job, ring, &adev->mman.entity,
-			AMDGPU_FENCE_OWNER_UNDEFINED, fence);
+			      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
 	if (r)
 		goto error_free;
 
@@ -1395,3 +1551,8 @@ static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev)
 
 #endif
 }
+
+u64 amdgpu_ttm_get_gtt_mem_size(struct amdgpu_device *adev)
+{
+	return ttm_get_kernel_zone_memory_size(adev->mman.mem_global_ref.object);
+}