Diffstat (limited to 'drivers/misc/habanalabs/common/memory.c')
-rw-r--r--	drivers/misc/habanalabs/common/memory.c	182
1 file changed, 129 insertions(+), 53 deletions(-)
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index 1f5910517b0e..2938cbbafbbc 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -81,16 +81,6 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
 				num_pgs, total_size);
 			return -ENOMEM;
 		}
-
-		if (hdev->memory_scrub) {
-			rc = hdev->asic_funcs->scrub_device_mem(hdev, paddr,
-					total_size);
-			if (rc) {
-				dev_err(hdev->dev,
-					"Failed to scrub contiguous device memory\n");
-				goto pages_pack_err;
-			}
-		}
 	}
 
 	phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
@@ -128,24 +118,13 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
 				goto page_err;
 			}
 
-			if (hdev->memory_scrub) {
-				rc = hdev->asic_funcs->scrub_device_mem(hdev,
-						phys_pg_pack->pages[i],
-						page_size);
-				if (rc) {
-					dev_err(hdev->dev,
-						"Failed to scrub device memory\n");
-					goto page_err;
-				}
-			}
-
 			num_curr_pgs++;
 		}
 	}
 
 	spin_lock(&vm->idr_lock);
 	handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0,
-				GFP_ATOMIC);
+				GFP_KERNEL);
 	spin_unlock(&vm->idr_lock);
 
 	if (handle < 0) {
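The two hunks above drop scrubbing from the allocation path; the free path in the next hunk takes it over, so a later user still never sees stale DRAM contents, while allocation itself gets cheaper. A minimal sketch of the resulting scrub-on-free policy; scrub_and_free_page() is a hypothetical helper, but every field and callback it touches appears in this diff:

/*
 * Hypothetical helper, condensed from the free path below: a DRAM
 * block is scrubbed on its way back into the pool, never on its way
 * out, so a later gen_pool_alloc() can hand it out as-is.
 */
static int scrub_and_free_page(struct hl_device *hdev, u64 paddr, u32 size)
{
	struct hl_vm *vm = &hdev->vm;
	int rc = 0;

	/* Skip scrubbing when the device is disabled, e.g. mid-reset */
	if (hdev->memory_scrub && !hdev->disabled)
		rc = hdev->asic_funcs->scrub_device_mem(hdev, paddr, size);

	/* The block goes back to the pool even if scrubbing failed */
	gen_pool_free(vm->dram_pg_pool, paddr, size);

	return rc;
}

As the next hunk shows, a scrub failure is reported and ends in a hard reset, after which hdev->disabled short-circuits further scrubbing while the frees continue.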
@@ -280,37 +259,67 @@ static void dram_pg_pool_do_release(struct kref *ref)
  * @phys_pg_pack: physical page pack to free.
  *
  * This function does the following:
- * - For DRAM memory only, iterate over the pack and free each physical block
- *   structure by returning it to the general pool.
+ * - For DRAM memory only
+ *   - iterate over the pack, scrub and free each physical block structure by
+ *     returning it to the general pool.
+ *     In case of error during scrubbing, initiate hard reset.
+ *     Once hard reset is triggered, scrubbing is bypassed while freeing the
+ *     memory continues.
  * - Free the hl_vm_phys_pg_pack structure.
  */
-static void free_phys_pg_pack(struct hl_device *hdev,
+static int free_phys_pg_pack(struct hl_device *hdev,
 		struct hl_vm_phys_pg_pack *phys_pg_pack)
 {
 	struct hl_vm *vm = &hdev->vm;
 	u64 i;
+	int rc = 0;
 
-	if (!phys_pg_pack->created_from_userptr) {
-		if (phys_pg_pack->contiguous) {
-			gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0],
+	if (phys_pg_pack->created_from_userptr)
+		goto end;
+
+	if (phys_pg_pack->contiguous) {
+		if (hdev->memory_scrub && !hdev->disabled) {
+			rc = hdev->asic_funcs->scrub_device_mem(hdev,
+					phys_pg_pack->pages[0],
 					phys_pg_pack->total_size);
+			if (rc)
+				dev_err(hdev->dev,
+					"Failed to scrub contiguous device memory\n");
+		}
 
-			for (i = 0; i < phys_pg_pack->npages ; i++)
-				kref_put(&vm->dram_pg_pool_refcount,
-					dram_pg_pool_do_release);
-		} else {
-			for (i = 0 ; i < phys_pg_pack->npages ; i++) {
-				gen_pool_free(vm->dram_pg_pool,
+		gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0],
+				phys_pg_pack->total_size);
+
+		for (i = 0; i < phys_pg_pack->npages ; i++)
+			kref_put(&vm->dram_pg_pool_refcount,
+				dram_pg_pool_do_release);
+	} else {
+		for (i = 0 ; i < phys_pg_pack->npages ; i++) {
+			if (hdev->memory_scrub && !hdev->disabled && rc == 0) {
+				rc = hdev->asic_funcs->scrub_device_mem(
+						hdev,
+						phys_pg_pack->pages[i],
 						phys_pg_pack->page_size);
-				kref_put(&vm->dram_pg_pool_refcount,
-					dram_pg_pool_do_release);
+				if (rc)
+					dev_err(hdev->dev,
+						"Failed to scrub device memory\n");
 			}
+			gen_pool_free(vm->dram_pg_pool,
+					phys_pg_pack->pages[i],
+					phys_pg_pack->page_size);
+			kref_put(&vm->dram_pg_pool_refcount,
+				dram_pg_pool_do_release);
 		}
 	}
 
+	if (rc && !hdev->disabled)
+		hl_device_reset(hdev, HL_RESET_HARD);
+
+end:
 	kvfree(phys_pg_pack->pages);
 	kfree(phys_pg_pack);
+
+	return rc;
 }
@@ -349,7 +358,7 @@ static int free_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args)
 		atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem);
 		atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem);
 
-		free_phys_pg_pack(hdev, phys_pg_pack);
+		return free_phys_pg_pack(hdev, phys_pg_pack);
 	} else {
 		spin_unlock(&vm->idr_lock);
 		dev_err(hdev->dev,
@@ -857,6 +866,7 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 	u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i;
 	u32 page_size = phys_pg_pack->page_size;
 	int rc = 0;
+	bool is_host_addr;
 
 	for (i = 0 ; i < phys_pg_pack->npages ; i++) {
 		paddr = phys_pg_pack->pages[i];
@@ -878,6 +888,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 	return 0;
 
 err:
+	is_host_addr = !hl_is_dram_va(hdev, vaddr);
+
 	next_vaddr = vaddr;
 	for (i = 0 ; i < mapped_pg_cnt ; i++) {
 		if (hl_mmu_unmap_page(ctx, next_vaddr, page_size,
@@ -888,6 +900,17 @@ err:
 					phys_pg_pack->pages[i], page_size);
 
 		next_vaddr += page_size;
+
+		/*
+		 * unmapping on Palladium can be really long, so avoid a CPU
+		 * soft lockup bug by sleeping a little between unmapping pages
+		 *
+		 * In addition, the number of host pages can be huge as
+		 * the page size may be 4KB, so when unmapping host pages
+		 * sleep every 32K pages to avoid a soft lockup
+		 */
+		if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0))
+			usleep_range(50, 200);
 	}
 
 	return rc;
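This rollback path and unmap_phys_pg_pack() in the next hunk now share the same watchdog-friendly loop: (i & 0x7FFF) == 0 is true once every 32768 iterations, i.e. every 32K pages, which with 4KB host pages means one short sleep per 128MB unmapped. A condensed sketch of the pattern; unmap_throttled() is hypothetical, while hl_mmu_unmap_page() and the fields it uses are taken from this diff:

/* Hypothetical condensation of the throttled unmap loops in this diff;
 * per-page error reporting is elided for brevity.
 */
static void unmap_throttled(struct hl_ctx *ctx, u64 vaddr, u32 page_size,
				u64 npages, bool is_host_addr)
{
	struct hl_device *hdev = ctx->hdev;
	u64 i;

	for (i = 0 ; i < npages ; i++, vaddr += page_size) {
		hl_mmu_unmap_page(ctx, vaddr, page_size, (i + 1) == npages);

		/*
		 * usleep_range() yields the CPU, so the soft-lockup
		 * watchdog never sees this loop monopolize a core, even
		 * for huge host mappings built from 4KB pages.
		 */
		if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0))
			usleep_range(50, 200);
	}
}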
@@ -921,9 +944,9 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 		 * unmapping on Palladium can be really long, so avoid a CPU
 		 * soft lockup bug by sleeping a little between unmapping pages
 		 *
-		 * In addition, when unmapping host memory we pass through
-		 * the Linux kernel to unpin the pages and that takes a long
-		 * time. Therefore, sleep every 32K pages to avoid soft lockup
+		 * In addition, the number of host pages can be huge as
+		 * the page size may be 4KB, so when unmapping host pages
+		 * sleep every 32K pages to avoid a soft lockup
 		 */
 		if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0))
 			usleep_range(50, 200);
@@ -1117,9 +1140,9 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 	*device_addr = ret_vaddr;
 
 	if (is_userptr)
-		free_phys_pg_pack(hdev, phys_pg_pack);
+		rc = free_phys_pg_pack(hdev, phys_pg_pack);
 
-	return 0;
+	return rc;
 
 map_err:
 	if (add_va_block(hdev, va_range, ret_vaddr,
@@ -1272,7 +1295,7 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 	kfree(hnode);
 
 	if (is_userptr) {
-		free_phys_pg_pack(hdev, phys_pg_pack);
+		rc = free_phys_pg_pack(hdev, phys_pg_pack);
 		dma_unmap_host_va(hdev, userptr);
 	}
@@ -1305,9 +1328,15 @@ static int map_block(struct hl_device *hdev, u64 address, u64 *handle,
 
 static void hw_block_vm_close(struct vm_area_struct *vma)
 {
-	struct hl_ctx *ctx = (struct hl_ctx *) vma->vm_private_data;
+	struct hl_vm_hw_block_list_node *lnode =
+		(struct hl_vm_hw_block_list_node *) vma->vm_private_data;
+	struct hl_ctx *ctx = lnode->ctx;
 
+	mutex_lock(&ctx->hw_block_list_lock);
+	list_del(&lnode->node);
+	mutex_unlock(&ctx->hw_block_list_lock);
 	hl_ctx_put(ctx);
+	kfree(lnode);
 	vma->vm_private_data = NULL;
 }
@@ -1325,7 +1354,9 @@ static const struct vm_operations_struct hw_block_vm_ops = {
  */
 int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
 {
+	struct hl_vm_hw_block_list_node *lnode;
 	struct hl_device *hdev = hpriv->hdev;
+	struct hl_ctx *ctx = hpriv->ctx;
 	u32 block_id, block_size;
 	int rc;
@@ -1351,17 +1382,31 @@ int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
 		return -EINVAL;
 	}
 
+	lnode = kzalloc(sizeof(*lnode), GFP_KERNEL);
+	if (!lnode)
+		return -ENOMEM;
+
 	vma->vm_ops = &hw_block_vm_ops;
-	vma->vm_private_data = hpriv->ctx;
+	vma->vm_private_data = lnode;
 
-	hl_ctx_get(hdev, hpriv->ctx);
+	hl_ctx_get(hdev, ctx);
 
 	rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size);
 	if (rc) {
-		hl_ctx_put(hpriv->ctx);
+		hl_ctx_put(ctx);
+		kfree(lnode);
 		return rc;
 	}
 
+	lnode->ctx = ctx;
+	lnode->vaddr = vma->vm_start;
+	lnode->size = block_size;
+	lnode->id = block_id;
+
+	mutex_lock(&ctx->hw_block_list_lock);
+	list_add_tail(&lnode->node, &ctx->hw_block_mem_list);
+	mutex_unlock(&ctx->hw_block_list_lock);
+
 	vma->vm_pgoff = block_id;
 
 	return 0;
@@ -1574,7 +1619,7 @@ static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
 	rc = sg_alloc_table_from_pages(userptr->sgt,
 				       userptr->pages,
-				       npages, offset, size, GFP_ATOMIC);
+				       npages, offset, size, GFP_KERNEL);
 	if (rc < 0) {
 		dev_err(hdev->dev, "failed to create SG table from pages\n");
 		goto put_pages;
 	}
@@ -1624,11 +1669,7 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
 		return -EINVAL;
 	}
 
-	/*
-	 * This function can be called also from data path, hence use atomic
-	 * always as it is not a big allocation.
-	 */
-	userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC);
+	userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_KERNEL);
 	if (!userptr->sgt)
 		return -ENOMEM;
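The mmap/close hunks above hang a bookkeeping node off each HW block vma, alive for exactly the vma's lifetime (kzalloc in hl_hw_block_mmap(), kfree in hw_block_vm_close()). The node type itself is declared outside this file (in habanalabs.h, not shown by this diffstat); inferred from the fields assigned above, it presumably looks like:

/* Inferred from usage in this diff; the real declaration lives in habanalabs.h */
struct hl_vm_hw_block_list_node {
	struct list_head	node;	/* linked on ctx->hw_block_mem_list */
	struct hl_ctx		*ctx;	/* owning context, held via hl_ctx_get() */
	unsigned long		vaddr;	/* vma->vm_start of the user mapping */
	u32			size;	/* size of the mapped HW block */
	u32			id;	/* HW block id (also stored in vm_pgoff) */
};

Storing the node rather than the bare context in vm_private_data lets hw_block_vm_close() both unlink the node under hw_block_list_lock and drop the context reference the node carries.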
@@ -2122,3 +2163,38 @@ void hl_vm_fini(struct hl_device *hdev)
 
 	vm->init_done = false;
 }
+
+/**
+ * hl_hw_block_mem_init() - HW block memory initialization.
+ * @ctx: pointer to the habanalabs context structure.
+ *
+ * This function initializes the HW block virtual mapped addresses list and
+ * its lock.
+ */
+void hl_hw_block_mem_init(struct hl_ctx *ctx)
+{
+	mutex_init(&ctx->hw_block_list_lock);
+	INIT_LIST_HEAD(&ctx->hw_block_mem_list);
+}
+
+/**
+ * hl_hw_block_mem_fini() - HW block memory teardown.
+ * @ctx: pointer to the habanalabs context structure.
+ *
+ * This function clears the HW block virtual mapped addresses list and destroys
+ * its lock.
+ */
+void hl_hw_block_mem_fini(struct hl_ctx *ctx)
+{
+	struct hl_vm_hw_block_list_node *lnode, *tmp;
+
+	if (!list_empty(&ctx->hw_block_mem_list))
+		dev_crit(ctx->hdev->dev, "HW block mem list isn't empty\n");
+
+	list_for_each_entry_safe(lnode, tmp, &ctx->hw_block_mem_list, node) {
+		list_del(&lnode->node);
+		kfree(lnode);
+	}
+
+	mutex_destroy(&ctx->hw_block_list_lock);
+}
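The two new helpers are meant to be called from the context lifecycle code, whose changes fall outside this diffstat (memory.c only). A plausible wiring, with hypothetical example_* call sites:

/* Hypothetical call sites; the driver's actual ones live in the context
 * init/teardown paths, which this diff does not show.
 */
static int example_ctx_init(struct hl_ctx *ctx)
{
	/* before any hl_hw_block_mmap() can run for this context */
	hl_hw_block_mem_init(ctx);

	return 0;
}

static void example_ctx_fini(struct hl_ctx *ctx)
{
	/*
	 * Must run after the last user vma is gone: hw_block_vm_close()
	 * still takes ctx->hw_block_list_lock, and hl_hw_block_mem_fini()
	 * destroys that mutex. The dev_crit() above flags any node that
	 * is unexpectedly still on the list at that point.
	 */
	hl_hw_block_mem_fini(ctx);
}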