Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 104 |
1 file changed, 70 insertions(+), 34 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 88c1eb9ad068..431742eb7811 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -62,6 +62,8 @@
 #include "amdgpu_ras.h"
 #include "amdgpu_xgmi.h"
 
+#include "amdgpu_reset.h"
+
 /* add these here since we already include dce12 headers and these are for DCN */
 #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d
 #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX 2
@@ -787,13 +789,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	 */
 	if (adev->gfx.kiq.ring.sched.ready &&
 	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
-	    down_read_trylock(&adev->reset_sem)) {
+	    down_read_trylock(&adev->reset_domain->sem)) {
 		uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
 		uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
 
 		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
 						   1 << vmid);
-		up_read(&adev->reset_sem);
+		up_read(&adev->reset_domain->sem);
 		return;
 	}
 
@@ -900,7 +902,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 	if (amdgpu_in_reset(adev))
 		return -EIO;
 
-	if (ring->sched.ready && down_read_trylock(&adev->reset_sem)) {
+	if (ring->sched.ready && down_read_trylock(&adev->reset_domain->sem)) {
 		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
 		 * heavy-weight TLB flush (type 2), which flushes
 		 * both. Due to a race condition with concurrent
@@ -927,7 +929,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 		if (r) {
 			amdgpu_ring_undo(ring);
 			spin_unlock(&adev->gfx.kiq.ring_lock);
-			up_read(&adev->reset_sem);
+			up_read(&adev->reset_domain->sem);
 			return -ETIME;
 		}
 
@@ -936,10 +938,10 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
 		if (r < 1) {
 			dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
-			up_read(&adev->reset_sem);
+			up_read(&adev->reset_domain->sem);
 			return -ETIME;
 		}
 
-		up_read(&adev->reset_sem);
+		up_read(&adev->reset_domain->sem);
 		return 0;
 	}
@@ -1202,7 +1204,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
 		adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
 		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
 		adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
-		adev->umc.ras_funcs = &umc_v6_1_ras_funcs;
+		adev->umc.ras = &umc_v6_1_ras;
 		break;
 	case IP_VERSION(6, 1, 2):
 		adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
@@ -1210,15 +1212,16 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
 		adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
 		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
 		adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
-		adev->umc.ras_funcs = &umc_v6_1_ras_funcs;
+		adev->umc.ras = &umc_v6_1_ras;
 		break;
 	case IP_VERSION(6, 7, 0):
-		adev->umc.max_ras_err_cnt_per_query = UMC_V6_7_TOTAL_CHANNEL_NUM;
+		adev->umc.max_ras_err_cnt_per_query =
+			UMC_V6_7_TOTAL_CHANNEL_NUM * UMC_V6_7_BAD_PAGE_NUM_PER_CHANNEL;
 		adev->umc.channel_inst_num = UMC_V6_7_CHANNEL_INSTANCE_NUM;
 		adev->umc.umc_inst_num = UMC_V6_7_UMC_INSTANCE_NUM;
 		adev->umc.channel_offs = UMC_V6_7_PER_CHANNEL_OFFSET;
 		if (!adev->gmc.xgmi.connected_to_cpu)
-			adev->umc.ras_funcs = &umc_v6_7_ras_funcs;
+			adev->umc.ras = &umc_v6_7_ras;
 		if (1 & adev->smuio.funcs->get_die_id(adev))
 			adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_first[0][0];
 		else
@@ -1227,6 +1230,23 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
 	default:
 		break;
 	}
+
+	if (adev->umc.ras) {
+		amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block);
+
+		strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc");
+		adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC;
+		adev->umc.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+		adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm;
+
+		/* If don't define special ras_late_init function, use default ras_late_init */
+		if (!adev->umc.ras->ras_block.ras_late_init)
+			adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init;
+
+		/* If not defined special ras_cb function, use default ras_cb */
+		if (!adev->umc.ras->ras_block.ras_cb)
+			adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb;
+	}
 }
 
 static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
@@ -1248,18 +1268,27 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev)
 {
 	switch (adev->ip_versions[MMHUB_HWIP][0]) {
 	case IP_VERSION(9, 4, 0):
-		adev->mmhub.ras_funcs = &mmhub_v1_0_ras_funcs;
+		adev->mmhub.ras = &mmhub_v1_0_ras;
 		break;
 	case IP_VERSION(9, 4, 1):
-		adev->mmhub.ras_funcs = &mmhub_v9_4_ras_funcs;
+		adev->mmhub.ras = &mmhub_v9_4_ras;
 		break;
 	case IP_VERSION(9, 4, 2):
-		adev->mmhub.ras_funcs = &mmhub_v1_7_ras_funcs;
+		adev->mmhub.ras = &mmhub_v1_7_ras;
 		break;
 	default:
 		/* mmhub ras is not available */
 		break;
 	}
+
+	if (adev->mmhub.ras) {
+		amdgpu_ras_register_ras_block(adev, &adev->mmhub.ras->ras_block);
+
+		strcpy(adev->mmhub.ras->ras_block.ras_comm.name, "mmhub");
+		adev->mmhub.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MMHUB;
+		adev->mmhub.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+		adev->mmhub.ras_if = &adev->mmhub.ras->ras_block.ras_comm;
+	}
 }
 
 static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
@@ -1269,7 +1298,9 @@ static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
 
 static void gmc_v9_0_set_hdp_ras_funcs(struct amdgpu_device *adev)
 {
-	adev->hdp.ras_funcs = &hdp_v4_0_ras_funcs;
+	adev->hdp.ras = &hdp_v4_0_ras;
+	amdgpu_ras_register_ras_block(adev, &adev->hdp.ras->ras_block);
+	adev->hdp.ras_if = &adev->hdp.ras->ras_block.ras_comm;
 }
 
 static void gmc_v9_0_set_mca_funcs(struct amdgpu_device *adev)
@@ -1287,6 +1318,7 @@ static void gmc_v9_0_set_mca_funcs(struct amdgpu_device *adev)
 
 static int gmc_v9_0_early_init(void *handle)
 {
+	int r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	/* ARCT and VEGA20 don't have XGMI defined in their IP discovery tables */
@@ -1316,6 +1348,10 @@ static int gmc_v9_0_early_init(void *handle)
 	adev->gmc.private_aperture_end =
 		adev->gmc.private_aperture_start + (4ULL << 30) - 1;
 
+	r = amdgpu_gmc_ras_early_init(adev);
+	if (r)
+		return r;
+
 	return 0;
 }
 
@@ -1342,13 +1378,13 @@ static int gmc_v9_0_late_init(void *handle)
 	}
 
 	if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
-		if (adev->mmhub.ras_funcs &&
-		    adev->mmhub.ras_funcs->reset_ras_error_count)
-			adev->mmhub.ras_funcs->reset_ras_error_count(adev);
+		if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
+		    adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
+			adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
 
-		if (adev->hdp.ras_funcs &&
-		    adev->hdp.ras_funcs->reset_ras_error_count)
-			adev->hdp.ras_funcs->reset_ras_error_count(adev);
+		if (adev->hdp.ras && adev->hdp.ras->ras_block.hw_ops &&
+		    adev->hdp.ras->ras_block.hw_ops->reset_ras_error_count)
+			adev->hdp.ras->ras_block.hw_ops->reset_ras_error_count(adev);
 	}
 
 	r = amdgpu_gmc_ras_late_init(adev);
@@ -1517,7 +1553,7 @@ static void gmc_v9_0_save_registers(struct amdgpu_device *adev)
 
 static int gmc_v9_0_sw_init(void *handle)
 {
-	int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
+	int r, vram_width = 0, vram_type = 0, vram_vendor = 0, dma_addr_bits;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	adev->gfxhub.funcs->init(adev);
@@ -1633,12 +1669,13 @@ static int gmc_v9_0_sw_init(void *handle)
 	 */
 	adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
 
-	r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
+	dma_addr_bits = adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ? 48:44;
+	r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(dma_addr_bits));
 	if (r) {
 		printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
 		return r;
 	}
-	adev->need_swiotlb = drm_need_swiotlb(44);
+	adev->need_swiotlb = drm_need_swiotlb(dma_addr_bits);
 
 	r = gmc_v9_0_mc_init(adev);
 	if (r)
@@ -1752,14 +1789,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
 		return -EINVAL;
 	}
 
-	if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev))
-		goto skip_pin_bo;
-
-	r = amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
-	if (r)
-		return r;
-
-skip_pin_bo:
+	amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
 	r = adev->gfxhub.funcs->gart_enable(adev);
 	if (r)
 		return r;
@@ -1776,7 +1806,6 @@ skip_pin_bo:
 	DRM_INFO("PTB located at 0x%016llX\n",
 			(unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
 
-	adev->gart.ready = true;
 	return 0;
 }
 
@@ -1784,7 +1813,7 @@ static int gmc_v9_0_hw_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	bool value;
-	int i;
+	int i, r;
 
 	/* The sequence of these two function calls matters.*/
 	gmc_v9_0_init_golden_registers(adev);
@@ -1819,7 +1848,14 @@ static int gmc_v9_0_hw_init(void *handle)
 	if (adev->umc.funcs && adev->umc.funcs->init_registers)
 		adev->umc.funcs->init_registers(adev);
 
-	return gmc_v9_0_gart_enable(adev);
+	r = gmc_v9_0_gart_enable(adev);
+	if (r)
+		return r;
+
+	if (amdgpu_emu_mode == 1)
+		return amdgpu_gmc_vram_checking(adev);
+	else
+		return r;
 }
 
 /**
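
Note: the hunks above replace the per-IP ras_funcs pointers (umc, mmhub, hdp) with registered RAS block objects whose optional callbacks fall back to generic defaults when a block does not supply its own. As a rough, self-contained C sketch of that defaulting pattern only (the struct and function names below are simplified stand-ins, not the amdgpu API):

	/* Simplified illustration of "register a block, fill in generic
	 * defaults for any hook it did not provide"; names are hypothetical. */
	#include <stdio.h>
	#include <string.h>

	struct ras_block {
		char name[32];
		int (*late_init)(void);     /* optional per-block hook */
		int (*handle_error)(void);  /* optional per-block hook */
	};

	static int default_late_init(void)    { puts("generic late init");     return 0; }
	static int default_handle_error(void) { puts("generic error handler"); return 0; }

	static void register_block(struct ras_block *blk, const char *name)
	{
		strcpy(blk->name, name);
		if (!blk->late_init)
			blk->late_init = default_late_init;     /* default when unset */
		if (!blk->handle_error)
			blk->handle_error = default_handle_error;
	}

	int main(void)
	{
		struct ras_block umc = { 0 };  /* block provides no hooks of its own */

		register_block(&umc, "umc");
		umc.late_init();
		umc.handle_error();
		return 0;
	}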