diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1247 |
1 files changed, 456 insertions, 791 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 758d636a6f52..aa5a50f5eac8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -21,7 +21,7 @@ * */ #include <linux/firmware.h> -#include "drmP.h" +#include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_gfx.h" #include "vi.h" @@ -40,7 +40,6 @@ #include "bif/bif_5_0_d.h" #include "bif/bif_5_0_sh_mask.h" - #include "gca/gfx_8_0_d.h" #include "gca/gfx_8_0_enum.h" #include "gca/gfx_8_0_sh_mask.h" @@ -52,7 +51,7 @@ #include "smu/smu_7_1_3_d.h" #define GFX8_NUM_GFX_RINGS 1 -#define GFX8_NUM_COMPUTE_RINGS 8 +#define GFX8_MEC_HPD_SIZE 2048 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 @@ -657,10 +656,8 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev); static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev); static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev); static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev); -static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr); -static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr); -static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev); -static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev); +static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring); +static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring); static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) { @@ -859,7 +856,8 @@ err1: } -static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) { +static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) +{ release_firmware(adev->gfx.pfp_fw); adev->gfx.pfp_fw = NULL; release_firmware(adev->gfx.me_fw); @@ -941,12 +939,6 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - /* chain ib ucode isn't formal released, just disable it by far - * TODO: when ucod ready we should use ucode version to judge if - * chain-ib support or not. - */ - adev->virt.chained_ib_support = false; - adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); @@ -960,6 +952,17 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + /* + * Support for MCBP/Virtualization in combination with chained IBs is + * formal released on feature version #46 + */ + if (adev->gfx.ce_feature_version >= 46 && + adev->gfx.pfp_feature_version >= 46) { + adev->virt.chained_ib_support = true; + DRM_INFO("Chained IB support enabled!\n"); + } else + adev->virt.chained_ib_support = false; + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); if (err) @@ -1373,64 +1376,22 @@ static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) } } -static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_irq_src *irq) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - int r = 0; - - r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); - if (r) - return r; - - ring->adev = NULL; - ring->ring_obj = NULL; - ring->use_doorbell = true; - ring->doorbell_index = AMDGPU_DOORBELL_KIQ; - if (adev->gfx.mec2_fw) { - ring->me = 2; - ring->pipe = 0; - } else { - ring->me = 1; - ring->pipe = 1; - } - - ring->queue = 0; - ring->eop_gpu_addr = kiq->eop_gpu_addr; - sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue); - r = amdgpu_ring_init(adev, ring, 1024, - irq, AMDGPU_CP_KIQ_IRQ_DRIVER0); - if (r) - dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); - - return r; -} -static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring, - struct amdgpu_irq_src *irq) -{ - amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); - amdgpu_ring_fini(ring); -} - -#define MEC_HPD_SIZE 2048 - static int gfx_v8_0_mec_init(struct amdgpu_device *adev) { int r; u32 *hpd; + size_t mec_hpd_size; - /* - * we assign only 1 pipe because all other pipes will - * be handled by KFD - */ - adev->gfx.mec.num_mec = 1; - adev->gfx.mec.num_pipe = 1; - adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; + bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); + + /* take ownership of the relevant compute queues */ + amdgpu_gfx_compute_queue_acquire(adev); + + mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE; if (adev->gfx.mec.hpd_eop_obj == NULL) { r = amdgpu_bo_create(adev, - adev->gfx.mec.num_queue * MEC_HPD_SIZE, + mec_hpd_size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, &adev->gfx.mec.hpd_eop_obj); @@ -1459,7 +1420,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev) return r; } - memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE); + memset(hpd, 0, mec_hpd_size); amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); @@ -1467,38 +1428,6 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev) return 0; } -static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - - amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL); -} - -static int gfx_v8_0_kiq_init(struct amdgpu_device *adev) -{ - int r; - u32 *hpd; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - - r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj, - &kiq->eop_gpu_addr, (void **)&hpd); - if (r) { - dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r); - return r; - } - - memset(hpd, 0, MEC_HPD_SIZE); - - r = amdgpu_bo_reserve(kiq->eop_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r); - amdgpu_bo_kunmap(kiq->eop_obj); - amdgpu_bo_unreserve(kiq->eop_obj); - - return 0; -} - static const u32 vgpr_init_compute_shader[] = { 0x7e000209, 0x7e020208, @@ -1907,46 +1836,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.max_tile_pipes = 2; adev->gfx.config.max_sh_per_se = 1; adev->gfx.config.max_backends_per_se = 2; - - switch (adev->pdev->revision) { - case 0xc4: - case 0x84: - case 0xc8: - case 0xcc: - case 0xe1: - case 0xe3: - /* B10 */ - adev->gfx.config.max_cu_per_sh = 8; - break; - case 0xc5: - case 0x81: - case 0x85: - case 0xc9: - case 0xcd: - case 0xe2: - case 0xe4: - /* B8 */ - adev->gfx.config.max_cu_per_sh = 6; - break; - case 0xc6: - case 0xca: - case 0xce: - case 0x88: - case 0xe6: - /* B6 */ - adev->gfx.config.max_cu_per_sh = 6; - break; - case 0xc7: - case 0x87: - case 0xcb: - case 0xe5: - case 0x89: - default: - /* B4 */ - adev->gfx.config.max_cu_per_sh = 4; - break; - } - + adev->gfx.config.max_cu_per_sh = 8; adev->gfx.config.max_texture_channel_caches = 2; adev->gfx.config.max_gprs = 256; adev->gfx.config.max_gs_threads = 32; @@ -1963,35 +1853,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.max_tile_pipes = 2; adev->gfx.config.max_sh_per_se = 1; adev->gfx.config.max_backends_per_se = 1; - - switch (adev->pdev->revision) { - case 0x80: - case 0x81: - case 0xc0: - case 0xc1: - case 0xc2: - case 0xc4: - case 0xc8: - case 0xc9: - case 0xd6: - case 0xda: - case 0xe9: - case 0xea: - adev->gfx.config.max_cu_per_sh = 3; - break; - case 0x83: - case 0xd0: - case 0xd1: - case 0xd2: - case 0xd4: - case 0xdb: - case 0xe1: - case 0xe2: - default: - adev->gfx.config.max_cu_per_sh = 2; - break; - } - + adev->gfx.config.max_cu_per_sh = 3; adev->gfx.config.max_texture_channel_caches = 2; adev->gfx.config.max_gprs = 256; adev->gfx.config.max_gs_threads = 16; @@ -2083,13 +1945,67 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) return 0; } +static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + int mec, int pipe, int queue) +{ + int r; + unsigned irq_type; + struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; + + ring = &adev->gfx.compute_ring[ring_id]; + + /* mec0 is me1 */ + ring->me = mec + 1; + ring->pipe = pipe; + ring->queue = queue; + + ring->ring_obj = NULL; + ring->use_doorbell = true; + ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id; + ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + + (ring_id * GFX8_MEC_HPD_SIZE); + sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); + + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + + ring->pipe; + + /* type-2 packets are deprecated on MEC, use type-3 instead */ + r = amdgpu_ring_init(adev, ring, 1024, + &adev->gfx.eop_irq, irq_type); + if (r) + return r; + + + return 0; +} + static int gfx_v8_0_sw_init(void *handle) { - int i, r; + int i, j, k, r, ring_id; struct amdgpu_ring *ring; struct amdgpu_kiq *kiq; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + switch (adev->asic_type) { + case CHIP_FIJI: + case CHIP_TONGA: + case CHIP_POLARIS11: + case CHIP_POLARIS12: + case CHIP_POLARIS10: + case CHIP_CARRIZO: + adev->gfx.mec.num_mec = 2; + break; + case CHIP_TOPAZ: + case CHIP_STONEY: + default: + adev->gfx.mec.num_mec = 1; + break; + } + + adev->gfx.mec.num_pipe_per_mec = 4; + adev->gfx.mec.num_queue_per_pipe = 8; + /* KIQ event */ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq); if (r) @@ -2151,49 +2067,41 @@ static int gfx_v8_0_sw_init(void *handle) return r; } - /* set up the compute queues */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - unsigned irq_type; - /* max 32 queues per MEC */ - if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) { - DRM_ERROR("Too many (%d) compute rings!\n", i); - break; + /* set up the compute queues - allocate horizontally across pipes */ + ring_id = 0; + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) + continue; + + r = gfx_v8_0_compute_ring_init(adev, + ring_id, + i, k, j); + if (r) + return r; + + ring_id++; + } } - ring = &adev->gfx.compute_ring[i]; - ring->ring_obj = NULL; - ring->use_doorbell = true; - ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i; - ring->me = 1; /* first MEC */ - ring->pipe = i / 8; - ring->queue = i % 8; - ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); - sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); - irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; - /* type-2 packets are deprecated on MEC, use type-3 instead */ - r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, - irq_type); - if (r) - return r; } - if (amdgpu_sriov_vf(adev)) { - r = gfx_v8_0_kiq_init(adev); - if (r) { - DRM_ERROR("Failed to init KIQ BOs!\n"); - return r; - } + r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE); + if (r) { + DRM_ERROR("Failed to init KIQ BOs!\n"); + return r; + } - kiq = &adev->gfx.kiq; - r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq); - if (r) - return r; + kiq = &adev->gfx.kiq; + r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); + if (r) + return r; - /* create MQD for all compute queues as wel as KIQ for SRIOV case */ - r = gfx_v8_0_compute_mqd_sw_init(adev); - if (r) - return r; - } + /* create MQD for all compute queues as well as KIQ for SRIOV case */ + r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation)); + if (r) + return r; /* reserve GDS, GWS and OA resource for gfx */ r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, @@ -2237,11 +2145,9 @@ static int gfx_v8_0_sw_fini(void *handle) for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); - if (amdgpu_sriov_vf(adev)) { - gfx_v8_0_compute_mqd_sw_fini(adev); - gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); - gfx_v8_0_kiq_fini(adev); - } + amdgpu_gfx_compute_mqd_sw_fini(adev); + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); + amdgpu_gfx_kiq_fini(adev); gfx_v8_0_mec_fini(adev); gfx_v8_0_rlc_fini(adev); @@ -3594,11 +3500,6 @@ static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, WREG32(mmGRBM_GFX_INDEX, data); } -static u32 gfx_v8_0_create_bitmask(u32 bit_width) -{ - return (u32)((1ULL << bit_width) - 1); -} - static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) { u32 data, mask; @@ -3608,8 +3509,8 @@ static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE); - mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se / - adev->gfx.config.max_sh_per_se); + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se); return (~data) & mask; } @@ -3823,7 +3724,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) /** * gfx_v8_0_init_compute_vmid - gart enable * - * @rdev: amdgpu_device pointer + * @adev: amdgpu_device pointer * * Initialize compute vmid sh_mem registers * @@ -4481,6 +4382,39 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) return 0; } +static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring) +{ + u32 tmp; + /* no gfx doorbells on iceland */ + if (adev->asic_type == CHIP_TOPAZ) + return; + + tmp = RREG32(mmCP_RB_DOORBELL_CONTROL); + + if (ring->use_doorbell) { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_HIT, 0); + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_EN, 1); + } else { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); + } + + WREG32(mmCP_RB_DOORBELL_CONTROL, tmp); + + if (adev->flags & AMD_IS_APU) + return; + + tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, + DOORBELL_RANGE_LOWER, + AMDGPU_DOORBELL_GFX_RING0); + WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp); + + WREG32(mmCP_RB_DOORBELL_RANGE_UPPER, + CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); +} static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) { @@ -4528,34 +4462,7 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) WREG32(mmCP_RB0_BASE, rb_addr); WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); - /* no gfx doorbells on iceland */ - if (adev->asic_type != CHIP_TOPAZ) { - tmp = RREG32(mmCP_RB_DOORBELL_CONTROL); - if (ring->use_doorbell) { - tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, - DOORBELL_OFFSET, ring->doorbell_index); - tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, - DOORBELL_HIT, 0); - tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, - DOORBELL_EN, 1); - } else { - tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, - DOORBELL_EN, 0); - } - WREG32(mmCP_RB_DOORBELL_CONTROL, tmp); - - if (adev->asic_type == CHIP_TONGA) { - tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, - DOORBELL_RANGE_LOWER, - AMDGPU_DOORBELL_GFX_RING0); - WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp); - - WREG32(mmCP_RB_DOORBELL_RANGE_UPPER, - CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); - } - - } - + gfx_v8_0_set_cpg_door_bell(adev, ring); /* start the ring */ amdgpu_ring_clear_ring(ring); gfx_v8_0_cp_gfx_start(adev); @@ -4628,29 +4535,6 @@ static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev) return 0; } -static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev) -{ - int i, r; - - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - - if (ring->mqd_obj) { - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r); - - amdgpu_bo_unpin(ring->mqd_obj); - amdgpu_bo_unreserve(ring->mqd_obj); - - amdgpu_bo_unref(&ring->mqd_obj); - ring->mqd_obj = NULL; - ring->mqd_ptr = NULL; - ring->mqd_gpu_addr = 0; - } - } -} - /* KIQ functions */ static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) { @@ -4666,45 +4550,111 @@ static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) WREG32(mmRLC_CP_SCHEDULERS, tmp); } -static void gfx_v8_0_kiq_enable(struct amdgpu_ring *ring) +static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) { - amdgpu_ring_alloc(ring, 8); + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + uint32_t scratch, tmp = 0; + uint64_t queue_mask = 0; + int r, i; + + for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { + if (!test_bit(i, adev->gfx.mec.queue_bitmap)) + continue; + + /* This situation may be hit in the future if a new HW + * generation exposes more than 64 queues. If so, the + * definition of queue_mask needs updating */ + if (WARN_ON(i > (sizeof(queue_mask)*8))) { + DRM_ERROR("Invalid KCQ enabled: %d\n", i); + break; + } + + queue_mask |= (1ull << i); + } + + r = amdgpu_gfx_scratch_get(adev, &scratch); + if (r) { + DRM_ERROR("Failed to get scratch reg (%d).\n", r); + return r; + } + WREG32(scratch, 0xCAFEDEAD); + + r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11); + if (r) { + DRM_ERROR("Failed to lock KIQ (%d).\n", r); + amdgpu_gfx_scratch_free(adev, scratch); + return r; + } /* set resources */ - amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6)); - amdgpu_ring_write(ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ - amdgpu_ring_write(ring, 0x000000FF); /* queue mask lo */ - amdgpu_ring_write(ring, 0); /* queue mask hi */ - amdgpu_ring_write(ring, 0); /* gws mask lo */ - amdgpu_ring_write(ring, 0); /* gws mask hi */ - amdgpu_ring_write(ring, 0); /* oac mask */ - amdgpu_ring_write(ring, 0); /* gds heap base:0, gds heap size:0 */ - amdgpu_ring_commit(ring); - udelay(50); + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); + amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ + amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ + amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* oac mask */ + amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; + uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + + /* map queues */ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) | + PACKET3_MAP_QUEUES_QUEUE(ring->queue) | + PACKET3_MAP_QUEUES_PIPE(ring->pipe) | + PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */ + amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); + } + /* write to scratch for completion */ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); + amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); + amdgpu_ring_write(kiq_ring, 0xDEADBEEF); + amdgpu_ring_commit(kiq_ring); + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + if (i >= adev->usec_timeout) { + DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n", + scratch, tmp); + r = -EINVAL; + } + amdgpu_gfx_scratch_free(adev, scratch); + + return r; } -static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring, - struct amdgpu_ring *ring) +static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req) { - struct amdgpu_device *adev = kiq_ring->adev; - uint64_t mqd_addr, wptr_addr; + int i, r = 0; - mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); - wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - amdgpu_ring_alloc(kiq_ring, 8); + if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { + WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) + break; + udelay(1); + } + if (i == adev->usec_timeout) + r = -ETIMEDOUT; + } + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); + WREG32(mmCP_HQD_PQ_RPTR, 0); + WREG32(mmCP_HQD_PQ_WPTR, 0); - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); - /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ - amdgpu_ring_write(kiq_ring, 0x21010000); - amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2) | - (ring->queue << 26) | - (ring->pipe << 29) | - ((ring->me == 1 ? 0 : 1) << 31)); /* doorbell */ - amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); - amdgpu_ring_commit(kiq_ring); - udelay(50); + return r; } static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) @@ -4721,7 +4671,12 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) mqd->compute_static_thread_mgmt_se2 = 0xffffffff; mqd->compute_static_thread_mgmt_se3 = 0xffffffff; mqd->compute_misc_reserved = 0x00000003; - + if (!(adev->flags & AMD_IS_APU)) { + mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr + + offsetof(struct vi_mqd_allocation, dyamic_cu_mask)); + mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr + + offsetof(struct vi_mqd_allocation, dyamic_cu_mask)); + } eop_base_addr = ring->eop_gpu_addr >> 8; mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); @@ -4729,7 +4684,7 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ tmp = RREG32(mmCP_HQD_EOP_CONTROL); tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, - (order_base_2(MEC_HPD_SIZE / 4) - 1)); + (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1)); mqd->cp_hqd_eop_control = tmp; @@ -4741,11 +4696,6 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) mqd->cp_hqd_pq_doorbell_control = tmp; - /* disable the queue if it's active */ - mqd->cp_hqd_dequeue_request = 0; - mqd->cp_hqd_pq_rptr = 0; - mqd->cp_hqd_pq_wptr = 0; - /* set the pointer to the MQD */ mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); @@ -4815,149 +4765,160 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); mqd->cp_hqd_persistent_state = tmp; + /* set MTYPE */ + tmp = RREG32(mmCP_HQD_IB_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); + tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3); + mqd->cp_hqd_ib_control = tmp; + + tmp = RREG32(mmCP_HQD_IQ_TIMER); + tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3); + mqd->cp_hqd_iq_timer = tmp; + + tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3); + mqd->cp_hqd_ctx_save_control = tmp; + + /* defaults */ + mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR); + mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR); + mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY); + mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY); + mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); + mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO); + mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI); + mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET); + mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE); + mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET); + mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE); + mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS); + mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR); + mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM); + mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES); + /* activate the queue */ mqd->cp_hqd_active = 1; return 0; } -static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring) +int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, + struct vi_mqd *mqd) { - struct amdgpu_device *adev = ring->adev; - struct vi_mqd *mqd = ring->mqd_ptr; - int j; + uint32_t mqd_reg; + uint32_t *mqd_data; + + /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */ + mqd_data = &mqd->cp_mqd_base_addr_lo; /* disable wptr polling */ WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0); - WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo); - WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi); + /* program all HQD registers */ + for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++) + WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); - /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control); - - /* enable doorbell? */ - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); - - /* disable the queue if it's active */ - if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { - WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); - for (j = 0; j < adev->usec_timeout; j++) { - if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) - break; - udelay(1); - } - WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); - WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); - WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); + /* Tonga errata: EOP RPTR/WPTR should be left unmodified. + * This is safe since EOP RPTR==WPTR for any inactive HQD + * on ASICs that do not support context-save. + * EOP writes/reads can start anywhere in the ring. + */ + if (adev->asic_type != CHIP_TONGA) { + WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr); + WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr); + WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem); } - /* set the pointer to the MQD */ - WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); - WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); - - /* set MQD vmid to 0 */ - WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control); + for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++) + WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); - /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ - WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); - WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); + /* activate the HQD */ + for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++) + WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); - /* set up the HQD, this is similar to CP_RB0_CNTL */ - WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); - - /* set the wb address whether it's enabled or not */ - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, - mqd->cp_hqd_pq_rptr_report_addr_lo); - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, - mqd->cp_hqd_pq_rptr_report_addr_hi); - - /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi); - - /* enable the doorbell if requested */ - if (ring->use_doorbell) { - if ((adev->asic_type == CHIP_CARRIZO) || - (adev->asic_type == CHIP_FIJI) || - (adev->asic_type == CHIP_STONEY)) { - WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, - AMDGPU_DOORBELL_KIQ << 2); - WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, - AMDGPU_DOORBELL_MEC_RING7 << 2); - } - } - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); + return 0; +} - /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); +static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct vi_mqd *mqd = ring->mqd_ptr; + int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; - /* set the vmid for the queue */ - WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); + gfx_v8_0_kiq_setting(ring); - WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); + if (adev->gfx.in_reset) { /* for GPU_RESET case */ + /* reset MQD to a clean status */ + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); - /* activate the queue */ - WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); + /* reset ring buffer */ + ring->wptr = 0; + amdgpu_ring_clear_ring(ring); + mutex_lock(&adev->srbm_mutex); + vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v8_0_mqd_commit(adev, mqd); + vi_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } else { + memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); + ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF; + ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF; + mutex_lock(&adev->srbm_mutex); + vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v8_0_mqd_init(ring); + gfx_v8_0_mqd_commit(adev, mqd); + vi_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); - if (ring->use_doorbell) - WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); + } return 0; } -static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) +static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct vi_mqd *mqd = ring->mqd_ptr; - bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ); - int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; + int mqd_idx = ring - &adev->gfx.compute_ring[0]; - if (is_kiq) { - gfx_v8_0_kiq_setting(&kiq->ring); - } else { - mqd_idx = ring - &adev->gfx.compute_ring[0]; - } - - if (!adev->gfx.in_reset) { - memset((void *)mqd, 0, sizeof(*mqd)); + if (!adev->gfx.in_reset && !adev->gfx.in_suspend) { + memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); + ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF; + ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF; mutex_lock(&adev->srbm_mutex); vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); gfx_v8_0_mqd_init(ring); - if (is_kiq) - gfx_v8_0_kiq_init_register(ring); vi_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); - } else { /* for GPU_RESET case */ + memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); + } else if (adev->gfx.in_reset) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); - + memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); /* reset ring buffer */ ring->wptr = 0; amdgpu_ring_clear_ring(ring); - - if (is_kiq) { - mutex_lock(&adev->srbm_mutex); - vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - gfx_v8_0_kiq_init_register(ring); - vi_srbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - } + } else { + amdgpu_ring_clear_ring(ring); } - - if (is_kiq) - gfx_v8_0_kiq_enable(ring); - else - gfx_v8_0_map_queue_enable(&kiq->ring, ring); - return 0; } +static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev) +{ + if (adev->asic_type > CHIP_TONGA) { + WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2); + WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2); + } + /* enable doorbells */ + WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); +} + static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) { struct amdgpu_ring *ring = NULL; @@ -4981,13 +4942,6 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) if (r) goto done; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; - goto done; - } - for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; @@ -4996,272 +4950,41 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) goto done; r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); if (!r) { - r = gfx_v8_0_kiq_init_queue(ring); + r = gfx_v8_0_kcq_init_queue(ring); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } amdgpu_bo_unreserve(ring->mqd_obj); if (r) goto done; - - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) - ring->ready = false; } -done: - return r; -} - -static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) -{ - int r, i, j; - u32 tmp; - bool use_doorbell = true; - u64 hqd_gpu_addr; - u64 mqd_gpu_addr; - u64 eop_gpu_addr; - u64 wb_gpu_addr; - u32 *buf; - struct vi_mqd *mqd; - - /* init the queues. */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - - if (ring->mqd_obj == NULL) { - r = amdgpu_bo_create(adev, - sizeof(struct vi_mqd), - PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, - NULL, &ring->mqd_obj); - if (r) { - dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); - return r; - } - } - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) { - gfx_v8_0_cp_compute_fini(adev); - return r; - } - r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, - &mqd_gpu_addr); - if (r) { - dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); - gfx_v8_0_cp_compute_fini(adev); - return r; - } - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf); - if (r) { - dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); - gfx_v8_0_cp_compute_fini(adev); - return r; - } - - /* init the mqd struct */ - memset(buf, 0, sizeof(struct vi_mqd)); - - mqd = (struct vi_mqd *)buf; - mqd->header = 0xC0310800; - mqd->compute_pipelinestat_enable = 0x00000001; - mqd->compute_static_thread_mgmt_se0 = 0xffffffff; - mqd->compute_static_thread_mgmt_se1 = 0xffffffff; - mqd->compute_static_thread_mgmt_se2 = 0xffffffff; - mqd->compute_static_thread_mgmt_se3 = 0xffffffff; - mqd->compute_misc_reserved = 0x00000003; - - mutex_lock(&adev->srbm_mutex); - vi_srbm_select(adev, ring->me, - ring->pipe, - ring->queue, 0); - - eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); - eop_gpu_addr >>= 8; - - /* write the EOP addr */ - WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr); - WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr)); - - /* set the VMID assigned */ - WREG32(mmCP_HQD_VMID, 0); - - /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - tmp = RREG32(mmCP_HQD_EOP_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, - (order_base_2(MEC_HPD_SIZE / 4) - 1)); - WREG32(mmCP_HQD_EOP_CONTROL, tmp); - - /* disable wptr polling */ - tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); - tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); - WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); - - mqd->cp_hqd_eop_base_addr_lo = - RREG32(mmCP_HQD_EOP_BASE_ADDR); - mqd->cp_hqd_eop_base_addr_hi = - RREG32(mmCP_HQD_EOP_BASE_ADDR_HI); - - /* enable doorbell? */ - tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); - if (use_doorbell) { - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - } else { - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0); - } - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp); - mqd->cp_hqd_pq_doorbell_control = tmp; - - /* disable the queue if it's active */ - mqd->cp_hqd_dequeue_request = 0; - mqd->cp_hqd_pq_rptr = 0; - mqd->cp_hqd_pq_wptr= 0; - if (RREG32(mmCP_HQD_ACTIVE) & 1) { - WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); - for (j = 0; j < adev->usec_timeout; j++) { - if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) - break; - udelay(1); - } - WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); - WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); - WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); - } - - /* set the pointer to the MQD */ - mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; - mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); - WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); - WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); - - /* set MQD vmid to 0 */ - tmp = RREG32(mmCP_MQD_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); - WREG32(mmCP_MQD_CONTROL, tmp); - mqd->cp_mqd_control = tmp; - - /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ - hqd_gpu_addr = ring->gpu_addr >> 8; - mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; - mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); - WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); - WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); - - /* set up the HQD, this is similar to CP_RB0_CNTL */ - tmp = RREG32(mmCP_HQD_PQ_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, - (order_base_2(ring->ring_size / 4) - 1)); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, - ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); -#ifdef __BIG_ENDIAN - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); -#endif - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); - WREG32(mmCP_HQD_PQ_CONTROL, tmp); - mqd->cp_hqd_pq_control = tmp; - - /* set the wb address wether it's enabled or not */ - wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); - mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; - mqd->cp_hqd_pq_rptr_report_addr_hi = - upper_32_bits(wb_gpu_addr) & 0xffff; - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, - mqd->cp_hqd_pq_rptr_report_addr_lo); - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, - mqd->cp_hqd_pq_rptr_report_addr_hi); - - /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ - wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; - mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, - mqd->cp_hqd_pq_wptr_poll_addr_hi); - - /* enable the doorbell if requested */ - if (use_doorbell) { - if ((adev->asic_type == CHIP_CARRIZO) || - (adev->asic_type == CHIP_FIJI) || - (adev->asic_type == CHIP_STONEY) || - (adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS10) || - (adev->asic_type == CHIP_POLARIS12)) { - WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, - AMDGPU_DOORBELL_KIQ << 2); - WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, - AMDGPU_DOORBELL_MEC_RING7 << 2); - } - tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_OFFSET, ring->doorbell_index); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0); - mqd->cp_hqd_pq_doorbell_control = tmp; - - } else { - mqd->cp_hqd_pq_doorbell_control = 0; - } - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, - mqd->cp_hqd_pq_doorbell_control); + gfx_v8_0_set_mec_doorbell_range(adev); - /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - ring->wptr = 0; - mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr); - WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); - mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); - - /* set the vmid for the queue */ - mqd->cp_hqd_vmid = 0; - WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); - - tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); - tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); - WREG32(mmCP_HQD_PERSISTENT_STATE, tmp); - mqd->cp_hqd_persistent_state = tmp; - if (adev->asic_type == CHIP_STONEY || - adev->asic_type == CHIP_POLARIS11 || - adev->asic_type == CHIP_POLARIS10 || - adev->asic_type == CHIP_POLARIS12) { - tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL); - tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1); - WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp); - } - - /* activate the queue */ - mqd->cp_hqd_active = 1; - WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); - - vi_srbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - - amdgpu_bo_kunmap(ring->mqd_obj); - amdgpu_bo_unreserve(ring->mqd_obj); - } + r = gfx_v8_0_kiq_kcq_enable(adev); + if (r) + goto done; - if (use_doorbell) { - tmp = RREG32(mmCP_PQ_STATUS); - tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1); - WREG32(mmCP_PQ_STATUS, tmp); + /* Test KIQ */ + ring = &adev->gfx.kiq.ring; + ring->ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->ready = false; + goto done; } - gfx_v8_0_cp_compute_enable(adev, true); - + /* Test KCQs */ for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - + ring = &adev->gfx.compute_ring[i]; ring->ready = true; r = amdgpu_ring_test_ring(ring); if (r) ring->ready = false; } - return 0; +done: + return r; } static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) @@ -5314,10 +5037,7 @@ static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) if (r) return r; - if (amdgpu_sriov_vf(adev)) - r = gfx_v8_0_kiq_resume(adev); - else - r = gfx_v8_0_cp_compute_resume(adev); + r = gfx_v8_0_kiq_resume(adev); if (r) return r; @@ -5361,7 +5081,6 @@ static int gfx_v8_0_hw_fini(void *handle) } gfx_v8_0_cp_enable(adev, false); gfx_v8_0_rlc_stop(adev); - gfx_v8_0_cp_compute_fini(adev); amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE); @@ -5372,15 +5091,18 @@ static int gfx_v8_0_hw_fini(void *handle) static int gfx_v8_0_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - + adev->gfx.in_suspend = true; return gfx_v8_0_hw_fini(adev); } static int gfx_v8_0_resume(void *handle) { + int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - return gfx_v8_0_hw_init(adev); + r = gfx_v8_0_hw_init(adev); + adev->gfx.in_suspend = false; + return r; } static bool gfx_v8_0_is_idle(void *handle) @@ -5469,25 +5191,6 @@ static bool gfx_v8_0_check_soft_reset(void *handle) } } -static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - int i; - - mutex_lock(&adev->srbm_mutex); - vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { - WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, 2); - for (i = 0; i < adev->usec_timeout; i++) { - if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) - break; - udelay(1); - } - } - vi_srbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); -} - static int gfx_v8_0_pre_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -5517,7 +5220,11 @@ static int gfx_v8_0_pre_soft_reset(void *handle) for (i = 0; i < adev->gfx.num_compute_rings; i++) { struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - gfx_v8_0_inactive_hqd(adev, ring); + mutex_lock(&adev->srbm_mutex); + vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v8_0_deactivate_hqd(adev, 2); + vi_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); } /* Disable MEC parsing/prefetching */ gfx_v8_0_cp_compute_enable(adev, false); @@ -5588,18 +5295,6 @@ static int gfx_v8_0_soft_reset(void *handle) return 0; } -static void gfx_v8_0_init_hqd(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - mutex_lock(&adev->srbm_mutex); - vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); - WREG32(mmCP_HQD_PQ_RPTR, 0); - WREG32(mmCP_HQD_PQ_WPTR, 0); - vi_srbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); -} - static int gfx_v8_0_post_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -5625,9 +5320,13 @@ static int gfx_v8_0_post_soft_reset(void *handle) for (i = 0; i < adev->gfx.num_compute_rings; i++) { struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - gfx_v8_0_init_hqd(adev, ring); + mutex_lock(&adev->srbm_mutex); + vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v8_0_deactivate_hqd(adev, 2); + vi_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); } - gfx_v8_0_cp_compute_resume(adev); + gfx_v8_0_kiq_resume(adev); } gfx_v8_0_rlc_start(adev); @@ -5773,7 +5472,7 @@ static int gfx_v8_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; - adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS; + adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; adev->gfx.funcs = &gfx_v8_0_gfx_funcs; gfx_v8_0_set_ring_funcs(adev); gfx_v8_0_set_irq_funcs(adev); @@ -6265,6 +5964,8 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); if (temp != data) WREG32(mmRLC_CGCG_CGLS_CTRL, data); + /* enable interrupts again for PG */ + gfx_v8_0_enable_gui_idle_interrupt(adev, true); } gfx_v8_0_wait_for_rlc_serdes(adev); @@ -6568,9 +6269,13 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, control |= ib->length_dw | (vm_id << 24); - if (amdgpu_sriov_vf(ring->adev) && ib->flags & AMDGPU_IB_FLAG_PREEMPT) + if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { control |= INDIRECT_BUFFER_PRE_ENB(1); + if (!(ib->flags & AMDGPU_IB_FLAG_CE)) + gfx_v8_0_ring_emit_de_meta(ring); + } + amdgpu_ring_write(ring, header); amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN @@ -6753,8 +6458,7 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) uint32_t dw2 = 0; if (amdgpu_sriov_vf(ring->adev)) - gfx_v8_0_ring_emit_ce_meta_init(ring, - (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr); + gfx_v8_0_ring_emit_ce_meta(ring); dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ if (flags & AMDGPU_HAVE_CTX_SWITCH) { @@ -6780,10 +6484,6 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); amdgpu_ring_write(ring, dw2); amdgpu_ring_write(ring, 0); - - if (amdgpu_sriov_vf(ring->adev)) - gfx_v8_0_ring_emit_de_meta_init(ring, - (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr); } static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) @@ -6813,7 +6513,6 @@ static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne ring->ring[offset] = (ring->ring_size >> 2) - offset + cur; } - static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) { struct amdgpu_device *adev = ring->adev; @@ -6851,15 +6550,27 @@ static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, int me, int pipe, enum amdgpu_interrupt_state state) { + u32 mec_int_cntl, mec_int_cntl_reg; + /* - * amdgpu controls only pipe 0 of MEC1. That's why this function only - * handles the setting of interrupts for this specific pipe. All other + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other * pipes' interrupts are set by amdkfd. */ if (me == 1) { switch (pipe) { case 0: + mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL; + break; + case 1: + mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL; + break; + case 2: + mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL; + break; + case 3: + mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL; break; default: DRM_DEBUG("invalid pipe %d\n", pipe); @@ -6870,8 +6581,20 @@ static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, return; } - WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE, - state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK; + WREG32(mec_int_cntl_reg, mec_int_cntl); + break; + case AMDGPU_IRQ_STATE_ENABLE: + mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK; + WREG32(mec_int_cntl_reg, mec_int_cntl); + break; + default: + break; + } } static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev, @@ -6992,8 +6715,6 @@ static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev, { struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); - BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ); - switch (type) { case AMDGPU_CP_KIQ_IRQ_DRIVER0: WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE, @@ -7023,8 +6744,6 @@ static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev, u8 me_id, pipe_id, queue_id; struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); - BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ); - me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; queue_id = (entry->ring_id & 0x70) >> 4; @@ -7257,7 +6976,7 @@ static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev) data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) | RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); - mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh); + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask; } @@ -7268,9 +6987,15 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; unsigned disable_masks[4 * 2]; + u32 ao_cu_num; memset(cu_info, 0, sizeof(*cu_info)); + if (adev->flags & AMD_IS_APU) + ao_cu_num = 2; + else + ao_cu_num = adev->gfx.config.max_cu_per_sh; + amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); mutex_lock(&adev->grbm_idx_mutex); @@ -7286,16 +7011,18 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) bitmap = gfx_v8_0_get_cu_active_bitmap(adev); cu_info->bitmap[i][j] = bitmap; - for (k = 0; k < 16; k ++) { + for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { if (bitmap & mask) { - if (counter < 2) + if (counter < ao_cu_num) ao_bitmap |= mask; counter ++; } mask <<= 1; } active_cu_number += counter; - ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); + if (i < 2 && j < 2) + ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); + cu_info->ao_cu_bitmap[i][j] = ao_bitmap; } } gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); @@ -7323,7 +7050,7 @@ const struct amdgpu_ip_block_version gfx_v8_1_ip_block = .funcs = &gfx_v8_0_ip_funcs, }; -static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr) +static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring) { uint64_t ce_payload_addr; int cnt_ce; @@ -7333,10 +7060,12 @@ static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t c } ce_payload = {}; if (ring->adev->virt.chained_ib_support) { - ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload); + ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 + + offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload); cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2; } else { - ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, ce_payload); + ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 + + offsetof(struct vi_gfx_meta_data, ce_payload); cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2; } @@ -7350,15 +7079,16 @@ static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t c amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2); } -static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr) +static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring) { - uint64_t de_payload_addr, gds_addr; + uint64_t de_payload_addr, gds_addr, csa_addr; int cnt_de; static union { struct vi_de_ib_state regular; struct vi_de_ib_state_chained_ib chained; } de_payload = {}; + csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096; gds_addr = csa_addr + 4096; if (ring->adev->virt.chained_ib_support) { de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr); @@ -7381,68 +7111,3 @@ static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t c amdgpu_ring_write(ring, upper_32_bits(de_payload_addr)); amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2); } - -/* create MQD for each compute queue */ -static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev) -{ - struct amdgpu_ring *ring = NULL; - int r, i; - - /* create MQD for KIQ */ - ring = &adev->gfx.kiq.ring; - if (!ring->mqd_obj) { - r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, - &ring->mqd_gpu_addr, &ring->mqd_ptr); - if (r) { - dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); - return r; - } - - /* prepare MQD backup */ - adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL); - if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]) - dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); - } - - /* create MQD for each KCQ */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - if (!ring->mqd_obj) { - r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, - &ring->mqd_gpu_addr, &ring->mqd_ptr); - if (r) { - dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); - return r; - } - - /* prepare MQD backup */ - adev->gfx.mec.mqd_backup[i] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL); - if (!adev->gfx.mec.mqd_backup[i]) - dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); - } - } - - return 0; -} - -static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev) -{ - struct amdgpu_ring *ring = NULL; - int i; - - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - kfree(adev->gfx.mec.mqd_backup[i]); - amdgpu_bo_free_kernel(&ring->mqd_obj, - &ring->mqd_gpu_addr, - &ring->mqd_ptr); - } - - ring = &adev->gfx.kiq.ring; - kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]); - amdgpu_bo_free_kernel(&ring->mqd_obj, - &ring->mqd_gpu_addr, - &ring->mqd_ptr); -} |